{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T05:01:55Z","timestamp":1743138115863,"version":"3.40.3"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030687793"},{"type":"electronic","value":"9783030687809"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-68780-9_30","type":"book-chapter","created":{"date-parts":[[2021,2,24]],"date-time":"2021-02-24T17:04:13Z","timestamp":1614186253000},"page":"353-365","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["CC-LSTM: Cross and Conditional Long-Short Time Memory for Video Captioning"],"prefix":"10.1007","author":[{"given":"Jiangbo","family":"Ai","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yang","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xing","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jie","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Heng Tao","family":"Shen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,2,25]]},"reference":[{"key":"30_CR1","doi-asserted-by":"crossref","unstructured":"Aafaq, N., Akhtar, N., Liu, W., Gilani, S.Z., Mian, A.: Spatio-temporal dynamics and semantic attribute enriched visual encoding for video captioning. arXiv preprint arXiv:1902.10322 (2019)","DOI":"10.1109\/CVPR.2019.01277"},{"key":"30_CR2","doi-asserted-by":"crossref","unstructured":"Aneja, J., Deshpande, A., Schwing, A.G.: Convolutional image captioning. In: CVPR, pp. 5561\u20135570 (2018)","DOI":"10.1109\/CVPR.2018.00583"},{"key":"30_CR3","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473 (2014)"},{"key":"30_CR4","doi-asserted-by":"crossref","unstructured":"Bin, Y., Yang, Y., Shen, F., Xu, X., Shen, H.T.: Bidirectional long-short term memory for video description. In: ACMMM, pp. 436\u2013440. ACM (2016)","DOI":"10.1145\/2964284.2967258"},{"key":"30_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1007\/978-3-642-15561-1_2","volume-title":"Computer Vision \u2013 ECCV 2010","author":"A Farhadi","year":"2010","unstructured":"Farhadi, A., Hejrati, M., Sadeghi, M.A., Young, P., Rashtchian, C., Hockenmaier, J., Forsyth, D.: Every picture tells a story: generating sentences from images. In: Daniilidis, K., Maragos, P., Paragios, N. (eds.) ECCV 2010. LNCS, vol. 6314, pp. 15\u201329. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-15561-1_2"},{"key":"30_CR6","doi-asserted-by":"crossref","unstructured":"Guadarrama, S., et al.: Youtube2text: recognizing and describing arbitrary activities using semantic hierarchies and zero-shot recognition. In: ICCV, pp. 2712\u20132719 (2013)","DOI":"10.1109\/ICCV.2013.337"},{"key":"30_CR7","unstructured":"He, X., Shi, B., Bai, X., Xia, G.S., Zhang, Z., Dong, W.: Image caption generation with part of speech guidance. PRL (2017)"},{"key":"30_CR8","doi-asserted-by":"crossref","unstructured":"Jia, X., Gavves, E., Fernando, B., Tuytelaars, T.: Guiding the long-short term memory model for image caption generation. In: ICCV, pp. 2407\u20132415 (2015)","DOI":"10.1109\/ICCV.2015.277"},{"key":"30_CR9","doi-asserted-by":"crossref","unstructured":"Krause, J., Johnson, J., Krishna, R., Fei-Fei, L.: A hierarchical approach for generating descriptive image paragraphs. arXiv preprint arXiv:1611.06607 (2016)","DOI":"10.1109\/CVPR.2017.356"},{"key":"30_CR10","unstructured":"Lavie, A., Agarwal, A.: Meteor: An automatic metric for MT evaluation with improved correlation with human judgments. In: Proceedings of the EMNLP 2011 Workshop on Statistical Machine Translation, pp. 65\u201372 (2005)"},{"key":"30_CR11","unstructured":"Li, S., Kulkarni, G., Berg, T.L., Berg, A.C., Choi, Y.: Composing simple image descriptions using web-scale n-grams. In: Proceedings of the Fifteenth Conference on Computational Natural Language Learning, pp. 220\u2013228. ACL (2011)"},{"key":"30_CR12","unstructured":"Mao, J., Xu, W., Yang, Y., Wang, J., Huang, Z., Yuille, A.: Deep captioning with multimodal recurrent neural networks (M-RNN). arXiv preprint arXiv:1412.6632 (2014)"},{"key":"30_CR13","doi-asserted-by":"crossref","unstructured":"Pan, P., Xu, Z., Yang, Y., Wu, F., Zhuang, Y.: Hierarchical recurrent neural encoder for video representation with application to captioning. In: CVPR, pp. 1029\u20131038 (2016)","DOI":"10.1109\/CVPR.2016.117"},{"key":"30_CR14","doi-asserted-by":"crossref","unstructured":"Pan, Y., Mei, T., Yao, T., Li, H., Rui, Y.: Jointly modeling embedding and translation to bridge video and language. In: CVPR pp. 4594\u20134602 (2016)","DOI":"10.1109\/CVPR.2016.497"},{"key":"30_CR15","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.J.: Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting on Association for Computational Linguistics, pp. 311\u2013318. ACL (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"30_CR16","doi-asserted-by":"crossref","unstructured":"Pasunuru, R., Bansal, M.: Multi-task video captioning with video and entailment generation. arXiv preprint arXiv:1704.07489 (2017)","DOI":"10.18653\/v1\/D17-1103"},{"issue":"12","key":"30_CR17","doi-asserted-by":"publisher","first-page":"3034","DOI":"10.1109\/TPAMI.2018.2789887","volume":"40","author":"F Shen","year":"2018","unstructured":"Shen, F., Xu, Y., Liu, L., Yang, Y., Huang, Z., Shen, H.T.: Unsupervised deep hashing with similarity-adaptive and discrete optimization. IEEE Trans. Pattern Anal. Mach. Intell. 40(12), 3034\u20133044 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"30_CR18","doi-asserted-by":"crossref","unstructured":"Szegedy, C., et al.: Going deeper with convolutions. In: CVPR, pp. 1\u20139 (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"30_CR19","doi-asserted-by":"crossref","unstructured":"Venugopalan, S., Rohrbach, M., Donahue, J., Mooney, R., Darrell, T., Saenko, K.: Sequence to sequence-video to text. In: ICCV, pp. 4534\u20134542 (2015)","DOI":"10.1109\/ICCV.2015.515"},{"key":"30_CR20","doi-asserted-by":"crossref","unstructured":"Venugopalan, S., Xu, H., Donahue, J., Rohrbach, M., Mooney, R., Saenko, K.: Translating videos to natural language using deep recurrent neural networks. arXiv preprint arXiv:1412.4729 (2014)","DOI":"10.3115\/v1\/N15-1173"},{"key":"30_CR21","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., Erhan, D.: Show and tell: a neural image caption generator. In: CVPR, pp. 3156\u20133164 (2015)","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"30_CR22","unstructured":"Wang, J., Wang, W., Huang, Y., Wang, L., Tan, T.: Multimodal memory modelling for video captioning. arXiv preprint arXiv:1611.05592 (2016)"},{"issue":"2","key":"30_CR23","doi-asserted-by":"publisher","first-page":"657","DOI":"10.1007\/s11280-018-0541-x","volume":"22","author":"X Xu","year":"2018","unstructured":"Xu, X., He, L., Lu, H., Gao, L., Ji, Y.: Deep adversarial metric learning for cross-modal retrieval. World Wide Web 22(2), 657\u2013672 (2018). https:\/\/doi.org\/10.1007\/s11280-018-0541-x","journal-title":"World Wide Web"},{"issue":"5","key":"30_CR24","doi-asserted-by":"publisher","first-page":"2494","DOI":"10.1109\/TIP.2017.2676345","volume":"26","author":"X Xu","year":"2017","unstructured":"Xu, X., Shen, F., Yang, Y., Shen, H.T., Li, X.: Learning discriminative binary codes for large-scale cross-modal retrieval. IEEE Trans. Image Process. 26(5), 2494\u20132507 (2017)","journal-title":"IEEE Trans. Image Process."},{"key":"30_CR25","doi-asserted-by":"crossref","unstructured":"Yao, L., et al.: Describing videos by exploiting temporal structure. In: ICCV, pp. 4507\u20134515 (2015)","DOI":"10.1109\/ICCV.2015.512"},{"key":"30_CR26","doi-asserted-by":"crossref","unstructured":"Yao, T., Pan, Y., Li, Y., Qiu, Z., Mei, T.: Boosting image captioning with attributes. arXiv preprint arXiv:1611.01646 (2016)","DOI":"10.1109\/ICCV.2017.524"},{"key":"30_CR27","doi-asserted-by":"crossref","unstructured":"Yu, H., Wang, J., Huang, Z., Yang, Y., Xu, W.: Video paragraph captioning using hierarchical recurrent neural networks. In: CVPR, pp. 4584\u20134593 (2016)","DOI":"10.1109\/CVPR.2016.496"},{"issue":"11","key":"30_CR28","doi-asserted-by":"publisher","first-page":"5264","DOI":"10.1109\/TNNLS.2018.2797248","volume":"29","author":"L Zhu","year":"2018","unstructured":"Zhu, L., Huang, Z., Li, Z., Xie, L., Shen, H.T.: Exploring auxiliary context: discrete semantic transfer hashing for scalable image retrieval. IEEE Trans. Neural Networks Learn. Syst. 29(11), 5264\u20135276 (2018)","journal-title":"IEEE Trans. Neural Networks Learn. Syst."},{"issue":"11","key":"30_CR29","doi-asserted-by":"publisher","first-page":"5264","DOI":"10.1109\/TNNLS.2018.2797248","volume":"29","author":"L Zhu","year":"2018","unstructured":"Zhu, L., Huang, Z., Li, Z., Xie, L., Shen, H.T.: Exploring auxiliary context: discrete semantic transfer hashing for scalable image retrieval. IEEE Trans. Neural Netw. Learning Syst. 29(11), 5264\u20135276 (2018)","journal-title":"IEEE Trans. Neural Netw. Learning Syst."}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition. ICPR International Workshops and Challenges"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-68780-9_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,2,24]],"date-time":"2021-02-24T17:38:04Z","timestamp":1614188284000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-68780-9_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030687793","9783030687809"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-68780-9_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"25 February 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 January 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 January 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ICPR2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.icpr2020.it\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}