{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T14:39:36Z","timestamp":1742999976846,"version":"3.40.3"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030339814"},{"type":"electronic","value":"9783030339821"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-33982-1_9","type":"book-chapter","created":{"date-parts":[[2019,10,31]],"date-time":"2019-10-31T13:27:48Z","timestamp":1572528468000},"page":"105-115","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Boundary Detector Encoder and Decoder with Soft Attention for Video Captioning"],"prefix":"10.1007","author":[{"given":"Tangming","family":"Chen","sequence":"first","affiliation":[]},{"given":"Qike","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Jingkuan","family":"Song","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,11,1]]},"reference":[{"key":"9_CR1","unstructured":"Banerjee, S., Lavie, A.: METEOR: an automatic metric for MT evaluation with improved correlation with human judgments. In: Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization, pp. 65\u201372 (2005)"},{"key":"9_CR2","doi-asserted-by":"crossref","unstructured":"Baraldi, L., Grana, C., Cucchiara, R.: Hierarchical boundary-aware neural encoder for video captioning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1657\u20131666 (2017)","DOI":"10.1109\/CVPR.2017.339"},{"key":"9_CR3","unstructured":"Barbu, A., et al.: Video in sentences out. arXiv preprint \n                      arXiv:1204.2742\n                      \n                     (2012)"},{"key":"9_CR4","unstructured":"Bengio, S., Vinyals, O., Jaitly, N., Shazeer, N.: Scheduled sampling for sequence prediction with recurrent neural networks. In: Advances in Neural Information Processing Systems, pp. 1171\u20131179 (2015)"},{"key":"9_CR5","unstructured":"Bengio, Y., L\u00e9onard, N., Courville, A.: Estimating or propagating gradients through stochastic neurons for conditional computation. arXiv preprint \n                      arXiv:1308.3432\n                      \n                     (2013)"},{"key":"9_CR6","unstructured":"Chen, D.L., Dolan, W.B.: Collecting highly parallel data for paraphrase evaluation. In: Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies, vol. 1, pp. 190\u2013200 (2011)"},{"key":"9_CR7","doi-asserted-by":"crossref","unstructured":"Fang, H., et al.: From captions to visual concepts and back. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1473\u20131482 (2015)","DOI":"10.1109\/CVPR.2015.7298754"},{"issue":"9","key":"9_CR8","doi-asserted-by":"publisher","first-page":"2045","DOI":"10.1109\/TMM.2017.2729019","volume":"19","author":"L Gao","year":"2017","unstructured":"Gao, L., Guo, Z., Zhang, H., Xu, X., Shen, H.T.: Video captioning with attention-based LSTM and semantic consistency. IEEE Trans. Multimed. 19(9), 2045\u20132055 (2017)","journal-title":"IEEE Trans. Multimed."},{"key":"9_CR9","doi-asserted-by":"crossref","unstructured":"Gers, F.A., Schmidhuber, J., Cummins, F.: Learning to forget: continual prediction with LSTM (1999)","DOI":"10.1049\/cp:19991218"},{"key":"9_CR10","doi-asserted-by":"crossref","unstructured":"Graves, A., Mohamed, A., Hinton, G.: Speech recognition with deep recurrent neural networks. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 6645\u20136649 (2013)","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"9_CR11","doi-asserted-by":"crossref","unstructured":"Guadarrama, S., et al.: Youtube2text: recognizing and describing arbitrary activities using semantic hierarchies and zero-shot recognition. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2712\u20132719 (2013)","DOI":"10.1109\/ICCV.2013.337"},{"issue":"8","key":"9_CR12","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"9_CR13","unstructured":"Khan, M.U.G., Gotoh, Y.: Describing video contents in natural language. In: Proceedings of the Workshop on Innovative Hybrid Approaches to the Processing of Textual Data, pp. 27\u201335 (2012)"},{"key":"9_CR14","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.-J.: BLEU: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting on Association for Computational Linguistics, pp. 311\u2013318 (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"9_CR15","unstructured":"Raiko, T., Berglund, M., Alain, G., Dinh, L.: Techniques for learning binary stochastic feedforward neural networks. arXiv preprint \n                      arXiv:1406.2989\n                      \n                     (2014)"},{"issue":"1","key":"9_CR16","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1007\/s11263-016-0987-1","volume":"123","author":"A Rohrbach","year":"2017","unstructured":"Rohrbach, A., et al.: Movie description. Int. J. Comput. Vis. 123(1), 94\u2013120 (2017)","journal-title":"Int. J. Comput. Vis."},{"issue":"3","key":"9_CR17","doi-asserted-by":"publisher","first-page":"757","DOI":"10.1162\/neco.2007.19.3.757","volume":"19","author":"J Schmidhuber","year":"2007","unstructured":"Schmidhuber, J., Wierstra, D., Gagliolo, M., Gomez, F.: Training recurrent networks by evolino. Neural Comput. 19(3), 757\u2013779 (2007)","journal-title":"Neural Comput."},{"key":"9_CR18","doi-asserted-by":"crossref","unstructured":"Song, J., Guo, Z., Gao, L., Liu, W., Zhang, D., Shen, H.T.: Hierarchical LSTM with adjusted temporal attention for video captioning. arXiv preprint \n                      arXiv:1706.01231\n                      \n                     (2017)","DOI":"10.24963\/ijcai.2017\/381"},{"key":"9_CR19","unstructured":"Sutskever, I., Vinyals, O., Le, Q.V.: Sequence to sequence learning with neural networks. In: Advances in Neural Information Processing Systems, pp. 3104\u20133112 (2014)"},{"key":"9_CR20","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Lawrence Zitnick, C., Parikh, D.: CIDEr: consensus-based image description evaluation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4566\u20134575 (2015)","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"9_CR21","doi-asserted-by":"crossref","unstructured":"Venugopalan, S., Rohrbach, M., Donahue, J., Mooney, R., Darrell, T., Saenko, K.: Sequence to sequence-video to text. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4534\u20134542 (2015)","DOI":"10.1109\/ICCV.2015.515"},{"key":"9_CR22","doi-asserted-by":"crossref","unstructured":"Venugopalan, S., Xu, H., Donahue, J., Rohrbach, M., Mooney, R., Saenko, K.: Translating videos to natural language using deep recurrent neural networks. arXiv preprint \n                      arXiv:1412.4729\n                      \n                     (2014)","DOI":"10.3115\/v1\/N15-1173"},{"key":"9_CR23","doi-asserted-by":"crossref","unstructured":"Wang, J., Wang, W., Huang, Y., Wang, L., Tan, T.: M3: multimodal memory modelling for video captioning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7512\u20137520 (2018)","DOI":"10.1109\/CVPR.2018.00784"},{"key":"9_CR24","doi-asserted-by":"crossref","unstructured":"Xu, J., Mei, T., Yao, T., Rui, Y.: MSR-VTT: a large video description dataset for bridging video and language. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5288\u20135296 (2016)","DOI":"10.1109\/CVPR.2016.571"},{"key":"9_CR25","doi-asserted-by":"crossref","unstructured":"Yao, L., et al.: Describing videos by exploiting temporal structure. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4507\u20134515 (2015)","DOI":"10.1109\/ICCV.2015.512"}],"container-title":["Lecture Notes in Computer Science","Web and Big Data"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-33982-1_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T06:40:59Z","timestamp":1572590459000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-33982-1_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030339814","9783030339821"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-33982-1_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"1 November 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"APWeb-WAIM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asia-Pacific Web (APWeb) and Web-Age Information Management (WAIM) Joint International Conference on Web and Big Data","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chengdu","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 August 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 August 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"apwebwaim2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/cfm.uestc.edu.cn\/apwebwaim2019\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Research Microsoft","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"180","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"42","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"23% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}