{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T06:56:41Z","timestamp":1763535401474,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":21,"publisher":"ACM","license":[{"start":{"date-parts":[[2016,10,1]],"date-time":"2016-10-01T00:00:00Z","timestamp":1475280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Key Research and Development Plan","award":["2016YFB1001202"],"award-info":[{"award-number":["2016YFB1001202"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2016,10]]},"DOI":"10.1145\/2964284.2984065","type":"proceedings-article","created":{"date-parts":[[2016,9,29]],"date-time":"2016-09-29T19:17:32Z","timestamp":1475176652000},"page":"1087-1091","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":75,"title":["Describing Videos using Multi-modal Fusion"],"prefix":"10.1145","author":[{"given":"Qin","family":"Jin","sequence":"first","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jia","family":"Chen","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shizhe","family":"Chen","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yifan","family":"Xiong","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexander","family":"Hauptmann","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2016,10]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_2_1_2_1","first-page":"2048","volume-title":"Proceedings of the 32nd International Conference on Machine Learning, ICML 2015","author":"Xu Kelvin","year":"2015"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.515"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.512"},{"volume-title":"Hierarchical recurrent neural encoder for video representation with application to captioning. CoRR, abs\/1511.03476","year":"2015","author":"Pan Pingbo","key":"e_1_3_2_1_5_1"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2911996.2912043"},{"volume-title":"Jointly modeling embedding and translation to bridge video and language. CoRR, abs\/1505.01861","year":"2015","author":"Pan Yingwei","key":"e_1_3_2_1_7_1"},{"volume-title":"Video paragraph captioning using hierarchical recurrent neural networks. CoRR, abs\/1510.07712","year":"2015","author":"Yu Haonan","key":"e_1_3_2_1_8_1"},{"volume-title":"Very deep convolutional networks for large-scale image recognition. CoRR, abs\/1409.1556","year":"2014","author":"Simonyan K.","key":"e_1_3_2_1_9_1"},{"volume-title":"C3d: generic features for video analysis. CoRR, abs\/1412.0767, 2:7","year":"2014","author":"Tran Du","key":"e_1_3_2_1_10_1"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.441"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1980.1163420"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853821"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-013-0636-x"},{"key":"e_1_3_2_1_15_1","first-page":"3111","volume-title":"Advances in neural information processing systems","author":"Mikolov Tomas","year":"2013"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.571"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3348"},{"volume-title":"Text summarization branches out: Proceedings of the ACL-04 workshop","year":"2004","author":"Lin Chin-Yew","key":"e_1_3_2_1_20_1"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"}],"event":{"name":"MM '16: ACM Multimedia Conference","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Amsterdam The Netherlands","acronym":"MM '16"},"container-title":["Proceedings of the 24th ACM international conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2964284.2984065","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2964284.2984065","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T03:40:00Z","timestamp":1750218000000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2964284.2984065"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,10]]},"references-count":21,"alternative-id":["10.1145\/2964284.2984065","10.1145\/2964284"],"URL":"https:\/\/doi.org\/10.1145\/2964284.2984065","relation":{},"subject":[],"published":{"date-parts":[[2016,10]]},"assertion":[{"value":"2016-10-01","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}