{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,16]],"date-time":"2026-01-16T20:41:41Z","timestamp":1768596101078,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":11,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,8,7]],"date-time":"2017-08-07T00:00:00Z","timestamp":1502064000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Natural Science Fund of China","award":["61572140"],"award-info":[{"award-number":["61572140"]}]},{"name":"National Natural Science Fund of China","award":["61672165"],"award-info":[{"award-number":["61672165"]}]},{"name":"The Application of Big Data Computing Platform in Smart Lingang New City based BIM and GIS","award":["ZN2016020103"],"award-info":[{"award-number":["ZN2016020103"]}]},{"name":"Shanghai Municipal Science and Technology Commission","award":["16JC1420401"],"award-info":[{"award-number":["16JC1420401"]}]},{"name":"Shanghai Municipal Science and Technology Commission","award":["16511104704"],"award-info":[{"award-number":["16511104704"]}]},{"name":"Shanghai Municipal Science and Technology Commission","award":["16511105402"],"award-info":[{"award-number":["16511105402"]}]},{"name":"Shanghai Municipality Program of Technology Research Leader","award":["17XD1425000"],"award-info":[{"award-number":["17XD1425000"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2017,8,7]]},"DOI":"10.1145\/3077136.3080671","type":"proceedings-article","created":{"date-parts":[[2017,7,28]],"date-time":"2017-07-28T19:35:01Z","timestamp":1501270501000},"page":"889-892","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":18,"title":["A Hierarchical Multimodal Attention-based Neural Network for Image Captioning"],"prefix":"10.1145","author":[{"given":"Yong","family":"Cheng","sequence":"first","affiliation":[{"name":"Fudan University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fei","family":"Huang","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lian","family":"Zhou","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cheng","family":"Jin","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuejie","family":"Zhang","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai University of Finance and Economics, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2017,8,7]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995466"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"e_1_3_2_1_5_1","first-page":"595","author":"Kiros R.","year":"2014","unstructured":"Kiros , R. , Salakhutdinov , R. , and Zemel , R. , \" Multimodal neural language models,\" ICML 2014 , pp. 595 -- 603 , 2014. Kiros, R., Salakhutdinov, R., and Zemel, R., \"Multimodal neural language models,\" ICML 2014, pp. 595--603, 2014.","journal-title":"ICML"},{"key":"e_1_3_2_1_6_1","volume-title":"Deep captioning with multimodal Recurrent Neural Networks (m-RNN),\" arXiv:1410.1090","author":"Mao J. H.","year":"2014","unstructured":"Mao , J. H. , Xu , W. , and Yuille , A. , \" Deep captioning with multimodal Recurrent Neural Networks (m-RNN),\" arXiv:1410.1090 , 2014 . Mao, J. H., Xu, W., and Yuille, A., \"Deep captioning with multimodal Recurrent Neural Networks (m-RNN),\" arXiv:1410.1090, 2014."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"e_1_3_2_1_8_1","first-page":"2048","author":"Xu K.","year":"2015","unstructured":"Xu , K. , Ba , J. L. , and Bengio , Y. \" Show , attend and tell: Neural image caption generation with visual attention,\" ICML 2015 , pp. 2048 -- 2057 , 2015. Xu, K., Ba, J. L., and Bengio, Y. \"Show, attend and tell: Neural image caption generation with visual attention,\" ICML 2015, pp. 2048--2057, 2015.","journal-title":"ICML"},{"key":"e_1_3_2_1_9_1","volume-title":"Image captioning with semantic attention,\" arXiv:1603.03925","author":"You Q. Z.","year":"2016","unstructured":"You , Q. Z. , Jin , H. L. , Wang , Z. W. , Fang , C. , and Luo , J. B. , \" Image captioning with semantic attention,\" arXiv:1603.03925 , 2016 . You, Q. Z., Jin, H. L., Wang, Z. W., Fang, C., and Luo, J. B., \"Image captioning with semantic attention,\" arXiv:1603.03925, 2016."},{"key":"e_1_3_2_1_10_1","volume-title":"P., and Zitnick, C. L., \"Microsoft COCO captions: Data collection and evaluation server,\" arXiv:1504.00325","author":"Chen X. L.","year":"2015","unstructured":"Chen , X. L. , Fang , H. , Lin , T. Y. , Vedantam , R. , Gupta , S. , Dollar , P., and Zitnick, C. L., \"Microsoft COCO captions: Data collection and evaluation server,\" arXiv:1504.00325 , 2015 . Chen, X. L., Fang, H., Lin, T. Y., Vedantam, R., Gupta, S., Dollar, P., and Zitnick, C. L., \"Microsoft COCO captions: Data collection and evaluation server,\" arXiv:1504.00325, 2015."},{"key":"e_1_3_2_1_11_1","first-page":"2015","author":"Jia X.","year":"2015","unstructured":"Jia , X. , Gavves , E. , and Tuytelaars , T. , \" Guiding long-short term memory for image caption generation,\" ICCV 2015 , 2015 . Jia, X., Gavves, E., and Tuytelaars, T., \"Guiding long-short term memory for image caption generation,\" ICCV 2015, 2015.","journal-title":"ICCV"}],"event":{"name":"SIGIR '17: The 40th International ACM SIGIR conference on research and development in Information Retrieval","location":"Shinjuku Tokyo Japan","acronym":"SIGIR '17","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3077136.3080671","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3077136.3080671","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T03:37:12Z","timestamp":1750217832000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3077136.3080671"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,8,7]]},"references-count":11,"alternative-id":["10.1145\/3077136.3080671","10.1145\/3077136"],"URL":"https:\/\/doi.org\/10.1145\/3077136.3080671","relation":{},"subject":[],"published":{"date-parts":[[2017,8,7]]},"assertion":[{"value":"2017-08-07","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}