{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:30:32Z","timestamp":1750221032835,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":24,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,10,15]],"date-time":"2018-10-15T00:00:00Z","timestamp":1539561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100011002","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61631017"],"award-info":[{"award-number":["61631017"]}],"id":[{"id":"10.13039\/501100011002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,10,15]]},"DOI":"10.1145\/3240508.3240695","type":"proceedings-article","created":{"date-parts":[[2018,10,18]],"date-time":"2018-10-18T17:52:08Z","timestamp":1539885128000},"page":"1435-1443","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":13,"title":["Paragraph Generation Network with Visual Relationship Detection"],"prefix":"10.1145","author":[{"given":"Wenbin","family":"Che","sequence":"first","affiliation":[{"name":"Harbin Institute of Technology, Harbin, China"}]},{"given":"Xiaopeng","family":"Fan","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Harbin, China"}]},{"given":"Ruiqin","family":"Xiong","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"given":"Debin","family":"Zhao","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Harbin, China"}]}],"member":"320","published-online":{"date-parts":[[2018,10,15]]},"reference":[{"volume-title":"SCA-CNN: Spatial and Channel-Wise Attention in Convolutional Networks for Image Captioning. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR). 6298--6306","year":"2017","author":"Chen Long","key":"e_1_3_2_1_1_1"},{"volume-title":"Detecting Visual Relationships With Deep Relational Networks. In The IEEE Conference on Computer Vision and Pattern Recognition(CVPR). 3298--3308","year":"2017","author":"Dai Bo","key":"e_1_3_2_1_2_1"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3348"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2599174"},{"key":"e_1_3_2_1_5_1","unstructured":"Andrea Frome Gregory S Corrado Jonathon Shlens Samy Bengio Jeffrey Dean Marcaurelio Ranzato and Tomas Mikolov. 2013. DeViSE: A Deep Visual-Semantic Embedding Model. neural information processing systems (2013) 2121--2129.   Andrea Frome Gregory S Corrado Jonathon Shlens Samy Bengio Jeffrey Dean Marcaurelio Ranzato and Tomas Mikolov. 2013. DeViSE: A Deep Visual-Semantic Embedding Model. neural information processing systems (2013) 2121--2129."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-008-0140-x"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2009.83"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.5555\/2566972.2566993"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Justin Johnson Andrej Karpathy and Li Fei-Fei. 2016. DenseCap: Fully Convolutional Localization Networks for Dense Captioning. In Computer Vision and Pattern Recognition. 4565--4574.  Justin Johnson Andrej Karpathy and Li Fei-Fei. 2016. DenseCap: Fully Convolutional Localization Networks for Dense Captioning. In Computer Vision and Pattern Recognition. 4565--4574.","DOI":"10.1109\/CVPR.2016.494"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2598339"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.356"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"volume-title":"Visual Relationship Detection with Language Priors. European Conference on Computer Vision","year":"2016","author":"Lu Cewu","key":"e_1_3_2_1_13_1"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.345"},{"key":"e_1_3_2_1_15_1","unstructured":"Junhua Mao Wei Xu Yi Yang Jiang Wang and Alan L. Yuille. 2014. Explain Images with Multimodal Recurrent Neural Networks. Computer Science (2014).  Junhua Mao Wei Xu Yi Yang Jiang Wang and Alan L. Yuille. 2014. Explain Images with Multimodal Recurrent Neural Networks. Computer Science (2014)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"volume-title":"Areas of Attention for Image Captioning. In The IEEE International Conference on Computer Vision (ICCV). 1251--1259","year":"2017","author":"Pedersoli Marco","key":"e_1_3_2_1_17_1"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"e_1_3_2_1_19_1","unstructured":"Lin Tsung-Yi Maire Michael Belongie Serge Bourdev Lubomir Girshick Ross Hays James Perona Pietro Ramanan Deva Zitnick C. Lawrence and Dollar Piotr. 2015. Microsoft COCO: Common Objects in Context. https:\/\/arxiv.org\/pdf\/1405.0312.pdf  Lin Tsung-Yi Maire Michael Belongie Serge Bourdev Lubomir Girshick Ross Hays James Perona Pietro Ramanan Deva Zitnick C. Lawrence and Dollar Piotr. 2015. Microsoft COCO: Common Objects in Context. https:\/\/arxiv.org\/pdf\/1405.0312.pdf"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Ramakrishna Vedantam C Lawrence Zitnick and Devi Parikh. 2015. CIDEr: Consensus-based image description evaluation. computer vision and pattern recognition (2015) 4566--4575.  Ramakrishna Vedantam C Lawrence Zitnick and Devi Parikh. 2015. CIDEr: Consensus-based image description evaluation. computer vision and pattern recognition (2015) 4566--4575.","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Oriol Vinyals Alexander Toshev Samy Bengio and Dumitru Erhan. 2015. Show and tell: A neural image caption generator. In Computer Vision and Pattern Recognition . 3156--3164.  Oriol Vinyals Alexander Toshev Samy Bengio and Dumitru Erhan. 2015. Show and tell: A neural image caption generator. In Computer Vision and Pattern Recognition . 3156--3164.","DOI":"10.1109\/CVPR.2015.7298935"},{"volume-title":"Dense Captioning With Joint Inference and Visual Context. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR). 1978--1987","year":"2017","author":"Yang Linjie","key":"e_1_3_2_1_22_1"},{"volume-title":"Visual Translation Embedding Network for Visual Relation Detection. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) . 3107--3115","year":"2017","author":"Zhang Hanwang","key":"e_1_3_2_1_23_1"},{"volume-title":"Relationship Proposal Networks. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR). 5226--5234","year":"2017","author":"Zhang Ji","key":"e_1_3_2_1_24_1"}],"event":{"name":"MM '18: ACM Multimedia Conference","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Seoul Republic of Korea","acronym":"MM '18"},"container-title":["Proceedings of the 26th ACM international conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3240508.3240695","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3240508.3240695","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T00:43:31Z","timestamp":1750207411000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3240508.3240695"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,10,15]]},"references-count":24,"alternative-id":["10.1145\/3240508.3240695","10.1145\/3240508"],"URL":"https:\/\/doi.org\/10.1145\/3240508.3240695","relation":{},"subject":[],"published":{"date-parts":[[2018,10,15]]},"assertion":[{"value":"2018-10-15","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}