{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:16:32Z","timestamp":1750220192138,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":18,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,9,23]],"date-time":"2022-09-23T00:00:00Z","timestamp":1663891200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,9,23]]},"DOI":"10.1145\/3573942.3574091","type":"proceedings-article","created":{"date-parts":[[2023,5,16]],"date-time":"2023-05-16T23:45:42Z","timestamp":1684280742000},"page":"753-760","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Multimodal Dialogue Generation Based on Transformer and Collaborative Attention"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5832-2027","authenticated-orcid":false,"given":"Wei","family":"Guan","sequence":"first","affiliation":[{"name":"Xi'an University of Posts and Telecommunications, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3697-2605","authenticated-orcid":false,"given":"Zhen","family":"Zhang","sequence":"additional","affiliation":[{"name":"Xi'an University of Posts and Telecommunications, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4359-0050","authenticated-orcid":false,"given":"Li","family":"Ma","sequence":"additional","affiliation":[{"name":"Xi'an University of Posts and Telecommunications, China"}]}],"member":"320","published-online":{"date-parts":[[2023,5,16]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"1862","article-title":"A review of research on task-based dialogue systems [J]","volume":"2020","author":"Yangyang Zhao","unstructured":"Zhao Yangyang, Wang Zhenyu, Wang Pei, Yang Tian, Zhang Rui, Yin Kai. A review of research on task-based dialogue systems [J]. Journal of Computers,2020,43(10):1862-1896.(in Chinese)","journal-title":"Journal of Computers"},{"key":"e_1_3_2_1_2_1","first-page":"1137","article-title":"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks[J]","volume":"2017","author":"Ren","unstructured":"Ren S, He K, Girshick R, and Sun J. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks[J]. IEEE Transactions on Pattern Analysis and Machine Intelligence,2017:39(6)1137-1149.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Yu Z Yu J Cui Y Deep modular co-attention networks for visual question answering[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2019: 6281-6290.","DOI":"10.1109\/CVPR.2019.00644"},{"key":"e_1_3_2_1_4_1","volume-title":"Open-Domain Dialogue Generation Dataset with Visual Contexts[J].","author":"Wang S","year":"2021","unstructured":"Wang S , Meng Y , Li X , OpenViDial 2.0: A Larger-Scale, Open-Domain Dialogue Generation Dataset with Visual Contexts[J]. 2021."},{"key":"e_1_3_2_1_5_1","volume-title":"Modeling Text-visual Mutual Dependency for Multi-modal Dialog Generation[J]","author":"Wang","year":"2021","unstructured":"Wang S, Meng Y, Sun X, Modeling Text-visual Mutual Dependency for Multi-modal Dialog Generation[J]. 2021."},{"key":"e_1_3_2_1_6_1","volume-title":"Emotional Chatting Machine: Emotional Conversation Generation with Internal and External Memory[J]","author":"Zhou H","year":"2017","unstructured":"Zhou H , Huang M , Zhang T , Emotional Chatting Machine: Emotional Conversation Generation with Internal and External Memory[J]. 2017."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2018.09.006"},{"key":"e_1_3_2_1_8_1","volume-title":"Linjie Li, Jingjing Liu, and Jianfeng Gao. Multi-step reasoning via recurrent dual attention for visual dialog. arXiv preprint arXiv:1902.00579","author":"Gan Zhe","year":"2019","unstructured":"Zhe Gan, Yu Cheng, Ahmed El Kholy, Linjie Li, Jingjing Liu, and Jianfeng Gao. Multi-step reasoning via recurrent dual attention for visual dialog. arXiv preprint arXiv:1902.00579, 2019."},{"key":"e_1_3_2_1_9_1","volume-title":"Dual attention networks for visual reference resolution in visual dialog. arXiv preprint arXiv:1902.09368","author":"Kang Gi-Cheon","year":"2019","unstructured":"Gi-Cheon Kang, Jaeseo Lim, and Byoung-Tak Zhang. Dual attention networks for visual reference resolution in visual dialog. arXiv preprint arXiv:1902.09368, 2019."},{"key":"e_1_3_2_1_10_1","volume-title":"Large-scale adver sarial training for vision-and-language representation learning. arXiv preprint arXiv:2006.06195","author":"Gan Zhe","year":"2020","unstructured":"Zhe Gan, Yen-Chun Chen, Linjie Li, Chen Zhu, Yu Cheng, and Jingjing Liu. Large-scale adver sarial training for vision-and-language representation learning. arXiv preprint arXiv:2006.06195, 2020."},{"key":"e_1_3_2_1_11_1","unstructured":"Jaemin Cho Jie Lei Hao Tan and Mohit Bansal. Unifying vision-and-language tasks via text generation.2021."},{"issue":"02","key":"e_1_3_2_1_12_1","first-page":"250","article-title":"Multi-module collaborative attention model for visual question answering [J]","volume":"48","author":"Pinrong Zou","year":"2022","unstructured":"Zou Pinrong, Xiao Feng, Zhang Wenjuan, Zhang Wanyu, Wang Chenyang. Multi-module collaborative attention model for visual question answering [J]. Computer Engineering, 2022,48(02):250-260.(in Chinese)","journal-title":"Computer Engineering"},{"key":"e_1_3_2_1_13_1","first-page":"137","volume-title":"European Conference on Computer Vision","author":"Li Xiujun","unstructured":"Xiujun Li, Xi Yin, Chunyuan Li, Pengchuan Zhang, Xiaowei Hu, Lei Zhang, Lijuan Wang, Houdong Hu, Li Dong, Furu Wei, Oscar: Object-semantics aligned pre-training for vision-language tasks. In European Conference on Computer Vision, pages 121\u2013137. Springer, 2020."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331226"},{"key":"e_1_3_2_1_15_1","volume-title":"Image chat: Engaging grounded conversations. arXiv preprint arXiv:1811.00945","author":"Shuster Kurt","year":"2018","unstructured":"Kurt Shuster, Samuel Humeau, Antoine Bordes, and Jason Weston. Image chat: Engaging grounded conversations. arXiv preprint arXiv:1811.00945, 2018."},{"key":"e_1_3_2_1_16_1","volume-title":"Da Ju, and Jason Weston. Multi-modal open-domain dialogue. arXiv preprint arXiv:2010.01082","author":"Shuster Kurt","year":"2020","unstructured":"Kurt Shuster, Eric Michael Smith, Da Ju, and Jason Weston. Multi-modal open-domain dialogue. arXiv preprint arXiv:2010.01082, 2020."},{"key":"e_1_3_2_1_17_1","volume-title":"Infusing Multi-Source Knowledge with Heterogeneous Graph Neural Network for Emotional Conversation Generation[J]","author":"Liang","year":"2020","unstructured":"Liang Y, Meng, Zhang Y , Infusing Multi-Source Knowledge with Heterogeneous Graph Neural Network for Emotional Conversation Generation[J]. 2020."},{"key":"e_1_3_2_1_18_1","volume-title":"Song Y","author":"Cai","year":"2020","unstructured":"Cai H, Chen H, Song Y , Data Manipulation: Towards Effective Instance Learning for Neural Dialogue Generation via Learning to Augment and Reweight[J]. 2020"}],"event":{"name":"AIPR 2022: 2022 5th International Conference on Artificial Intelligence and Pattern Recognition","acronym":"AIPR 2022","location":"Xiamen China"},"container-title":["Proceedings of the 2022 5th International Conference on Artificial Intelligence and Pattern Recognition"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3573942.3574091","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3573942.3574091","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:32Z","timestamp":1750186952000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3573942.3574091"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9,23]]},"references-count":18,"alternative-id":["10.1145\/3573942.3574091","10.1145\/3573942"],"URL":"https:\/\/doi.org\/10.1145\/3573942.3574091","relation":{},"subject":[],"published":{"date-parts":[[2022,9,23]]},"assertion":[{"value":"2023-05-16","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}