{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:56:25Z","timestamp":1781538985822,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810805","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"1787-1792","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["On the Effectiveness of Integration Methods for Multimodal Dialogue Response Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3398-0144","authenticated-orcid":false,"given":"Seongbo","family":"Jang","sequence":"first","affiliation":[{"name":"Myongji University, Seoul, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-0039-8920","authenticated-orcid":false,"given":"Seonghyeon","family":"Lee","sequence":"additional","affiliation":[{"name":"Kyungpook National University, Daegu, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2173-3476","authenticated-orcid":false,"given":"Dongha","family":"Lee","sequence":"additional","affiliation":[{"name":"Yonsei University, Seoul, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7510-0255","authenticated-orcid":false,"given":"Hwanjo","family":"Yu","sequence":"additional","affiliation":[{"name":"Pohang University of Science and Technology, Pohang, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","series-title":"(NIPS \u201921)","volume-title":"Proceedings of the 35th International Conference on Neural Information Processing Systems","author":"Akbari Hassan","year":"2021","unstructured":"Hassan Akbari, Liangzhe Yuan, Rui Qian, Wei-Hong Chuang, Shih-Fu Chang, Yin Cui, and Boqing Gong. 2021. VATT: transformers for multimodal self-supervised learning from raw video, audio and text. In Proceedings of the 35th International Conference on Neural Information Processing Systems(NIPS \u201921). Curran Associates Inc., Red Hook, NY, USA, Article 1853, 16\u00a0pages. https:\/\/dl.acm.org\/doi\/10.5555\/3540261.3542114"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.334"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.121"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1423"},{"key":"e_1_3_3_1_7_2","volume-title":"International Conference on Learning Representations","author":"Dinan Emily","year":"2019","unstructured":"Emily Dinan, Stephen Roller, Kurt Shuster, Angela Fan, Michael Auli, and Jason Weston. 2019. Wizard of Wikipedia: Knowledge-Powered Conversational Agents. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=r1l73iRqKm"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412330"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_1_10_2","unstructured":"Matthew Henderson Rami Al-Rfou Brian Strope Yun-Hsuan Sung L\u00e1szl\u00f3 Luk\u00e1cs Ruiqi Guo Sanjiv Kumar Balint Miklos and Ray Kurzweil. 2017. Efficient natural language response suggestion for smart reply. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1705.00652 (2017). https:\/\/arxiv.org\/abs\/1705.00652"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1536"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v8i1.14578"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/2505515.2505665"},{"key":"e_1_3_3_1_14_2","volume-title":"International Conference on Learning Representations","author":"Humeau Samuel","year":"2020","unstructured":"Samuel Humeau, Kurt Shuster, Marie-Anne Lachaux, and Jason Weston. 2020. Poly-encoders: Architectures and Pre-training Strategies for Fast and Accurate Multi-sentence Scoring. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SkxgnnNFvH"},{"key":"e_1_3_3_1_15_2","volume-title":"ICLR (Poster)","author":"Kingma Diederik\u00a0P","year":"2015","unstructured":"Diederik\u00a0P Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. In ICLR (Poster). https:\/\/arxiv.org\/abs\/1412.6980"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","unstructured":"Tian Lan Deng Cai Yan Wang Yixuan Su Heyan Huang and Xian-Ling Mao. 2024. Exploring Dense Retrieval for Dialogue Response Selection. ACM Trans. Inf. Syst. 42 3 Article 84 (Jan. 2024) 29\u00a0pages. 10.1145\/3632750","DOI":"10.1145\/3632750"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-short.113"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00475"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W15-4640"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1298"},{"key":"e_1_3_3_1_22_2","first-page":"462","volume-title":"Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)","author":"Mostafazadeh Nasrin","year":"2017","unstructured":"Nasrin Mostafazadeh, Chris Brockett, Bill Dolan, Michel Galley, Jianfeng Gao, Georgios Spithourakis, and Lucy Vanderwende. 2017. Image-Grounded Conversations: Multimodal Context for Natural Question and Response Generation. In Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers). Asian Federation of Natural Language Processing, Taipei, Taiwan, 462\u2013472. https:\/\/aclanthology.org\/I17-1047"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.303"},{"key":"e_1_3_3_1_24_2","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748\u20138763. https:\/\/proceedings.mlr.press\/v139\/radford21a\/radford21a.pdf"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.24"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.219"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.222"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.398"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.204"},{"key":"e_1_3_3_1_30_2","unstructured":"Iulia Turc Ming-Wei Chang Kenton Lee and Kristina Toutanova. 2019. Well-read students learn better: On the importance of pre-training compact models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1908.08962 (2019). https:\/\/arxiv.org\/abs\/1908.08962"},{"key":"e_1_3_3_1_31_2","first-page":"5998","volume-title":"Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in neural information processing systems. 5998\u20136008. https:\/\/papers.nips.cc\/paper\/2017\/hash\/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html"},{"key":"e_1_3_3_1_32_2","first-page":"1585","volume-title":"INTERSPEECH","author":"Whang Taesun","year":"2020","unstructured":"Taesun Whang, Dongyub Lee, Chanhee Lee, Kisu Yang, Dongsuk Oh, and Heuiseok Lim. 2020. An Effective Domain Adaptive Post-Training Method for BERT in Response Selection.. In INTERSPEECH. 1585\u20131589. https:\/\/arxiv.org\/abs\/1908.04812"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.66"},{"key":"e_1_3_3_1_34_2","volume-title":"Thirty-Fifth Conference on Neural Information Processing Systems","author":"Wu Ruihan","year":"2021","unstructured":"Ruihan Wu, Chuan Guo, Awni Hannun, and Laurens van\u00a0der Maaten. 2021. Fixes That Fail: Self-Defeating Improvements in Machine-Learning Systems. In Thirty-Fifth Conference on Neural Information Processing Systems. https:\/\/openreview.net\/forum?id=xZvuqfT6Otj"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"crossref","unstructured":"Yu Wu Zhoujun Li Wei Wu and Ming Zhou. 2018. Response selection with topic clues for retrieval-based chatbots. Neurocomputing 316 (2018) 251\u2013261. https:\/\/www.sciencedirect.com\/science\/article\/abs\/pii\/S0925231218309093","DOI":"10.1016\/j.neucom.2018.07.073"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.634"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"crossref","unstructured":"Ruijian Xu Chongyang Tao Daxin Jiang Xueliang Zhao Dongyan Zhao and Rui Yan. 2021. Learning an Effective Context-Response Matching Model with Self-Supervised Tasks for Retrieval-based Dialogues. Proceedings of the AAAI Conference on Artificial Intelligence 35 16 (May 2021) 14158\u201314166. https:\/\/ojs.aaai.org\/index.php\/AAAI\/article\/view\/17666","DOI":"10.1609\/aaai.v35i16.17666"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i16.17668"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/2911451.2911542"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"crossref","unstructured":"Zhao Yan Nan Duan Junwei Bao Peng Chen Ming Zhou and Zhoujun Li. 2018. Response selection from unstructured documents for human-computer conversation systems. Knowledge-Based Systems 142 (2018) 149\u2013159. https:\/\/www.sciencedirect.com\/science\/article\/abs\/pii\/S0950705117305646","DOI":"10.1016\/j.knosys.2017.11.033"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-3022"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.479"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00688"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1205"},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1036"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:30:38Z","timestamp":1781537438000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810805"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":44,"alternative-id":["10.1145\/3805622.3810805","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810805","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}