{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T08:02:58Z","timestamp":1761897778015,"version":"3.40.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031171192"},{"type":"electronic","value":"9783031171208"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-17120-8_58","type":"book-chapter","created":{"date-parts":[[2022,9,23]],"date-time":"2022-09-23T13:02:58Z","timestamp":1663938178000},"page":"749-761","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["MCIC: Multimodal Conversational Intent Classification for\u00a0E-commerce Customer Service"],"prefix":"10.1007","author":[{"given":"Shaozu","family":"Yuan","sequence":"first","affiliation":[]},{"given":"Xin","family":"Shen","sequence":"additional","affiliation":[]},{"given":"Yuming","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Hang","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Zhiling","family":"Yan","sequence":"additional","affiliation":[]},{"given":"Ruixue","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Meng","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,9,24]]},"reference":[{"key":"58_CR1","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1007\/978-3-030-60450-9_6","volume-title":"Natural Language Processing and Chinese Computing","author":"R Liu","year":"2020","unstructured":"Liu, R., Chen, M., Liu, H., Shen, L., Song, Y., He, X.: Enhancing multi-turn dialogue modeling with intent information for E-commerce customer service. In: Zhu, X., Zhang, M., Hong, Yu., He, R. (eds.) NLPCC 2020. LNCS (LNAI), vol. 12430, pp. 65\u201377. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-60450-9_6"},{"key":"58_CR2","unstructured":"Chen, M., et al.: The jddc corpus: a large-scale multi-turn Chinese dialogue dataset for e-commerce customer service. In: Proceedings of LREC 2022 (2020)"},{"key":"58_CR3","doi-asserted-by":"crossref","unstructured":"Liao, L., Ma, Y., He, X., Hong, R., Chua, T.: Knowledge-aware multimodal dialogue systems. In: Proceedings of ACM MM 2018 (2018)","DOI":"10.1145\/3240508.3240605"},{"key":"58_CR4","doi-asserted-by":"crossref","unstructured":"Das, A., et al.: Visual dialog. In: Proceedings of CVPR 2017 (2017)","DOI":"10.1109\/CVPR.2017.121"},{"key":"58_CR5","doi-asserted-by":"crossref","unstructured":"Cai, Y., Cai, H., Wan, X.: Multi-modal sarcasm detection in twitter with hierarchical fusion model. In: Proceedings of ACL 2019 (2019)","DOI":"10.18653\/v1\/P19-1239"},{"key":"58_CR6","doi-asserted-by":"crossref","unstructured":"Antol, S., et al.: Vqa: visual question answering. In: Proceedings of ICCV 2015 (2015)","DOI":"10.1109\/ICCV.2015.279"},{"key":"58_CR7","doi-asserted-by":"crossref","unstructured":"Cadene, R., Ben-Younes, H., Cord, M., Thome, N.: Murel: multimodal relational reasoning for visual question answering. In: Proceedings of CVPR 2019 (2019)","DOI":"10.1109\/CVPR.2019.00209"},{"key":"58_CR8","doi-asserted-by":"crossref","unstructured":"Zhou, X., Yao, C., Wen, H., Wang, Y., Zhou, S., He, W., Liang, J.: East: an efficient and accurate scene text detector. In: Proceedings of CVPR 2017 (2017)","DOI":"10.1109\/CVPR.2017.283"},{"key":"58_CR9","doi-asserted-by":"crossref","unstructured":"Gupta, A., Vedaldi, A., Zisserman, A.: Synthetic data for text localisation in natural images. In: Proceedings of CVPR 2016 (2016)","DOI":"10.1109\/CVPR.2016.254"},{"issue":"11","key":"58_CR10","doi-asserted-by":"publisher","first-page":"2298","DOI":"10.1109\/TPAMI.2016.2646371","volume":"39","author":"B Shi","year":"2016","unstructured":"Shi, B., Bai, X., Yao, C.: An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition. IEEE Trans. Pattern Anal. Mach. Intell. 39(11), 2298\u20132304 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"58_CR11","unstructured":"Kingma, D., Ba, J.: Adam: a method for stochastic optimization. In: Proceedings of ICLR 2015 (2015)"},{"key":"58_CR12","unstructured":"Mostafazadeh, N., Brockett, C., Dolan, B., Galley, M., Gao, J., Spithourakis, G., Vanderwende, L.: Image-grounded conversations: Multimodal context for natural question and response generation. In: Proceedings of IJCNLP 2017 (2017)"},{"key":"58_CR13","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of CVPR 2016 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"58_CR14","doi-asserted-by":"crossref","unstructured":"Shuster, K., Humeau, S., Bordes, A., Weston, J.: Image chat: engaging grounded conversations. In: Proceedings of ACL 2020 (2020)","DOI":"10.18653\/v1\/2020.acl-main.219"},{"key":"58_CR15","doi-asserted-by":"crossref","unstructured":"Kottur, S., Moon, S., Geramifard, A., Damavandi, B.: SIMMC 2.0: a task-oriented dialog dataset for immersive multimodal conversations. In: Proceedings of EMNLP 2021 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.401"},{"key":"58_CR16","doi-asserted-by":"crossref","unstructured":"Budzianowski, P., et al.: MultiWOZ-a large-scale multi-domain wizard-of-Oz dataset for task-oriented dialogue modelling. In: Proceedings of EMNLP 2018 (2018)","DOI":"10.18653\/v1\/D18-1547"},{"key":"58_CR17","unstructured":"Li, X., Wang, Y., Sun, S., Panda, S., Liu, J., Gao, J.: Microsoft dialogue challenge: building end-to-end task-completion dialogue systems. Journal: arXiv preprint arXiv:1807.11125 (2018)"},{"key":"58_CR18","doi-asserted-by":"crossref","unstructured":"Rastogi, A., Zang, X., Sunkara, S., Gupta, R., Khaitan, P.: Towards scalable multi-domain conversational agents: The schema-guided dialogue dataset. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34(05), pp. 8689\u20138696 (2020)","DOI":"10.1609\/aaai.v34i05.6394"},{"key":"58_CR19","doi-asserted-by":"publisher","first-page":"281","DOI":"10.1162\/tacl_a_00314","volume":"8","author":"Q Zhu","year":"2020","unstructured":"Zhu, Q., Huang, K., Zhang, Z., Zhu, X., Huang, M.: Crosswoz: a large-scale Chinese cross-domain task-oriented dialogue dataset. TACL. 8, 281\u2013295 (2020)","journal-title":"TACL."},{"key":"58_CR20","doi-asserted-by":"crossref","unstructured":"Joo, J., Li, W., Steen, F., Zhu, S.: Visual persuasion: inferring communicative intents of images. In: Proceedings of CVPR 2014 (2014)","DOI":"10.1109\/CVPR.2014.35"},{"key":"58_CR21","doi-asserted-by":"crossref","unstructured":"Vondrick, C., Oktay, D., Pirsiavash, H., Torralba, A.: Predicting motivations of actions by leveraging text. In: Proceedings of CVPR 2016 (2016)","DOI":"10.1109\/CVPR.2016.327"},{"key":"58_CR22","doi-asserted-by":"crossref","unstructured":"Kruk, J., Lubin, J., Sikka, K., Lin, X., Jurafsky, D., Divakaran, A.: Integrating text and image: determining multimodal document intent in instagram posts. In: Proceedings of IJCNLP 2019 (2019)","DOI":"10.18653\/v1\/D19-1469"},{"key":"58_CR23","doi-asserted-by":"crossref","unstructured":"Jia, M., Wu, Z., Reiter, A., Cardie, C., Belongie, S., Lim, S.: Intentonomy: a Dataset and Study towards Human Intent Understanding. In: Proceedings of CVPR 2021 (2021)","DOI":"10.1109\/CVPR46437.2021.01279"},{"key":"58_CR24","doi-asserted-by":"crossref","unstructured":"Saha, A., Khapra, M., Sankaranarayanan, K.: Towards building large scale multimodal domain-aware conversation systems. In: Proceedings of ACL 2018 (2018)","DOI":"10.1609\/aaai.v32i1.11331"},{"key":"58_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1007\/978-3-642-15561-1_2","volume-title":"Computer Vision \u2013 ECCV 2010","author":"A Farhadi","year":"2010","unstructured":"Farhadi, A., et al.: Every picture tells a story: generating sentences from images. In: Daniilidis, K., Maragos, P., Paragios, N. (eds.) ECCV 2010. LNCS, vol. 6314, pp. 15\u201329. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-15561-1_2"},{"key":"58_CR26","unstructured":"Zhao, N., Li, H., Wu, Y., He, X., Zhou, B.: The JDDC 2.0 Corpus: A Large-Scale Multimodal Multi-Turn Chinese Dialogue Dataset for E-commerce Customer Service. Journal: arXiv preprint arXiv:2109.12913 (2021)"},{"key":"58_CR27","unstructured":"Rahman, W., Hasan, M., Zadeh, A., Morency, L., Hoque, Mohammed E.: M-bert: Injecting multimodal information in the bert structure. Journal: arXiv preprint arXiv:1908.05787 (2019)"},{"key":"58_CR28","unstructured":"Devlin, J., Chang, M.-W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NAACL-HLT 2019 (2019)"}],"container-title":["Lecture Notes in Computer Science","Natural Language Processing and Chinese Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-17120-8_58","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,23]],"date-time":"2022-09-23T13:11:27Z","timestamp":1663938687000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-17120-8_58"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031171192","9783031171208"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-17120-8_58","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"24 September 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NLPCC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CCF International Conference on Natural Language Processing and Chinese Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Guilin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nlpcc2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/tcci.ccf.org.cn\/conference\/2022\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Softconf","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"327","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"73","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"22% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}