{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T16:46:28Z","timestamp":1742921188821,"version":"3.40.3"},"publisher-location":"Cham","reference-count":44,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031560620"},{"type":"electronic","value":"9783031560637"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-56063-7_23","type":"book-chapter","created":{"date-parts":[[2024,3,22]],"date-time":"2024-03-22T08:44:01Z","timestamp":1711097041000},"page":"316-325","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["MMCRec: Towards Multi-modal Generative AI in\u00a0Conversational Recommendation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0654-7141","authenticated-orcid":false,"given":"Tendai","family":"Mukande","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1600-3161","authenticated-orcid":false,"given":"Esraa","family":"Ali","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7144-8545","authenticated-orcid":false,"given":"Annalina","family":"Caputo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2509-1370","authenticated-orcid":false,"given":"Ruihai","family":"Dong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4033-9135","authenticated-orcid":false,"given":"Noel E.","family":"O\u2019Connor","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,3,23]]},"reference":[{"key":"23_CR1","unstructured":"Wu, S., Fei, H., Qu, L., Ji, W., Chua, T.: NExT-GPT: any-to-any multimodal LLM. ArXiv Preprint: ArXiv:2309.05519 (2023)"},{"key":"23_CR2","unstructured":"Cui, Z., Ma, J., Zhou, C., Zhou, J., Yang, H.: M6-Rec: generative pretrained language models are open-ended recommender systems. ArXiv Preprint: ArXiv:2205.08084 (2022)"},{"key":"23_CR3","unstructured":"Hou, Y., et al.: Large language models are zero-shot rankers for recommender systems. ArXiv Preprint: ArXiv:2305.08845 (2023)"},{"key":"23_CR4","unstructured":"Gao, Y., Sheng, T., Xiang, Y., Xiong, Y., Wang, H., Zhang, J.: Chat-REC: towards interactive and explainable LLMs-augmented recommender system. ArXiv Preprint: ArXiv:2303.14524 (2023)"},{"key":"23_CR5","first-page":"3803","volume":"21","author":"A Salah","year":"2020","unstructured":"Salah, A., Truong, Q., Lauw, H.: Cornac: a comparative framework for multimodal recommender systems. J. Mach. Learn. Res. 21, 3803\u20133807 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"23_CR6","unstructured":"Liu, Q., Hu, J., Xiao, Y., Gao, J., Zhao, X.: Multimodal recommender systems: a survey. ArXiv Preprint ArXiv:2302.03883 (2023)"},{"key":"23_CR7","doi-asserted-by":"crossref","unstructured":"Chen, X., et al.: Personalized fashion recommendation with visual explanations based on multimodal attention network: towards visually explainable recommendation. In: Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 765\u2013774 (2019)","DOI":"10.1145\/3331184.3331254"},{"key":"23_CR8","doi-asserted-by":"crossref","unstructured":"Gu, R., Wang, X., Yang, Q.: Multimodal cross-attention graph network for desire detection. In: International Conference on Artificial Neural Networks, pp. 512\u2013523 (2023)","DOI":"10.1007\/978-3-031-44216-2_42"},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"Yao, Y., Liu, Z., Lin, Y., Sun, M.: Cross-modal representation learning. In: Representation Learning for Natural Language Processing, pp. 211\u2013240 (2023)","DOI":"10.1007\/978-981-99-1600-9_7"},{"key":"23_CR10","unstructured":"Zhu, L., Wang, T., Li, F., Li, J., Zhang, Z., Shen, H.: Cross-modal retrieval: a systematic review of methods and future directions. ArXiv Preprint: ArXiv:2308.14263 (2023)"},{"key":"23_CR11","doi-asserted-by":"publisher","first-page":"107217","DOI":"10.1016\/j.knosys.2021.107217","volume":"227","author":"S Tao","year":"2021","unstructured":"Tao, S., Qiu, R., Ping, Y., Ma, H.: Multi-modal knowledge-aware reinforcement learning network for explainable recommendation. Knowl.-Based Syst. 227, 107217 (2021)","journal-title":"Knowl.-Based Syst."},{"key":"23_CR12","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1038\/s41368-023-00239-y","volume":"15","author":"H Huang","year":"2023","unstructured":"Huang, H., et al.: ChatGPT for shaping the future of dentistry: the potential of multi-modal large language model. Int. J. Oral Sci. 15, 29 (2023)","journal-title":"Int. J. Oral Sci."},{"key":"23_CR13","doi-asserted-by":"crossref","unstructured":"Hu, Z., Cai, S., Wang, J., Zhou, T.: Collaborative recommendation model based on multi-modal multi-view attention network: movie and literature cases. Appl. Soft Comput., 110518 (2023)","DOI":"10.1016\/j.asoc.2023.110518"},{"key":"23_CR14","doi-asserted-by":"crossref","unstructured":"Yan, A., He, Z., Li, J., Zhang, T., McAuley, J.: Personalized showcases: generating multi-modal explanations for recommendations. In: Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2251\u20132255 (2023)","DOI":"10.1145\/3539618.3592036"},{"key":"23_CR15","doi-asserted-by":"crossref","unstructured":"Wu, Y., Macdonald, C., Ounis, I.: Goal-oriented multi-modal interactive recommendation with verbal and non-verbal relevance feedback. In: Proceedings of the 17th ACM Conference on Recommender Systems, pp. 362\u2013373 (2023)","DOI":"10.1145\/3604915.3608775"},{"key":"23_CR16","doi-asserted-by":"crossref","unstructured":"Xin, X., Pimentel, T., Karatzoglou, A., Ren, P., Christakopoulou, K., Ren, Z.: Rethinking reinforcement learning for recommendation: a prompt perspective. In: Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 1347\u20131357 (2022)","DOI":"10.1145\/3477495.3531714"},{"key":"23_CR17","doi-asserted-by":"publisher","first-page":"5275","DOI":"10.3390\/s21165275","volume":"21","author":"X Chen","year":"2021","unstructured":"Chen, X., Lu, Y., Wang, Y., Yang, J.C.M.B.F.: Cross-modal-based fusion recommendation algorithm. Sensors 21, 5275 (2021)","journal-title":"Sensors"},{"key":"23_CR18","unstructured":"Friedman, L., et al.: Leveraging large language models in conversational recommender systems. ArXiv Preprint: ArXiv:2305.07961 (2023)"},{"key":"23_CR19","unstructured":"Dai, S., et al.: Uncovering ChatGPT\u2019s capabilities in recommender systems. ArXiv Preprint: ArXiv:2305.02182 (2023)"},{"key":"23_CR20","doi-asserted-by":"crossref","unstructured":"Bao, K., Zhang, J., Zhang, Y., Wang, W., Feng, F., He, X.: TALLRec: an effective and efficient tuning framework to align large language model with recommendation. ArXiv Preprint: ArXiv:2305.00447 (2023)","DOI":"10.1145\/3604915.3608857"},{"key":"23_CR21","unstructured":"Wang, W., Lin, X., Feng, F., He, X., Chua, T.: Generative recommendation: towards next-generation recommender paradigm. ArXiv Preprint: ArXiv:2304.03516 (2023)"},{"key":"23_CR22","unstructured":"Li, J., Zhang, W., Wang, T., Xiong, G., Lu, A., Medioni, G.: GPT4Rec: a generative framework for personalized recommendation and user interests interpretation. ArXiv Preprint: ArXiv:2304.03879 (2023)"},{"key":"23_CR23","doi-asserted-by":"crossref","unstructured":"Wang, X., Tang, X., Zhao, W., Wang, J., Wen, J.: Rethinking the evaluation for conversational recommendation in the era of large language models. ArXiv Preprint: ArXiv:2305.13112 (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.621"},{"key":"23_CR24","doi-asserted-by":"crossref","unstructured":"Girdhar, R., et al.: ImageBind: one embedding space to bind them all. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15180\u201315190 (2023)","DOI":"10.1109\/CVPR52729.2023.01457"},{"key":"23_CR25","unstructured":"Chiang, W., et al.: Vicuna: an open-source chatbot impressing GPT-4 with 90%* ChatGPT quality (2023). See https:\/\/vicuna.Lmsys.Org. Accessed 14 Apr 2023"},{"key":"23_CR26","unstructured":"Su, Y., Lan, T., Li, H., Xu, J., Wang, Y., Cai, D.: PandaGPT: one model to instruction-follow them all. ArXiv Preprint: ArXiv:2305.16355 (2023)"},{"key":"23_CR27","doi-asserted-by":"crossref","unstructured":"Wang, X., Qin, J.: Multimodal recommendation algorithm based on dempster-Shafer evidence theory. Multimedia Tools Appl., 1\u201316 (2023)","DOI":"10.1007\/s11042-023-15262-8"},{"key":"23_CR28","unstructured":"Luo, L., Ju, J., Xiong, B., Li, Y., Haffari, G., Pan, S.: ChatRule: mining logical rules with large language models for knowledge graph reasoning. ArXiv Preprint: ArXiv:2309.01538 (2023)"},{"key":"23_CR29","doi-asserted-by":"crossref","unstructured":"Wu, Y., et al.: State graph reasoning for multimodal conversational recommendation. IEEE Trans. Multimedia (2022)","DOI":"10.1109\/TMM.2022.3155900"},{"key":"23_CR30","doi-asserted-by":"crossref","unstructured":"Liao, L., Long, L., Zhang, Z., Huang, M., Chua, T.: MMConv: an environment for multimodal conversational search across multiple domains. In: Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 675\u2013684 (2021)","DOI":"10.1145\/3404835.3462970"},{"key":"23_CR31","doi-asserted-by":"crossref","unstructured":"Viswanathan, S., Guillot, F., Grasso, A.: What is natural? Challenges and opportunities for conversational recommender systems. In: Proceedings of the 2nd Conference on Conversational User Interfaces, pp. 1\u20134 (2020)","DOI":"10.1145\/3405755.3406174"},{"key":"23_CR32","doi-asserted-by":"crossref","unstructured":"Dong, X., et al.: M5product: self-harmonized contrastive learning for e-commercial multi-modal pretraining. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21252\u201321262 (2022)","DOI":"10.1109\/CVPR52688.2022.02057"},{"key":"23_CR33","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"23_CR34","unstructured":"Liu, H., et al.: AudioLDM: text-to-audio generation with latent diffusion models. ArXiv Preprint: ArXiv:2301.12503 (2023)"},{"key":"23_CR35","unstructured":"Hu, E., et al.: Lora: low-rank adaptation of large language models. ArXiv Preprint: ArXiv:2106.09685 (2021)"},{"key":"23_CR36","unstructured":"Zhang, Z., Zhang, A., Li, M., Zhao, H., Karypis, G., Smola, A.: Multimodal chain-of-thought reasoning in language models. ArXiv Preprint: ArXiv:2302.00923 (2023)"},{"key":"23_CR37","doi-asserted-by":"crossref","unstructured":"Liu, Z., Yu, X., Fang, Y., Zhang, X.: GraphPrompt: unifying pre-training and downstream tasks for graph neural networks. In: Proceedings of the ACM Web Conference 2023, 417\u2013428 (2023)","DOI":"10.1145\/3543507.3583386"},{"key":"23_CR38","unstructured":"Wu, L., et al.: A survey on large language models for recommendation. ArXiv Preprint: ArXiv:2305.19860 (2023)"},{"key":"23_CR39","doi-asserted-by":"crossref","unstructured":"Geng, S., Liu, S., Fu, Z., Ge, Y., Zhang, Y.: Recommendation as language processing (RLP): a unified pretrain, personalized prompt & predict paradigm (p5). In: Proceedings of the 16th ACM Conference on Recommender Systems, pp. 299\u2013315 (2022)","DOI":"10.1145\/3523227.3546767"},{"key":"23_CR40","doi-asserted-by":"crossref","unstructured":"Lin, J., et al.: How can recommender systems benefit from large language models: a survey. ArXiv Preprint: ArXiv:2306.05817 (2023)","DOI":"10.1145\/3678004"},{"key":"23_CR41","unstructured":"Lyu, C., et al.: Macaw-LLM: multi-modal language modeling with image, audio, video, and text integration. ArXiv Preprint: ArXiv:2306.09093 (2023)"},{"key":"23_CR42","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763 (2021)"},{"key":"23_CR43","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1007\/s10115-022-01766-6","volume":"65","author":"M He","year":"2023","unstructured":"He, M., Wang, J., Ding, T., Shen, T.: Conversation and recommendation: knowledge-enhanced personalized dialog system. Knowl. Inf. Syst. 65, 261\u2013279 (2023)","journal-title":"Knowl. Inf. Syst."},{"key":"23_CR44","unstructured":"Besta, M., et al.: Graph of thoughts: solving elaborate problems with large language models. ArXiv Preprint: ArXiv:2308.09687 (2023)"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-56063-7_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T21:31:35Z","timestamp":1731619895000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-56063-7_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031560620","9783031560637"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-56063-7_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"23 March 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECIR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Information Retrieval","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 March 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 March 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecir2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.ecir2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"578","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"110","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"69","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"19% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31 (Tracks: Workshop, Tutorial, Industry, Doctoral Consortium)","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}