{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T06:12:29Z","timestamp":1780467149164,"version":"3.54.1"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032213235","type":"print"},{"value":"9783032213242","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-21324-2_5","type":"book-chapter","created":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T08:38:47Z","timestamp":1774255127000},"page":"66-81","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Are Multimodal Embeddings Truly Beneficial for\u00a0Recommendation? A Deep Dive into Whole vs. Individual Modalities"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-8556-6243","authenticated-orcid":false,"given":"Yu","family":"Ye","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4759-2042","authenticated-orcid":false,"given":"Junchen","family":"Fu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8940-2561","authenticated-orcid":false,"given":"Yu","family":"Song","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2516-8407","authenticated-orcid":false,"given":"Kaiwen","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9228-1759","authenticated-orcid":false,"given":"Joemon M.","family":"Jose","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2026,3,24]]},"reference":[{"issue":"1","key":"5_CR1","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1007\/s11257-016-9174-x","volume":"26","author":"J Beel","year":"2016","unstructured":"Beel, J., Breitinger, C., Langer, S., Lommatzsch, A., Gipp, B.: Towards reproducibility in recommender-systems research. User Model. User-Adap. Inter. 26(1), 69\u2013101 (2016)","journal-title":"User Model. User-Adap. Inter."},{"key":"5_CR2","doi-asserted-by":"crossref","unstructured":"Ferrari\u00a0Dacrema, M., Cremonesi, P., Jannach, D.: Are we really making much progress? a worrying analysis of recent neural recommendation approaches. In: Proceedings of the 13th ACM conference on recommender systems, pp. 101\u2013109 (2019)","DOI":"10.1145\/3298689.3347058"},{"key":"5_CR3","doi-asserted-by":"crossref","unstructured":"Fu, J., et al.: Iisan: efficiently adapting multimodal representation for sequential recommendation with decoupled peft. In: Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 687\u2013697 (2024)","DOI":"10.1145\/3626772.3657725"},{"key":"5_CR4","doi-asserted-by":"crossref","unstructured":"Fu, J., et al.: Efficient and effective adaptation of multimodal foundation models in sequential recommendation. IEEE Trans. Knowl. Data Eng. (2025)","DOI":"10.1109\/TKDE.2025.3608071"},{"key":"5_CR5","doi-asserted-by":"crossref","unstructured":"Fu, J., et al.: Crossan: Towards efficient and effective adaptation of multiple multimodal foundation models for sequential recommendation. arXiv preprint arXiv:2504.10307 (2025)","DOI":"10.1109\/TKDE.2025.3608071"},{"key":"5_CR6","doi-asserted-by":"crossref","unstructured":"Fu, J., et al.: Exploring adapter-based transfer learning for recommender systems: Empirical studies and practical insights. In: Proceedings of the 17th ACM International Conference on Web Search and Data Mining, pp. 208\u2013217 (2024)","DOI":"10.1145\/3616855.3635805"},{"key":"5_CR7","doi-asserted-by":"crossref","unstructured":"Ge, X., Fu, J., Chen, F., An, S., Sebe, N., Jose, J.M.: Towards end-to-end explainable facial action unit recognition via vision-language joint learning. In: Proceedings of the 32nd ACM International Conference on Multimedia, pp. 8189\u20138198 (2024)","DOI":"10.1145\/3664647.3681443"},{"key":"5_CR8","doi-asserted-by":"crossref","unstructured":"Guo, Z., Li, J., Li, G., Wang, C., Shi, S., Ruan, B.: Lgmrec: Local and global graph learning for multimodal recommendation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 8454\u20138462 (2024)","DOI":"10.1609\/aaai.v38i8.28688"},{"key":"5_CR9","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., Girshick, R.: Masked autoencoders are scalable vision learners. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 16000\u201316009 (2022)","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"5_CR10","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"5_CR11","doi-asserted-by":"crossref","unstructured":"He, R., McAuley, J.: Vbpr: visual bayesian personalized ranking from implicit feedback. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a030 (2016)","DOI":"10.1609\/aaai.v30i1.9973"},{"key":"5_CR12","doi-asserted-by":"crossref","unstructured":"He, X., Liao, L., Zhang, H., Nie, L., Hu, X., Chua, T.S.: Neural collaborative filtering. In: Proceedings of the 26th International Conference on World Wide Web, pp. 173\u2013182 (2017)","DOI":"10.1145\/3038912.3052569"},{"issue":"3","key":"5_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3569930","volume":"41","author":"Y Ji","year":"2023","unstructured":"Ji, Y., Sun, A., Zhang, J., Li, C.: A critical study on data leakage in recommender system offline evaluation. ACM Trans. Inf. Syst. 41(3), 1\u201327 (2023)","journal-title":"ACM Trans. Inf. Syst."},{"key":"5_CR14","unstructured":"Li, R., Deng, W., Cheng, Y., Yuan, Z., Zhang, J., Yuan, F.: Exploring the upper limits of text-based collaborative filtering using large language models: Discoveries and insights. arXiv preprint arXiv:2305.11700 (2023)"},{"key":"5_CR15","doi-asserted-by":"crossref","unstructured":"Liu, Q., et al.: Multimodal pretraining, adaptation, and generation for recommendation: A survey. In: Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, pp. 6566\u20136576 (2024)","DOI":"10.1145\/3637528.3671473"},{"key":"5_CR16","doi-asserted-by":"crossref","unstructured":"Ni, J., Li, J., McAuley, J.: Justifying recommendations using distantly-labeled reviews and fine-grained aspects. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp. 188\u2013197 (2019)","DOI":"10.18653\/v1\/D19-1018"},{"key":"5_CR17","unstructured":"Ni, Y., et al.: A content-driven micro-video recommendation dataset at scale. arXiv preprint arXiv:2309.15379 (2023)"},{"key":"5_CR18","doi-asserted-by":"crossref","unstructured":"Ong, R.K., Khong, A.W.: Spectrum-based modality representation fusion graph convolutional network for multimodal recommendation. In: Proceedings of the Eighteenth ACM International Conference on Web Search and Data Mining, pp. 773\u2013781 (2025)","DOI":"10.1145\/3701551.3703561"},{"key":"5_CR19","doi-asserted-by":"crossref","unstructured":"Petrov, A., Macdonald, C.: A systematic review and replicability study of bert4rec for sequential recommendation. In: Proceedings of the 16th ACM Conference on Recommender Systems, pp. 436\u2013447 (2022)","DOI":"10.1145\/3523227.3548487"},{"key":"5_CR20","doi-asserted-by":"crossref","unstructured":"Pomo, C., Attimonelli, M., Danese, D., Narducci, F., Noia, T.D.: Do recommender systems really leverage multimodal content? a comprehensive analysis on multimodal representations for recommendation (2025). https:\/\/arxiv.org\/abs\/2508.04571","DOI":"10.1145\/3746252.3761398"},{"key":"5_CR21","unstructured":"Reimers, N., Gurevych, I.: all-minilm-l6-v2: a sentence-bert model. https:\/\/huggingface.co\/sentence-transformers\/all-MiniLM-L6-v2 (2020), Accessed 06 Sep 2025"},{"key":"5_CR22","unstructured":"Rendle, S., Freudenthaler, C., Gantner, Z., Schmidt-Thieme, L.: Bpr: Bayesian personalized ranking from implicit feedback. In: Proceedings of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence (UAI), arXiv preprint arXiv:1205.2618 (2009)"},{"key":"5_CR23","doi-asserted-by":"crossref","unstructured":"Rendle, S., Krichene, W., Zhang, L., Anderson, J.: Neural collaborative filtering vs. matrix factorization revisited. In: Proceedings of the 14th ACM conference on recommender systems, pp. 240\u2013248 (2020)","DOI":"10.1145\/3383313.3412488"},{"key":"5_CR24","doi-asserted-by":"crossref","unstructured":"Sun, F., et al.: Bert4rec: Sequential recommendation with bidirectional encoder representations from transformer. In: Proceedings of the 28th ACM international conference on information and knowledge management, pp. 1441\u20131450 (2019)","DOI":"10.1145\/3357384.3357895"},{"key":"5_CR25","doi-asserted-by":"publisher","first-page":"5107","DOI":"10.1109\/TMM.2022.3187556","volume":"25","author":"Z Tao","year":"2022","unstructured":"Tao, Z., Liu, X., Xia, Y., Wang, X., Yang, L., Huang, X., Chua, T.S.: Self-supervised learning for multimedia recommendation. IEEE Trans. Multimedia 25, 5107\u20135116 (2022)","journal-title":"IEEE Trans. Multimedia"},{"key":"5_CR26","doi-asserted-by":"publisher","first-page":"1074","DOI":"10.1109\/TMM.2021.3138298","volume":"25","author":"Q Wang","year":"2021","unstructured":"Wang, Q., Wei, Y., Yin, J., Wu, J., Song, X., Nie, L.: Dualgnn: dual graph neural network for multimedia recommendation. IEEE Trans. Multimedia 25, 1074\u20131084 (2021)","journal-title":"IEEE Trans. Multimedia"},{"key":"5_CR27","doi-asserted-by":"crossref","unstructured":"Wei, Y., Wang, X., Nie, L., He, X., Chua, T.S.: Graph-refined convolutional network for multimedia recommendation with implicit feedback. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 3541\u20133549 (2020)","DOI":"10.1145\/3394171.3413556"},{"key":"5_CR28","doi-asserted-by":"crossref","unstructured":"Wei, Y., Wang, X., Nie, L., He, X., Hong, R., Chua, T.S.: Mmgcn: multi-modal graph convolution network for personalized recommendation of micro-video. In: Proceedings of the 27th ACM International Conference on Multimedia, pp. 1437\u20131445 (2019)","DOI":"10.1145\/3343031.3351034"},{"key":"5_CR29","doi-asserted-by":"crossref","unstructured":"Xv, G., et al.: Improving multi-modal recommender systems by denoising and aligning multi-modal content and user feedback. In: Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, pp. 3645\u20133656 (2024)","DOI":"10.1145\/3637528.3671703"},{"key":"5_CR30","doi-asserted-by":"crossref","unstructured":"Yu, P., Tan, Z., Lu, G., Bao, B.K.: Multi-view graph convolutional network for multimedia recommendation. In: Proceedings of the 31st ACM international conference on multimedia, pp. 6576\u20136585 (2023)","DOI":"10.1145\/3581783.3613915"},{"key":"5_CR31","doi-asserted-by":"crossref","unstructured":"Yu, P., Tan, Z., Lu, G., Bao, B.K.: Mind individual information! principal graph learning for multimedia recommendation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a039, pp. 13096\u201313105 (2025)","DOI":"10.1609\/aaai.v39i12.33429"},{"key":"5_CR32","doi-asserted-by":"crossref","unstructured":"Yuan, Z., et al.: Where to go next for recommender systems? id-vs. modality-based recommender models revisited. In: Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2639\u20132649 (2023)","DOI":"10.1145\/3539618.3591932"},{"key":"5_CR33","doi-asserted-by":"crossref","unstructured":"Zhang, J., et al.: Ninerec: A benchmark dataset suite for evaluating transferable recommendation. IEEE Trans. Pattern Anal. Mach. Intell. (2024)","DOI":"10.1109\/TPAMI.2024.3373868"},{"key":"5_CR34","doi-asserted-by":"crossref","unstructured":"Zhang, J., Zhu, Y., Liu, Q., Wu, S., Wang, S., Wang, L.: Mining latent structures for multimedia recommendation. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 3872\u20133880 (2021)","DOI":"10.1145\/3474085.3475259"},{"key":"5_CR35","unstructured":"Zhou, H., Zhang, Y., Sun, A., Shen, Z.: Does multimodality improve recommender systems as expected? a critical analysis and future directions (2025). https:\/\/arxiv.org\/abs\/2508.05377"},{"key":"5_CR36","unstructured":"Zhou, H., Zhou, X., Zeng, Z., Zhang, L., Shen, Z.: A comprehensive survey on multimodal recommender systems: Taxonomy, evaluation, and future directions. arXiv preprint arXiv:2302.04473 (2023)"},{"key":"5_CR37","doi-asserted-by":"crossref","unstructured":"Zhou, H., Zhou, X., Zhang, L., Shen, Z.: Enhancing dyadic relations with homogeneous graphs for multimodal recommendation. In: ECAI 2023, pp. 3123\u20133130. IOS Press (2023)","DOI":"10.3233\/FAIA230631"},{"key":"5_CR38","doi-asserted-by":"crossref","unstructured":"Zhou, X.: Mmrec: Simplifying multimodal recommendation. In: Proceedings of the 5th ACM International Conference on Multimedia in Asia Workshops, pp.\u00a01\u20132 (2023)","DOI":"10.1145\/3611380.3628561"},{"key":"5_CR39","doi-asserted-by":"crossref","unstructured":"Zhou, X., Shen, Z.: A tale of two graphs: freezing and denoising graph structures for multimodal recommendation. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 935\u2013943 (2023)","DOI":"10.1145\/3581783.3611943"},{"key":"5_CR40","doi-asserted-by":"crossref","unstructured":"Zhou, X., et al.: Bootstrap latent representations for multi-modal recommendation. In: Proceedings of the ACM Web Conference 2023, pp. 845\u2013854 (2023)","DOI":"10.1145\/3543507.3583251"},{"key":"5_CR41","doi-asserted-by":"crossref","unstructured":"Zhuang, Z., et al.: Bridging the gap: Teacher-assisted wasserstein knowledge distillation for efficient multi-modal recommendation. In: Proceedings of the ACM on Web Conference 2025, pp. 2464\u20132475 (2025)","DOI":"10.1145\/3696410.3714852"},{"key":"5_CR42","doi-asserted-by":"crossref","unstructured":"Zhuang, Z., et al.: Frequency-decoupled distillation for efficient multimodal recommendation. In: Proceedings of the 34th ACM International Conference on Information and Knowledge Management, pp. 4571\u20134581 (2025)","DOI":"10.1145\/3746252.3761242"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-21324-2_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T23:14:24Z","timestamp":1774307664000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-21324-2_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032213235","9783032213242"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-21324-2_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"24 March 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"ECIR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Information Retrieval","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Delft","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The Netherlands","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2026","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 March 2026","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 April 2026","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"48","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecir2026","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecir2026.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}