{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T15:04:21Z","timestamp":1780931061050,"version":"3.54.1"},"reference-count":42,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,12]]},"DOI":"10.1016\/j.patcog.2026.114020","type":"journal-article","created":{"date-parts":[[2026,5,23]],"date-time":"2026-05-23T06:46:31Z","timestamp":1779518791000},"page":"114020","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PA","title":["Benchmarking Multimodal Large Language Models for missing modality completion in product catalogues"],"prefix":"10.1016","volume":"180","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4759-2042","authenticated-orcid":false,"given":"Junchen","family":"Fu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wenhao","family":"Deng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kaiwen","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ioannis","family":"Arapakis","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yu","family":"Ye","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yongxin","family":"Ni","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Joemon M.","family":"Jose","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xuri","family":"Ge","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2026.114020_b1","doi-asserted-by":"crossref","unstructured":"Y. Zhu, H. Zhao, W. Zhang, G. Ye, H. Chen, N. Zhang, H. Chen, Knowledge perceived multi-modal pretraining in e-commerce, in: Proceedings of the 29th ACM International Conference on Multimedia, 2021, pp. 2744\u20132752.","DOI":"10.1145\/3474085.3475648"},{"key":"10.1016\/j.patcog.2026.114020_b2","doi-asserted-by":"crossref","unstructured":"C. Wang, M. Niepert, H. Li, LRMM: Learning to Recommend with Missing Modalities, in: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, 2018, pp. 3360\u20133370.","DOI":"10.18653\/v1\/D18-1373"},{"key":"10.1016\/j.patcog.2026.114020_b3","doi-asserted-by":"crossref","unstructured":"D. Malitesta, E. Rossi, C. Pomo, T. Di Noia, F.D. Malliaros, Do We Really Need to Drop Items with Missing Modalities in Multimodal Recommendation?, in: Proceedings of the 33rd ACM International Conference on Information and Knowledge Management, 2024, pp. 3943\u20133948.","DOI":"10.1145\/3627673.3679898"},{"key":"10.1016\/j.patcog.2026.114020_b4","doi-asserted-by":"crossref","DOI":"10.1109\/TKDE.2025.3608071","article-title":"Efficient and effective adaptation of multimodal foundation models in sequential recommendation","author":"Fu","year":"2025","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"10.1016\/j.patcog.2026.114020_b5","doi-asserted-by":"crossref","unstructured":"H. Li, P. Yuan, S. Xu, Y. Wu, X. He, B. Zhou, Aspect-aware multimodal summarization for chinese e-commerce products, in: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 34, 2020, pp. 8188\u20138195.","DOI":"10.1609\/aaai.v34i05.6332"},{"key":"10.1016\/j.patcog.2026.114020_b6","series-title":"Companion Proceedings of the ACM Web Conference 2023","first-page":"336","article-title":"MPKGAC: Multimodal product attribute completion in E-commerce","author":"Wang","year":"2023"},{"issue":"3","key":"10.1016\/j.patcog.2026.114020_b7","first-page":"1","article-title":"Mice: Multivariate imputation by chained equations in R","volume":"45","author":"van Buuren","year":"2011","journal-title":"J. Stat. Softw."},{"issue":"1","key":"10.1016\/j.patcog.2026.114020_b8","doi-asserted-by":"crossref","first-page":"112","DOI":"10.1093\/bioinformatics\/btr597","article-title":"MissForest\u2014non-parametric missing value imputation for mixed-type data","volume":"28","author":"Stekhoven","year":"2012","journal-title":"Bioinformatics"},{"key":"10.1016\/j.patcog.2026.114020_b9","doi-asserted-by":"crossref","unstructured":"G. Ke, S. He, X. Wang, B. Wang, G. Chao, Y. Zhang, Y. Xie, H. Su, Knowledge Bridger: Towards Training-Free Missing Modality Completion, in: Proceedings of the Computer Vision and Pattern Recognition Conference, 2025, pp. 25864\u201325873.","DOI":"10.1109\/CVPR52734.2025.02409"},{"key":"10.1016\/j.patcog.2026.114020_b10","doi-asserted-by":"crossref","unstructured":"M. Ma, J. Ren, L. Zhao, S. Tulyakov, C. Wu, X. Peng, SMIL: Multimodal Learning with Severely Missing Modality, in: Proceedings of the 35th AAAI Conference on Artificial Intelligence, 2021, pp. 2302\u20132310.","DOI":"10.1609\/aaai.v35i3.16330"},{"key":"10.1016\/j.patcog.2026.114020_b11","series-title":"CVPR","first-page":"18156","article-title":"Are multimodal transformers robust to missing modality?","author":"Ma","year":"2022"},{"key":"10.1016\/j.patcog.2026.114020_b12","series-title":"CVPR","first-page":"14943","article-title":"Multimodal prompting with missing modalities for visual recognition","author":"Lee","year":"2023"},{"key":"10.1016\/j.patcog.2026.114020_b13","doi-asserted-by":"crossref","unstructured":"Z. Wang, Z. Wan, X. Wan, Transmodality: An end2end fusion method with transformer for multimodal sentiment analysis, in: Proceedings of the Web Conference 2020, 2020, pp. 2514\u20132520.","DOI":"10.1145\/3366423.3380000"},{"key":"10.1016\/j.patcog.2026.114020_b14","article-title":"Adaptive latent disease state learning for multimodal alzheimer\u2019s disease biomarker detection with missing modalities","author":"Chen","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.114020_b15","article-title":"MCE: Towards a general framework for handling missing modalities under imbalanced missing rates","author":"Zhao","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.114020_b16","article-title":"MIMAR-OSA: Enhancing obstructive sleep apnea diagnosis through multimodal data integration and missing modality reconstruction","author":"Qiu","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.114020_b17","series-title":"CIKM","first-page":"1451","article-title":"Adaptive feature sampling for recommendation with missing content feature values","author":"Shi","year":"2019"},{"issue":"9","key":"10.1016\/j.patcog.2026.114020_b18","doi-asserted-by":"crossref","first-page":"4077","DOI":"10.1109\/TKDE.2020.3040772","article-title":"An attribute-aware attentive graph convolutional network for attribute missing in recommendation","volume":"34","author":"Liu","year":"2022","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"10.1016\/j.patcog.2026.114020_b19","doi-asserted-by":"crossref","unstructured":"J. Kim, H. Kang, S. Kim, K. Kim, C. Park, Disentangling and generating modalities for recommendation in missing modality scenarios, in: Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval, 2025, pp. 1820\u20131829.","DOI":"10.1145\/3726302.3729953"},{"issue":"12","key":"10.1016\/j.patcog.2026.114020_b20","doi-asserted-by":"crossref","DOI":"10.1007\/s11432-025-4676-4","article-title":"Large multimodal models evaluation: A survey","volume":"68","author":"Zhang","year":"2025","journal-title":"Sci. China Inf. Sci."},{"key":"10.1016\/j.patcog.2026.114020_b21","article-title":"Aibench: Towards trustworthy evaluation under the 45\u00b0 law","author":"Zhang","year":"2025","journal-title":"Displays"},{"issue":"2","key":"10.1016\/j.patcog.2026.114020_b22","doi-asserted-by":"crossref","DOI":"10.1007\/s11432-025-4631-2","article-title":"Towards versatile multimedia quality assessment for visual communications","volume":"69","author":"Zhang","year":"2026","journal-title":"Sci. China Inf. Sci."},{"key":"10.1016\/j.patcog.2026.114020_b23","doi-asserted-by":"crossref","unstructured":"X. Zhou, MMREC: Simplifying multimodal recommendation, in: Proceedings of the 5th ACM International Conference on Multimedia in Asia Workshops, 2023, pp. 1\u20132.","DOI":"10.1145\/3611380.3628561"},{"key":"10.1016\/j.patcog.2026.114020_b24","doi-asserted-by":"crossref","unstructured":"Y. Hou, S. Mu, W.X. Zhao, Y. Li, B. Ding, J.-R. Wen, Towards universal sequence representation learning for recommender systems, in: Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, 2022, pp. 585\u2013593.","DOI":"10.1145\/3534678.3539381"},{"key":"10.1016\/j.patcog.2026.114020_b25","doi-asserted-by":"crossref","unstructured":"S. Zhang, L. Chen, D. Shen, C. Wang, H. Xiong, Hierarchical Time-Aware Mixture of Experts for Multi-Modal Sequential Recommendation, in: Proceedings of the ACM on Web Conference 2025, 2025, pp. 3672\u20133682.","DOI":"10.1145\/3696410.3714676"},{"key":"10.1016\/j.patcog.2026.114020_b26","series-title":"Bertscore: Evaluating text generation with bert","author":"Zhang","year":"2019"},{"key":"10.1016\/j.patcog.2026.114020_b27","doi-asserted-by":"crossref","unstructured":"N. Reimers, I. Gurevych, Sentence-bert: Sentence embeddings using siamese bert-networks, in: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), 2019, pp. 3982\u20133992.","DOI":"10.18653\/v1\/D19-1410"},{"key":"10.1016\/j.patcog.2026.114020_b28","unstructured":"S. Banerjee, A. Lavie, METEOR: An automatic metric for MT evaluation with improved correlation with human judgments, in: Proceedings of the Acl Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/Or Summarization, 2005, pp. 65\u201372."},{"key":"10.1016\/j.patcog.2026.114020_b29","doi-asserted-by":"crossref","unstructured":"W. Zhao, M. Peyrard, F. Liu, Y. Gao, C.M. Meyer, S. Eger, MoverScore: Text generation evaluating with contextualized embeddings and earth mover distance, in: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing, EMNLP-IJCNLP, 2019, pp. 563\u2013578.","DOI":"10.18653\/v1\/D19-1053"},{"key":"10.1016\/j.patcog.2026.114020_b30","series-title":"2010 20th International Conference on Pattern Recognition","first-page":"2366","article-title":"Image quality metrics: PSNR vs. SSIM","author":"Hore","year":"2010"},{"issue":"4","key":"10.1016\/j.patcog.2026.114020_b31","doi-asserted-by":"crossref","first-page":"600","DOI":"10.1109\/TIP.2003.819861","article-title":"Image quality assessment: From error visibility to structural similarity","volume":"13","author":"Wang","year":"2004","journal-title":"IEEE Trans. Image Process."},{"issue":"11","key":"10.1016\/j.patcog.2026.114020_b32","doi-asserted-by":"crossref","first-page":"4447","DOI":"10.1109\/TIP.2013.2273671","article-title":"A perceptually relevant MSE-based image quality metric","volume":"22","author":"Tan","year":"2013","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.patcog.2026.114020_b33","doi-asserted-by":"crossref","unstructured":"R. Zhang, P. Isola, A.A. Efros, E. Shechtman, O. Wang, The unreasonable effectiveness of deep features as a perceptual metric, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2018, pp. 586\u2013595.","DOI":"10.1109\/CVPR.2018.00068"},{"key":"10.1016\/j.patcog.2026.114020_b34","series-title":"Proceedings of the 38th International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.patcog.2026.114020_b35","article-title":"Gans trained by a two time-scale update rule converge to a local nash equilibrium","volume":"30","author":"Heusel","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.114020_b36","doi-asserted-by":"crossref","DOI":"10.1016\/j.csl.2020.101151","article-title":"Human evaluation of automatically generated text: Current trends and best practice guidelines","volume":"67","author":"Van der Lee","year":"2021","journal-title":"Comput. Speech Lang."},{"key":"10.1016\/j.patcog.2026.114020_b37","doi-asserted-by":"crossref","unstructured":"R. He, J. McAuley, VBPR: visual bayesian personalized ranking from implicit feedback, in: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 30, 2016.","DOI":"10.1609\/aaai.v30i1.9973"},{"key":"10.1016\/j.patcog.2026.114020_b38","doi-asserted-by":"crossref","unstructured":"X. Zhou, H. Zhou, Y. Liu, Z. Zeng, C. Miao, P. Wang, Y. You, F. Jiang, Bootstrap latent representations for multi-modal recommendation, in: Proceedings of the ACM Web Conference 2023, 2023, pp. 845\u2013854.","DOI":"10.1145\/3543507.3583251"},{"key":"10.1016\/j.patcog.2026.114020_b39","doi-asserted-by":"crossref","unstructured":"X. Zhou, Z. Shen, A tale of two graphs: Freezing and denoising graph structures for multimodal recommendation, in: Proceedings of the 31st ACM International Conference on Multimedia, 2023, pp. 935\u2013943.","DOI":"10.1145\/3581783.3611943"},{"key":"10.1016\/j.patcog.2026.114020_b40","series-title":"Deepseekmath: Pushing the limits of mathematical reasoning in open language models","author":"Shao","year":"2024"},{"key":"10.1016\/j.patcog.2026.114020_b41","doi-asserted-by":"crossref","unstructured":"D. Ghalandari, C. Hokamp, G. Ifrim, Efficient Unsupervised Sentence Compression by Fine-tuning Transformers with Reinforcement Learning, in: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), 2022, pp. 1267\u20131280.","DOI":"10.18653\/v1\/2022.acl-long.90"},{"key":"10.1016\/j.patcog.2026.114020_b42","doi-asserted-by":"crossref","unstructured":"S. Vaze, N. Carion, I. Misra, Genecis: A benchmark for general conditional image similarity, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 6862\u20136872.","DOI":"10.1109\/CVPR52729.2023.00663"}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326009854?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326009854?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T14:44:16Z","timestamp":1780929856000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326009854"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,12]]},"references-count":42,"alternative-id":["S0031320326009854"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.114020","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,12]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Benchmarking Multimodal Large Language Models for missing modality completion in product catalogues","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.114020","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"114020"}}