{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,15]],"date-time":"2026-07-15T13:13:51Z","timestamp":1784121231271,"version":"3.55.0"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T00:00:00Z","timestamp":1755734400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T00:00:00Z","timestamp":1755734400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s00530-025-01856-9","type":"journal-article","created":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T11:03:14Z","timestamp":1755774194000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["LD4MRec: simplifying and powering diffusion model for multimedia recommendation"],"prefix":"10.1007","volume":"31","author":[{"given":"Jiarui","family":"Zhu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jun","family":"Hou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Penghang","family":"Yu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhiyi","family":"Tan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bing-Kun","family":"Bao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,8,21]]},"reference":[{"issue":"8","key":"1856_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3663574","volume":"18","author":"P Yu","year":"2024","unstructured":"Yu, P., Bao, B.-K., Tan, Z., Lu, G.: Improving graph collaborative filtering with directional behavior enhanced contrastive learning. ACM Trans. Knowl. Discov. Data 18(8), 1\u201320 (2024)","journal-title":"ACM Trans. Knowl. Discov. Data"},{"key":"1856_CR2","doi-asserted-by":"crossref","unstructured":"Fan, Q., Yu, P., Tan, Z., Bao, B.-K., Lu, G.: Befa: a general behavior-driven feature adapter for multimedia recommendation (2024). arXiv:2406.00323","DOI":"10.1609\/aaai.v39i11.33266"},{"key":"1856_CR3","doi-asserted-by":"crossref","unstructured":"Chen, J., Zhang, H., He, X., Nie, L., Liu, W., Chua, T.-S.: Attentive collaborative filtering: multimedia recommendation with item- and component-level attention. In: Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval. SIGIR \u201917, New York, pp. 335\u2013344 (2017)","DOI":"10.1145\/3077136.3080797"},{"key":"1856_CR4","doi-asserted-by":"crossref","unstructured":"Zhou, X., Zhou, H., Liu, Y., Zeng, Z., Miao, C., Wang, P., You, Y., Jiang, F.: Bootstrap latent representations for multi-modal recommendation. In: Proceedings of the ACM Web Conference 2023, pp. 845\u2013854 (2023)","DOI":"10.1145\/3543507.3583251"},{"key":"1856_CR5","doi-asserted-by":"crossref","unstructured":"Yu, P., Tan, Z., Lu, G., Bao, B.-K.: Multi-view graph convolutional network for multimedia recommendation. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 6576\u20136585 (2023)","DOI":"10.1145\/3581783.3613915"},{"key":"1856_CR6","doi-asserted-by":"crossref","unstructured":"Yu, P., Tan, Z., Lu, G., Bao, B.-K.: Mind individual information! principal graph learning for multimedia recommendation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 39, pp. 13096\u201313105 (2025)","DOI":"10.1609\/aaai.v39i12.33429"},{"issue":"3","key":"1856_CR7","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1109\/MIC.2017.72","volume":"21","author":"B Smith","year":"2017","unstructured":"Smith, B., Linden, G.: Two decades of recommender systems at amazon.com. IEEE Internet Comput. 21(3), 12\u201318 (2017)","journal-title":"IEEE Internet Comput."},{"key":"1856_CR8","doi-asserted-by":"crossref","unstructured":"Covington, P., Adams, J., Sargin, E.: Deep neural networks for YouTube recommendations. In: Proceedings of the 10th ACM Conference on Recommender Systems, pp. 191\u2013198 (2016)","DOI":"10.1145\/2959100.2959190"},{"key":"1856_CR9","doi-asserted-by":"crossref","unstructured":"He, R., McAuley, J.: Vbpr: visual Bayesian personalized ranking from implicit feedback. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 30 (2016)","DOI":"10.1609\/aaai.v30i1.9973"},{"key":"1856_CR10","doi-asserted-by":"crossref","unstructured":"Zhang, F., Yuan, N.J., Lian, D., Xie, X., Ma, W.-Y.: Collaborative knowledge base embedding for recommender systems. In: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 353\u2013362 (2016)","DOI":"10.1145\/2939672.2939673"},{"key":"1856_CR11","doi-asserted-by":"crossref","unstructured":"Wei, Y., Wang, X., Nie, L., He, X., Hong, R., Chua, T.-S.: Mmgcn: multi-modal graph convolution network for personalized recommendation of micro-video. In: Proceedings of the 27th ACM International Conference on Multimedia, pp. 1437\u20131445 (2019)","DOI":"10.1145\/3343031.3351034"},{"key":"1856_CR12","doi-asserted-by":"publisher","first-page":"5107","DOI":"10.1109\/TMM.2022.3187556","volume":"25","author":"Z Tao","year":"2022","unstructured":"Tao, Z., Liu, X., Xia, Y., Wang, X., Yang, L., Huang, X., Chua, T.-S.: Self-supervised learning for multimedia recommendation. IEEE Trans. Multimed. 25, 5107\u201316 (2022)","journal-title":"IEEE Trans. Multimed."},{"key":"1856_CR13","doi-asserted-by":"crossref","unstructured":"He, X., Deng, K., Wang, X., Li, Y., Zhang, Y., Wang, M.: Lightgcn: simplifying and powering graph convolution network for recommendation. In: Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 639\u2013648 (2020)","DOI":"10.1145\/3397271.3401063"},{"key":"1856_CR14","doi-asserted-by":"crossref","unstructured":"Wei, Y., Wang, X., Nie, L., He, X., Chua, T.-S.: Graph-refined convolutional network for multimedia recommendation with implicit feedback. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 3541\u20133549 (2020)","DOI":"10.1145\/3394171.3413556"},{"key":"1856_CR15","doi-asserted-by":"publisher","first-page":"9343","DOI":"10.1109\/TMM.2023.3251108","volume":"25","author":"K Liu","year":"2023","unstructured":"Liu, K., Xue, F., Guo, D., Sun, P., Qian, S., Hong, R.: Multimodal graph contrastive learning for multimedia-based recommendation. IEEE Trans. Multimed. 25, 9343\u201355 (2023)","journal-title":"IEEE Trans. Multimed."},{"key":"1856_CR16","doi-asserted-by":"crossref","unstructured":"Wang, X., He, X., Wang, M., Feng, F., Chua, T.-S.: Neural graph collaborative filtering. In: Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval. SIGIR\u201919, New York, pp. 165\u2013174 (2019)","DOI":"10.1145\/3331184.3331267"},{"key":"1856_CR17","doi-asserted-by":"crossref","unstructured":"Wang, W., Feng, F., He, X., Nie, L., Chua, T.-S.: Denoising implicit feedback for recommendation. In: Proceedings of the 14th ACM International Conference on Web Search and Data Mining, pp. 373\u2013381 (2021)","DOI":"10.1145\/3437963.3441800"},{"issue":"2","key":"1856_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3591469","volume":"1","author":"X Zhou","year":"2023","unstructured":"Zhou, X., Sun, A., Liu, Y., Zhang, J., Miao, C.: Selfcf: a simple framework for self-supervised collaborative filtering. ACM Trans. Recomm. Syst. 1(2), 1\u201325 (2023)","journal-title":"ACM Trans. Recomm. Syst."},{"key":"1856_CR19","doi-asserted-by":"publisher","first-page":"5107","DOI":"10.1109\/TMM.2022.3187556","volume":"25","author":"Z Tao","year":"2022","unstructured":"Tao, Z., Liu, X., Xia, Y., Wang, X., Yang, L., Huang, X., Chua, T.-S.: Self-supervised learning for multimedia recommendation. IEEE Trans. Multimed. 25, 5107\u201316 (2022)","journal-title":"IEEE Trans. Multimed."},{"key":"1856_CR20","doi-asserted-by":"crossref","unstructured":"Wei, W., Huang, C., Xia, L., Zhang, C.: Multi-modal self-supervised learning for recommendation. In: Proceedings of the ACM Web Conference 2023, pp. 790\u2013800 (2023)","DOI":"10.1145\/3543507.3583206"},{"key":"1856_CR21","doi-asserted-by":"crossref","unstructured":"Wu, J., Wang, X., Feng, F., He, X., Chen, L., Lian, J., Xie, X.: Self-supervised graph learning for recommendation. In: Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval. SIGIR \u201921, New York, pp. 726\u2013735 (2021)","DOI":"10.1145\/3404835.3462862"},{"key":"1856_CR22","unstructured":"Yang, Z., Wu, J., Wang, Z., Wang, X., Yuan, Y., He, X.: Generate what you prefer: reshaping sequential recommendation via guided diffusion. In: Thirty-seventh Conference on Neural Information Processing Systems (2023)"},{"key":"1856_CR23","first-page":"24804","volume":"34","author":"Y Tashiro","year":"2021","unstructured":"Tashiro, Y., Song, J., Song, Y., Ermon, S.: Csdi: conditional score-based diffusion models for probabilistic time series imputation. Adv. Neural. Inf. Process. Syst. 34, 24804\u201324816 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1856_CR24","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Advances in Neural Information Processing Systems, vol. 33, pp. 6840\u20136851 (2020)"},{"key":"1856_CR25","doi-asserted-by":"crossref","unstructured":"Jiang, Y., Xia, L., Wei, W., Luo, D., Lin, K., Huang, C.: Diffmm: multi-modal diffusion model for recommendation. In: Proceedings of the 32nd ACM International Conference on Multimedia, pp. 7591\u20137599 (2024)","DOI":"10.1145\/3664647.3681498"},{"key":"1856_CR26","doi-asserted-by":"crossref","unstructured":"Wang, W., Xu, Y., Feng, F., Lin, X., He, X., Chua, T.-S.: Diffusion recommender model. In: Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 832\u2013841 (2023)","DOI":"10.1145\/3539618.3591663"},{"issue":"5","key":"1856_CR27","first-page":"4425","volume":"35","author":"L Wu","year":"2022","unstructured":"Wu, L., He, X., Wang, X., Zhang, K., Wang, M.: A survey on accuracy-oriented neural recommendation: from collaborative filtering to information-rich recommendation. IEEE Trans. Knowl. Data Eng. 35(5), 4425\u201345 (2022)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"5","key":"1856_CR28","doi-asserted-by":"publisher","first-page":"242","DOI":"10.1007\/s00530-024-01448-z","volume":"30","author":"H Fang","year":"2024","unstructured":"Fang, H., Sha, L., Liang, J.: Multimodal recommender system based on multi-channel counterfactual learning networks. Multimed. Syst. 30(5), 242 (2024)","journal-title":"Multimed. Syst."},{"issue":"6","key":"1856_CR29","doi-asserted-by":"publisher","first-page":"347","DOI":"10.1007\/s00530-024-01548-w","volume":"30","author":"P Si","year":"2024","unstructured":"Si, P., Qi, Y., Yu, L., Lu, L., Zeng, Q.: Exploiting heterogeneous information isolation and multi-view aggregation for multimodal recommendation. Multimed. Syst. 30(6), 347 (2024)","journal-title":"Multimed. Syst."},{"issue":"4","key":"1856_CR30","doi-asserted-by":"publisher","first-page":"2375","DOI":"10.1007\/s00530-023-01107-9","volume":"29","author":"IU Rehman","year":"2023","unstructured":"Rehman, I.U., Hanif, M.S., Ali, Z., Jan, Z., Mawuli, C.B., Ali, W.: Empowering neural collaborative filtering with contextual features for multimedia recommendation. Multimed. Syst. 29(4), 2375\u20132388 (2023)","journal-title":"Multimed. Syst."},{"issue":"5","key":"1856_CR31","doi-asserted-by":"publisher","first-page":"2747","DOI":"10.1007\/s00530-023-01136-4","volume":"29","author":"W Wei","year":"2023","unstructured":"Wei, W., Wang, J., Xu, M., Zhang, F.: Multimodal heterogeneous graph convolutional network for image recommendation. Multimed. Syst. 29(5), 2747\u20132760 (2023)","journal-title":"Multimed. Syst."},{"issue":"6","key":"1856_CR32","doi-asserted-by":"publisher","first-page":"2161","DOI":"10.1007\/s00530-022-00968-w","volume":"28","author":"I Harrando","year":"2022","unstructured":"Harrando, I., Troncy, R.: Combining semantic and linguistic representations for media recommendation. Multimed. Syst. 28(6), 2161\u20132173 (2022)","journal-title":"Multimed. Syst."},{"key":"1856_CR33","doi-asserted-by":"crossref","unstructured":"Zhang, J., Zhu, Y., Liu, Q., Wu, S., Wang, S., Wang, L.: Mining latent structures for multimedia recommendation. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 3872\u20133880 (2021)","DOI":"10.1145\/3474085.3475259"},{"key":"1856_CR34","doi-asserted-by":"crossref","unstructured":"Wallace, B., Dang, M., Rafailov, R., Zhou, L., Lou, A., Purushwalkam, S., Ermon, S., Xiong, C., Joty, S., Naik, N.: Diffusion model alignment using direct preference optimization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8228\u20138238 (2024)","DOI":"10.1109\/CVPR52733.2024.00786"},{"key":"1856_CR35","doi-asserted-by":"crossref","unstructured":"Tao, M., Tang, H., Wu, F., Jing, X.-Y., Bao, B.-K., Xu, C.: Df-gan: a simple and effective baseline for text-to-image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16515\u201316525 (2022)","DOI":"10.1109\/CVPR52688.2022.01602"},{"key":"1856_CR36","unstructured":"Yang, R., Mandt, S.: Lossy image compression with conditional diffusion models. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"1856_CR37","doi-asserted-by":"crossref","unstructured":"Walker, J., Zhong, T., Zhang, F., Gao, Q., Zhou, F.: Recommendation via collaborative diffusion generative model. In: KSEM, pp. 593\u2013605. Springer (2022)","DOI":"10.1007\/978-3-031-10989-8_47"},{"key":"1856_CR38","first-page":"22720","volume":"37","author":"W Xie","year":"2024","unstructured":"Xie, W., Wang, H., Zhang, L., Zhou, R., Lian, D., Chen, E.: Breaking determinism: fuzzy modeling of sequential recommendation using discrete state space diffusion model. Adv. Neural. Inf. Process. Syst. 37, 22720\u201322744 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1856_CR39","unstructured":"Nichol, A.Q., Dhariwal, P.: Improved denoising diffusion probabilistic models. In: Proceedings of the 39th International Conference on Machine Learning, pp. 8162\u20138171. PMLR (2021)"},{"key":"1856_CR40","unstructured":"Saharia, C., Chan, W., Saxena, S., Li, L., Whang, J., Denton, E.L., Ghasemipour, K., Gontijo\u00a0Lopes, R., Karagol\u00a0Ayan, B., Salimans, T., Ho, J., Fleet, D.J., Norouzi, M.: Photorealistic text-to-image diffusion models with deep language understanding. In: Koyejo, S., Mohamed, S., Agarwal, A., Belgrave, D., Cho, K., Oh, A. (eds.) Advances in Neural Information Processing Systems, vol. 35, pp. 36479\u201336494 (2022)"},{"key":"1856_CR41","doi-asserted-by":"crossref","unstructured":"Rangarajan, A.: Learning matrix space image representations. In: International Workshop on Energy Minimization Methods in Computer Vision and Pattern Recognition, pp. 153\u2013168. Springer (2001)","DOI":"10.1007\/3-540-44745-8_11"},{"issue":"3","key":"1856_CR42","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1016\/0168-9002(95)01478-0","volume":"372","author":"A Hoecker","year":"1996","unstructured":"Hoecker, A., Kartvelishvili, V.: Svd approach to data unfolding. Nucl. Instrum. Methods Phys. Res. Sect. A 372(3), 469\u2013481 (1996)","journal-title":"Nucl. Instrum. Methods Phys. Res. Sect. A"},{"key":"1856_CR43","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"issue":"8","key":"1856_CR44","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/MC.2009.263","volume":"42","author":"Y Koren","year":"2009","unstructured":"Koren, Y., Bell, R., Volinsky, C.: Matrix factorization techniques for recommender systems. Computer 42(8), 30\u201337 (2009)","journal-title":"Computer"},{"key":"1856_CR45","doi-asserted-by":"crossref","unstructured":"Yu, J., Yin, H., Xia, X., Chen, T., Cui, L., Nguyen, Q.V.H.: Are graph augmentations necessary? Simple graph contrastive learning for recommendation. In: Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 1294\u20131303 (2022)","DOI":"10.1145\/3477495.3531937"},{"key":"1856_CR46","doi-asserted-by":"crossref","unstructured":"Zhou, X.: Mmrec: simplifying multimodal recommendation. In: Proceedings of the 5th ACM International Conference on Multimedia in Asia Workshops, pp. 1\u20132 (2023)","DOI":"10.1145\/3611380.3628561"},{"issue":"9","key":"1856_CR47","doi-asserted-by":"publisher","first-page":"9154","DOI":"10.1109\/TKDE.2022.3221949","volume":"35","author":"J Zhang","year":"2022","unstructured":"Zhang, J., Zhu, Y., Liu, Q., Zhang, M., Wu, S., Wang, L.: Latent structure mining with contrastive modality fusion for multimedia recommendation. IEEE Trans. Knowl. Data Eng. 35(9), 9154\u201367 (2022)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"1856_CR48","unstructured":"Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., et al.: Pytorch: an imperative style, high-performance deep learning library. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01856-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-01856-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01856-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T10:26:58Z","timestamp":1761388018000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-01856-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,21]]},"references-count":48,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["1856"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-01856-9","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,21]]},"assertion":[{"value":"16 April 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 May 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 August 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"334"}}