{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T23:34:17Z","timestamp":1775000057070,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,30]],"date-time":"2023-04-30T00:00:00Z","timestamp":1682812800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,30]]},"DOI":"10.1145\/3543507.3583206","type":"proceedings-article","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T23:30:25Z","timestamp":1682551825000},"page":"790-800","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":172,"title":["Multi-Modal Self-Supervised Learning for Recommendation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6653-3788","authenticated-orcid":false,"given":"Wei","family":"Wei","sequence":"first","affiliation":[{"name":"The University of Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2062-1512","authenticated-orcid":false,"given":"Chao","family":"Huang","sequence":"additional","affiliation":[{"name":"The University of Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0725-2211","authenticated-orcid":false,"given":"Lianghao","family":"Xia","sequence":"additional","affiliation":[{"name":"University of Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8349-7926","authenticated-orcid":false,"given":"Chuxu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Brandeis University, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,4,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"Jianxin Chang Chen Gao Yu Zheng Yiqun Hui Yanan Niu Yang Song Depeng Jin and Yong Li. 2021. Sequential recommendation with graph neural networks. In SIGIR. 378\u2013387.","DOI":"10.1145\/3404835.3462968"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Jingyuan Chen Hanwang Zhang Xiangnan He Liqiang Nie Wei Liu and Tat-Seng Chua. 2017. Attentive collaborative filtering: Multimedia recommendation with item-and component-level attention. In SIGIR. 335\u2013344.","DOI":"10.1145\/3077136.3080797"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Lei Chen Le Wu Richang Hong Kun Zhang and Meng Wang. 2020. Revisiting Graph Based Collaborative Filtering: A Linear Residual Graph Convolutional Network Approach. In AAAI Vol.\u00a034. 27\u201334.","DOI":"10.1609\/aaai.v34i01.5330"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Mengru Chen Chao Huang Lianghao Xia Wei Wei Yong Xu and Ronghua Luo. 2023. Heterogeneous Graph Contrastive Learning for Recommendation. In WSDM.","DOI":"10.1145\/3539597.3570484"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Xu Chen Hanxiong Chen Hongteng Xu Yongfeng Zhang Yixin Cao Zheng Qin and Hongyuan Zha. 2019. Personalized fashion recommendation with visual explanations based on multimodal attention network: Towards visually explainable recommendation. In SIGIR. 765\u2013774.","DOI":"10.1145\/3331184.3331254"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Wenqi Fan Yao Ma Qing Li Yuan He Eric Zhao Jiliang Tang and Dawei Yin. 2019. Graph neural networks for social recommendation. In WWW. 417\u2013426.","DOI":"10.1145\/3308558.3313488"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2945180"},{"key":"e_1_3_2_1_8_1","volume-title":"AISTATS. JMLR Workshop and Conference Proceedings, 249\u2013256","author":"Glorot Xavier","year":"2010","unstructured":"Xavier Glorot and Yoshua Bengio. 2010. Understanding the difficulty of training deep feedforward neural networks. In AISTATS. JMLR Workshop and Conference Proceedings, 249\u2013256."},{"key":"e_1_3_2_1_9_1","volume-title":"Improved training of wasserstein gans. Neurips 30","author":"Gulrajani Ishaan","year":"2017","unstructured":"Ishaan Gulrajani, Faruk Ahmed, Martin Arjovsky, Vincent Dumoulin, and Aaron\u00a0C Courville. 2017. Improved training of wasserstein gans. Neurips 30 (2017)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Ruining He and Julian McAuley. 2016. VBPR: visual bayesian personalized ranking from implicit feedback. In AAAI Vol.\u00a030.","DOI":"10.1609\/aaai.v30i1.9973"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401063"},{"key":"e_1_3_2_1_12_1","volume-title":"Improving neural networks by preventing co-adaptation of feature detectors. CVPR","author":"Hinton E","year":"2012","unstructured":"Geoffrey\u00a0E Hinton, Nitish Srivastava, Alex Krizhevsky, Ilya Sutskever, and Ruslan\u00a0R Salakhutdinov. 2012. Improving neural networks by preventing co-adaptation of feature detectors. CVPR (2012)."},{"key":"e_1_3_2_1_13_1","volume-title":"Generative adversarial networks based data augmentation for noise robust speech recognition","author":"Hu Hu","unstructured":"Hu Hu, Tian Tan, and Yanmin Qian. 2018. Generative adversarial networks based data augmentation for noise robust speech recognition. In ICASSP. IEEE, 5044\u20135048."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Chao Huang Huance Xu Yong Xu Peng Dai Lianghao Xia Mengyin Lu Liefeng Bo Hao Xing Xiaoping Lai and Yanfang Ye. 2021. Knowledge-aware coupled graph neural network for social recommendation. In AAAI. 4115\u20134122.","DOI":"10.1609\/aaai.v35i5.16533"},{"key":"e_1_3_2_1_15_1","volume-title":"International conference on machine learning. PMLR, 448\u2013456","author":"Ioffe Sergey","year":"2015","unstructured":"Sergey Ioffe and Christian Szegedy. 2015. Batch normalization: Accelerating deep network training by reducing internal covariate shift. In International conference on machine learning. PMLR, 448\u2013456."},{"key":"e_1_3_2_1_16_1","volume-title":"Categorical reparameterization with gumbel-softmax. arXiv preprint arXiv:1611.01144","author":"Jang Eric","year":"2016","unstructured":"Eric Jang, Shixiang Gu, and Ben Poole. 2016. Categorical reparameterization with gumbel-softmax. arXiv preprint arXiv:1611.01144 (2016)."},{"key":"e_1_3_2_1_17_1","first-page":"18661","article-title":"Supervised contrastive learning","volume":"33","author":"Khosla Prannay","year":"2020","unstructured":"Prannay Khosla, Piotr Teterwak, Chen Wang, Aaron Sarna, Yonglong Tian, Phillip Isola, Aaron Maschinot, Ce Liu, and Dilip Krishnan. 2020. Supervised contrastive learning. NeurIPS 33 (2020), 18661\u201318673.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_18_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma P","year":"2014","unstructured":"Diederik\u00a0P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Qimai Li Zhichao Han and Xiao-Ming Wu. 2018. Deeper insights into graph convolutional networks for semi-supervised learning. In AAAI Vol.\u00a032.","DOI":"10.1609\/aaai.v32i1.11604"},{"key":"e_1_3_2_1_20_1","volume-title":"Does Adversarial Transferability Indicate Knowledge Transferability?","author":"Liang Kaizhao","year":"2020","unstructured":"Kaizhao Liang, Jacky\u00a0Y Zhang, Oluwasanmi\u00a0O Koyejo, and Bo Li. 2020. Does Adversarial Transferability Indicate Knowledge Transferability? (2020)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Zihan Lin Changxin Tian Yupeng Hou and Wayne\u00a0Xin Zhao. 2022. Improving Graph Collaborative Filtering with Neighborhood-enriched Contrastive Learning. In WWW. 2320\u20132329.","DOI":"10.1145\/3485447.3512104"},{"key":"e_1_3_2_1_22_1","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. Decoupled weight decay regularization. In ICLR."},{"key":"e_1_3_2_1_23_1","unstructured":"Andrew\u00a0L Maas Awni\u00a0Y Hannun Andrew\u00a0Y Ng 2013. Rectifier nonlinearities improve neural network acoustic models. In ICML."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Julian McAuley Christopher Targett Qinfeng Shi and Anton Van Den\u00a0Hengel. 2015. Image-based recommendations on styles and substitutes. In SIGIR. 43\u201352.","DOI":"10.1145\/2766462.2767755"},{"key":"e_1_3_2_1_25_1","volume-title":"Unrolled generative adversarial networks. arXiv preprint arXiv:1611.02163","author":"Metz Luke","year":"2016","unstructured":"Luke Metz, Ben Poole, David Pfau, and Jascha Sohl-Dickstein. 2016. Unrolled generative adversarial networks. arXiv preprint arXiv:1611.02163 (2016)."},{"key":"e_1_3_2_1_26_1","volume-title":"Virtual adversarial training: a regularization method for supervised and semi-supervised learning","author":"Miyato Takeru","year":"2018","unstructured":"Takeru Miyato, Shin-ichi Maeda, Masanori Koyama, and Shin Ishii. 2018. Virtual adversarial training: a regularization method for supervised and semi-supervised learning. IEEE transactions on pattern analysis and machine intelligence 41, 8 (2018), 1979\u20131993."},{"key":"e_1_3_2_1_27_1","volume-title":"On the regularization of wasserstein gans. ICLR","author":"Petzka Henning","year":"2017","unstructured":"Henning Petzka, Asja Fischer, and Denis Lukovnicov. 2017. On the regularization of wasserstein gans. ICLR (2017)."},{"key":"e_1_3_2_1_28_1","volume-title":"Causalrec: Causal inference for visual debiasing in visually-aware recommendation. In MM. ACM, 3844\u20133852.","author":"Qiu Ruihong","year":"2021","unstructured":"Ruihong Qiu, Sen Wang, Zhi Chen, Hongzhi Yin, and Zi Huang. 2021. Causalrec: Causal inference for visual debiasing in visually-aware recommendation. In MM. ACM, 3844\u20133852."},{"key":"e_1_3_2_1_29_1","volume-title":"Unsupervised representation learning with deep convolutional generative adversarial networks. arXiv preprint arXiv:1511.06434","author":"Radford Alec","year":"2015","unstructured":"Alec Radford, Luke Metz, and Soumith Chintala. 2015. Unsupervised representation learning with deep convolutional generative adversarial networks. arXiv preprint arXiv:1511.06434 (2015)."},{"key":"e_1_3_2_1_30_1","volume-title":"Sentence-bert: Sentence embeddings using siamese bert-networks. In EMNLP.","author":"Reimers Nils","year":"2019","unstructured":"Nils Reimers and Iryna Gurevych. 2019. Sentence-bert: Sentence embeddings using siamese bert-networks. In EMNLP."},{"key":"e_1_3_2_1_31_1","volume-title":"Veegan: Reducing mode collapse in gans using implicit variational learning. Advances in neural information processing systems 30","author":"Srivastava Akash","year":"2017","unstructured":"Akash Srivastava, Lazar Valkov, Chris Russell, Michael\u00a0U Gutmann, and Charles Sutton. 2017. Veegan: Reducing mode collapse in gans using implicit variational learning. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Rui Sun Xuezhi Cao Yan Zhao Junchen Wan Kun Zhou Fuzheng Zhang Zhongyuan Wang and Kai Zheng. 2020. Multi-modal knowledge graphs for recommender systems. In CIKM. 1405\u20131414.","DOI":"10.1145\/3340531.3411947"},{"key":"e_1_3_2_1_33_1","volume-title":"Self-supervised Learning for Multimedia Recommendation. Transactions on Multimedia (TMM)","author":"Tao Zhulin","year":"2022","unstructured":"Zhulin Tao, Xiaohao Liu, Yewei Xia, Xiang Wang, Lifang Yang, Xianglin Huang, and Tat-Seng Chua. 2022. Self-supervised Learning for Multimedia Recommendation. Transactions on Multimedia (TMM) (2022)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Quoc-Tuan Truong Aghiles Salah and Hady Lauw. 2021. Multi-modal recommender systems: Hands-on exploration. In Recsys. ACM 834\u2013837.","DOI":"10.1145\/3460231.3473324"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Di Wang Quan Wang Yaqiang An Xinbo Gao and Yumin Tian. 2020. Online collective matrix factorization hashing for large-scale cross-media retrieval. In SIGIR. 1409\u20131418.","DOI":"10.1145\/3397271.3401132"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Feng Wang and Huaping Liu. 2021. Understanding the behaviour of contrastive loss. In CVPR. 2495\u20132504.","DOI":"10.1109\/CVPR46437.2021.00252"},{"key":"e_1_3_2_1_37_1","volume-title":"DualGNN: Dual Graph Neural Network for Multimedia Recommendation. Transactions on Multimedia (TMM)","author":"Wang Qifan","year":"2021","unstructured":"Qifan Wang, Yinwei Wei, Jianhua Yin, Jianlong Wu, Xuemeng Song, and Liqiang Nie. 2021. DualGNN: Dual Graph Neural Network for Multimedia Recommendation. Transactions on Multimedia (TMM) (2021)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330989"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Xiang Wang Xiangnan He Meng Wang Fuli Feng and Tat-Seng Chua. 2019. Neural Graph Collaborative Filtering. In SIGIR.","DOI":"10.1145\/3331184.3331267"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"Ziyang Wang Wei Wei Gao Cong Xiao-Li Li Xian-Ling Mao and Minghui Qiu. 2020. Global context enhanced graph neural networks for session-based recommendation. In SIGIR. 169\u2013178.","DOI":"10.1145\/3397271.3401142"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"Wei Wei Chao Huang Lianghao Xia Yong Xu Jiashu Zhao and Dawei Yin. 2022. Contrastive meta learning with behavior multiplicity for recommendation. In WSDM. 1120\u20131128.","DOI":"10.1145\/3488560.3498527"},{"key":"e_1_3_2_1_42_1","volume-title":"Hierarchical user intent graph network for multimedia recommendation. Transactions on Multimedia (TMM)","author":"Wei Yinwei","year":"2021","unstructured":"Yinwei Wei, Xiang Wang, Xiangnan He, Liqiang Nie, Yong Rui, and Tat-Seng Chua. 2021. Hierarchical user intent graph network for multimedia recommendation. Transactions on Multimedia (TMM) (2021)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Yinwei Wei Xiang Wang Qi Li Liqiang Nie Yan Li Xuanping Li and Tat-Seng Chua. 2021. Contrastive learning for cold-start recommendation. In MM. 5382\u20135390.","DOI":"10.1145\/3474085.3475665"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","unstructured":"Yinwei Wei Xiang Wang Liqiang Nie Xiangnan He and Tat-Seng Chua. 2020. Graph-refined convolutional network for multimedia recommendation with implicit feedback. In MM. 3541\u20133549.","DOI":"10.1145\/3394171.3413556"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3351034"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Jiancan Wu Xiang Wang Fuli Feng Xiangnan He Liang Chen Jianxun Lian and Xing Xie. 2021. Self-supervised graph learning for recommendation. In SIGIR. 726\u2013735.","DOI":"10.1145\/3404835.3462862"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","unstructured":"Lianghao Xia Chao Huang Yong Xu Jiashu Zhao Dawei Yin and Jimmy\u00a0Xiangji Huang. 2022. Hypergraph Contrastive Collaborative Filtering. In SIGIR. 70\u201379.","DOI":"10.1145\/3477495.3532058"},{"key":"e_1_3_2_1_48_1","volume-title":"Contrastive learning for sequential recommendation","author":"Xie Xu","unstructured":"Xu Xie, Fei Sun, Zhaoyang Liu, Shiwen Wu, Jinyang Gao, Jiandong Zhang, Bolin Ding, and Bin Cui. 2022. Contrastive learning for sequential recommendation. In ICDE. IEEE, 1259\u20131273."},{"key":"e_1_3_2_1_49_1","unstructured":"Zixuan Yi Xi Wang Iadh Ounis and Craig Macdonald. 2022. Multi-modal Graph Contrastive Learning for Micro-video Recommendation. In SIGIR. 1807\u20131811."},{"key":"e_1_3_2_1_50_1","volume-title":"Nguyen Quoc\u00a0Viet Hung, and Xiangliang Zhang","author":"Yu Junliang","year":"2021","unstructured":"Junliang Yu, Hongzhi Yin, Jundong Li, Qinyong Wang, Nguyen Quoc\u00a0Viet Hung, and Xiangliang Zhang. 2021. Self-Supervised Multi-Channel Hypergraph Convolutional Network for Social Recommendation. In WWW. 413\u2013424."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"crossref","unstructured":"Jinghao Zhang Yanqiao Zhu Qiang Liu Shu Wu Shuhui Wang and Liang Wang. 2021. Mining Latent Structures for Multimedia Recommendation. In MM. 3872\u20133880.","DOI":"10.1145\/3474085.3475259"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"crossref","unstructured":"Kun Zhou Hui Wang Wayne\u00a0Xin Zhao Yutao Zhu Sirui Wang Fuzheng Zhang 2020. S3-rec: Self-supervised learning for sequential recommendation with mutual information maximization. In CIKM. 1893\u20131902.","DOI":"10.1145\/3340531.3411954"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"crossref","unstructured":"Yanqiao Zhu Yichen Xu Feng Yu Qiang Liu Shu Wu and Liang Wang. 2021. Graph contrastive learning with adaptive augmentation. In WWW. 2069\u20132080.","DOI":"10.1145\/3442381.3449802"}],"event":{"name":"WWW '23: The ACM Web Conference 2023","location":"Austin TX USA","acronym":"WWW '23","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2023"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583206","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3543507.3583206","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:09:18Z","timestamp":1750183758000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583206"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,30]]},"references-count":53,"alternative-id":["10.1145\/3543507.3583206","10.1145\/3543507"],"URL":"https:\/\/doi.org\/10.1145\/3543507.3583206","relation":{},"subject":[],"published":{"date-parts":[[2023,4,30]]},"assertion":[{"value":"2023-04-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}