{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:05:09Z","timestamp":1750309509427,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3680579","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:41Z","timestamp":1729925981000},"page":"1225-1234","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["TagOOD: A Novel Approach to Out-of-Distribution Detection via Vision-Language Representations and Class Center Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-4930-6284","authenticated-orcid":false,"given":"Jinglun","family":"Li","sequence":"first","affiliation":[{"name":"Shanghai Engineering Research Center of AI &amp; Robotics, Academy for Engineering &amp; Technology, Fudan University, Shanghai, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5734-1305","authenticated-orcid":false,"given":"Xinyu","family":"Zhou","sequence":"additional","affiliation":[{"name":"Shanghai Key Lab of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2878-0497","authenticated-orcid":false,"given":"Kaixun","family":"Jiang","sequence":"additional","affiliation":[{"name":"Shanghai Engineering Research Center of AI &amp; Robotics, Academy for Engineering &amp; Technology, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2749-5133","authenticated-orcid":false,"given":"Lingyi","family":"Hong","sequence":"additional","affiliation":[{"name":"Shanghai Key Lab of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4388-9757","authenticated-orcid":false,"given":"Pinxue","family":"Guo","sequence":"additional","affiliation":[{"name":"Shanghai Engineering Research Center of AI &amp; Robotics, Academy for Engineering &amp; Technology, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7112-2596","authenticated-orcid":false,"given":"Zhaoyu","family":"Chen","sequence":"additional","affiliation":[{"name":"Shanghai Engineering Research Center of AI &amp; Robotics, Academy for Engineering &amp; Technology, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6258-6225","authenticated-orcid":false,"given":"Weifeng","family":"Ge","sequence":"additional","affiliation":[{"name":"Shanghai Key Lab of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3339-8751","authenticated-orcid":false,"given":"Wenqiang","family":"Zhang","sequence":"additional","affiliation":[{"name":"Engineering Research Center of AI &amp; Robotics, Ministry of Education, Academy for Engineering &amp; Technology, Fudan University &amp; Shanghai Key Lab of Intelligent Information Processing, School of Computer Science, Fudan University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Bard. 2023. A Large Language Model from Google AI. https:\/\/ai.googleblog. com\/2022\/01\/lamda-language-model-for-dialogue-and.html"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00294"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00119"},{"key":"e_1_3_2_1_4_1","volume-title":"Pali: A jointly-scaled multilingual language-image model. arXiv preprint arXiv:2209.06794","author":"Chen Xi","year":"2022","unstructured":"Xi Chen, Xiao Wang, Soravit Changpinyo, AJ Piergiovanni, Piotr Padlewski, Daniel Salz, Sebastian Goodman, Adam Grycner, Basil Mustafa, Lucas Beyer, et al. 2022. Pali: A jointly-scaled multilingual language-image model. arXiv preprint arXiv:2209.06794 (2022)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.461"},{"key":"e_1_3_2_1_6_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i6.20610"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298754"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11023-020-09548-1"},{"key":"e_1_3_2_1_10_1","volume-title":"Clip-adapter: Better vision-language models with feature adapters. International Journal of Computer Vision","author":"Gao Peng","year":"2023","unstructured":"Peng Gao, Shijie Geng, Renrui Zhang, Teli Ma, Rongyao Fang, Yongfeng Zhang, Hongsheng Li, and Yu Qiao. 2023. Clip-adapter: Better vision-language models with feature adapters. International Journal of Computer Vision (2023), 1--15."},{"key":"e_1_3_2_1_11_1","volume-title":"Openvis: Open-vocabulary video instance segmentation. arXiv preprint arXiv:2305.16835","author":"Guo Pinxue","year":"2023","unstructured":"Pinxue Guo, Tony Huang, Peiyang He, Xuefeng Liu, Tianjun Xiao, Zhaoyu Chen, and Wenqiang Zhang. 2023. Openvis: Open-vocabulary video instance segmentation. arXiv preprint arXiv:2305.16835 (2023)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3219230"},{"key":"e_1_3_2_1_13_1","volume-title":"CVAD: A generic medical anomaly detector based on Cascade VAE. arXiv preprint arXiv:2110.15811","author":"Guo Xiaoyuan","year":"2021","unstructured":"Xiaoyuan Guo, JudyWawira Gichoya, Saptarshi Purkayastha, and Imon Banerjee. 2021. CVAD: A generic medical anomaly detector based on Cascade VAE. arXiv preprint arXiv:2110.15811 (2021)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611881"},{"key":"e_1_3_2_1_17_1","volume-title":"Abaseline for detecting misclassified and out-of-distribution examples in neural networks. arXiv preprint arXiv:1610.02136","author":"Hendrycks Dan","year":"2016","unstructured":"Dan Hendrycks and Kevin Gimpel. 2016. Abaseline for detecting misclassified and out-of-distribution examples in neural networks. arXiv preprint arXiv:1610.02136 (2016)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01501"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01096"},{"key":"e_1_3_2_1_20_1","first-page":"677","article-title":"On the importance of gradients for detecting distributional shifts in the wild","volume":"34","author":"Huang Rui","year":"2021","unstructured":"Rui Huang, Andrew Geng, and Yixuan Li. 2021. On the importance of gradients for detecting distributional shifts in the wild. Advances in Neural Information Processing Systems 34 (2021), 677--689.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00860"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548108"},{"key":"e_1_3_2_1_23_1","volume-title":"International conference on machine learning. PMLR, 4904--4916","author":"Jia Chao","year":"2021","unstructured":"Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc Le, Yun-Hsuan Sung, Zhen Li, and Tom Duerig. 2021. Scaling up visual and visionlanguage representation learning with noisy text supervision. In International conference on machine learning. PMLR, 4904--4916."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00404"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611827"},{"key":"e_1_3_2_1_26_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_27_1","volume-title":"Openimages: A public dataset for large-scale multi-label and multi-class image classification. Dataset available from https:\/\/github. com\/openimages 2, 3","author":"Krasin Ivan","year":"2017","unstructured":"Ivan Krasin, Tom Duerig, Neil Alldrin, Vittorio Ferrari, Sami Abu-El-Haija, Alina Kuznetsova, Hassan Rom, Jasper Uijlings, Stefan Popov, Andreas Veit, et al. 2017. Openimages: A public dataset for large-scale multi-label and multi-class image classification. Dataset available from https:\/\/github. com\/openimages 2, 3 (2017), 18."},{"key":"e_1_3_2_1_28_1","volume-title":"Advances in Neural Information Processing Systems","author":"Lee Kimin","year":"2018","unstructured":"Kimin Lee, Kibok Lee, Honglak Lee, and Jinwoo Shin. 2018. A Simple Unified Framework for Detecting Out-of-Distribution Samples and Adversarial Attacks. In Advances in Neural Information Processing Systems, S. Bengio, H. Wallach, H. Larochelle, K. Grauman, N. Cesa-Bianchi, and R. Garnett (Eds.), Vol. 31. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper\/2018\/file\/ abdeb6f575ac5c6676b747bca8d09cc2-Paper.pdf"},{"key":"e_1_3_2_1_29_1","volume-title":"International conference on machine learning. PMLR, 12888--12900","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In International conference on machine learning. PMLR, 12888--12900."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02141"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548269"},{"volume-title":"Enhancing The Reliability of Outof-distribution Image Detection in Neural Networks. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=H1VGkIxRZ","author":"Liang Shiyu","key":"e_1_3_2_1_32_1","unstructured":"Shiyu Liang, Yixuan Li, and R. Srikant. 2018. Enhancing The Reliability of Outof-distribution Image Detection in Neural Networks. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=H1VGkIxRZ"},{"key":"e_1_3_2_1_33_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems 36","author":"Liu Haotian","year":"2024","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2024. Visual instruction tuning. Advances in neural information processing systems 36 (2024)."},{"key":"e_1_3_2_1_34_1","volume-title":"Query2label: A simple transformer way to multi-label classification. arXiv preprint arXiv:2107.10834","author":"Liu Shilong","year":"2021","unstructured":"Shilong Liu, Lei Zhang, Xiao Yang, Hang Su, and Jun Zhu. 2021. Query2label: A simple transformer way to multi-label classification. arXiv preprint arXiv:2107.10834 (2021)."},{"key":"e_1_3_2_1_35_1","volume-title":"Lin (Eds.)","volume":"33","author":"Liu Weitang","year":"2020","unstructured":"Weitang Liu, Xiaoyun Wang, John Owens, and Yixuan Li. 2020. Energy-based Outof-distribution Detection. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., 21464--21475. https:\/\/proceedings.neurips.cc\/paper\/ 2020\/file\/f5496252609c43eb8a3d147ab9b9c006-Paper.pdf"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_37_1","volume-title":"Contrastive Language-Image Pretrained (CLIP) Models are Powerful Out-of-Distribution Detectors. arXiv preprint arXiv:2303.05828","author":"Michels Felix","year":"2023","unstructured":"Felix Michels, Nikolas Adaloglou, Tim Kaiser, and Markus Kollmann. 2023. Contrastive Language-Image Pretrained (CLIP) Models are Powerful Out-of-Distribution Detectors. arXiv preprint arXiv:2303.05828 (2023)."},{"key":"e_1_3_2_1_38_1","volume-title":"Delving into out-of-distribution detection with vision-language representations. Advances in neural information processing systems 35","author":"Ming Yifei","year":"2022","unstructured":"Yifei Ming, Ziyang Cai, Jiuxiang Gu, Yiyou Sun, Wei Li, and Yixuan Li. 2022. Delving into out-of-distribution detection with vision-language representations. Advances in neural information processing systems 35 (2022), 35087--35102."},{"key":"e_1_3_2_1_39_1","volume-title":"Locoop: Few-shot out-of-distribution detection via prompt learning. Advances in Neural Information Processing Systems 36","author":"Miyai Atsuyuki","year":"2024","unstructured":"Atsuyuki Miyai, Qing Yu, Go Irie, and Kiyoharu Aizawa. 2024. Locoop: Few-shot out-of-distribution detection via prompt learning. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_40_1","unstructured":"Long Ouyang Jeffrey Wu Xu Jiang Diogo Almeida Carroll Wainwright Pamela Mishkin Chong Zhang Sandhini Agarwal Katarina Slama Alex Ray et al. 2022. Training language models to follow instructions with human feedback. Advances in neural information processing systems 35 (2022) 27730--27744."},{"key":"e_1_3_2_1_41_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_1_42_1","volume-title":"A comprehensive survey of data mining-based fraud detection research. arXiv preprint arXiv:1009.6119","author":"Phua Clifton","year":"2010","unstructured":"Clifton Phua, Vincent Lee, Kate Smith, and Ross Gayler. 2010. A comprehensive survey of data mining-based fraud detection research. arXiv preprint arXiv:1009.6119 (2010)."},{"key":"e_1_3_2_1_43_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00012"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"crossref","unstructured":"Olga Russakovsky Jia Deng Hao Su Jonathan Krause Sanjeev Satheesh Sean Ma Zhiheng Huang Andrej Karpathy Aditya Khosla Michael Bernstein et al. 2015. Imagenet large scale visual recognition challenge. International journal of computer vision 115 (2015) 211--252.","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3107163"},{"key":"e_1_3_2_1_47_1","unstructured":"Yiyou Sun Chuan Guo and Yixuan Li. 2021. ReAct: Out-of-distribution Detection With Rectified Activations. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_48_1","volume-title":"Out-of-distribution Detection with Deep Nearest Neighbors. ICML","author":"Sun Yiyou","year":"2022","unstructured":"Yiyou Sun, Yifei Ming, Xiaojin Zhu, and Yixuan Li. 2022. Out-of-distribution Detection with Deep Nearest Neighbors. ICML (2022)."},{"key":"e_1_3_2_1_49_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_50_1","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research 9, 11 (2008).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00914"},{"key":"e_1_3_2_1_52_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_53_1","volume-title":"CLIPN for Zero-Shot OOD Detection: Teaching CLIP to Say No. arXiv preprint arXiv:2308.12213","author":"Li Yi","year":"2023","unstructured":"HualiangWang, Yi Li, Huifeng Yao, and Xiaomeng Li. 2023. CLIPN for Zero-Shot OOD Detection: Teaching CLIP to Say No. arXiv preprint arXiv:2308.12213 (2023)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00487"},{"key":"e_1_3_2_1_55_1","volume-title":"Zihang Dai, Yulia Tsvetkov, and Yuan Cao.","author":"Wang Zirui","year":"2021","unstructured":"Zirui Wang, Jiahui Yu, Adams Wei Yu, Zihang Dai, Yulia Tsvetkov, and Yuan Cao. 2021. Simvlm: Simple visual language model pretraining with weak supervision. arXiv preprint arXiv:2108.10904 (2021)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1117\/12.2293408"},{"key":"e_1_3_2_1_57_1","volume-title":"Robert Stanforth, Vivek Natarajan, Joseph R. Ledsam, Patricia MacWilliams, Pushmeet Kohli, Alan Karthikesalingam, Simon Kohl, Taylan Cemgil, S. M. Ali Eslami, and Olaf Ronneberger.","author":"Winkens Jim","year":"2020","unstructured":"Jim Winkens, Rudy Bunel, Abhijit Guha Roy, Robert Stanforth, Vivek Natarajan, Joseph R. Ledsam, Patricia MacWilliams, Pushmeet Kohli, Alan Karthikesalingam, Simon Kohl, Taylan Cemgil, S. M. Ali Eslami, and Olaf Ronneberger. 2020. Contrastive Training for Improved Out-of-Distribution Detection. arXiv preprint arXiv:2007.05566 (2020)."},{"volume-title":"Sun database: Large-scale scene recognition from abbey to zoo. In 2010 IEEE computer society conference on computer vision and pattern recognition","author":"Xiao Jianxiong","key":"e_1_3_2_1_58_1","unstructured":"Jianxiong Xiao, James Hays, Krista A Ehinger, Aude Oliva, and Antonio Torralba. 2010. Sun database: Large-scale scene recognition from abbey to zoo. In 2010 IEEE computer society conference on computer vision and pattern recognition. IEEE, 3485--3492."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01507"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611720"},{"key":"e_1_3_2_1_61_1","volume-title":"Recognize Anything: A Strong Image Tagging Model. arXiv preprint arXiv:2306.03514","author":"Zhang Youcai","year":"2023","unstructured":"Youcai Zhang, Xinyu Huang, Jinyu Ma, Zhaoyang Li, Zhaochuan Luo, Yanchun Xie, Yuzhuo Qin, Tong Luo, Yaqian Li, Shilong Liu, et al. 2023. Recognize Anything: A Strong Image Tagging Model. arXiv preprint arXiv:2306.03514 (2023)."},{"key":"e_1_3_2_1_62_1","volume-title":"Places: A 10 million image database for scene recognition","author":"Zhou Bolei","year":"2017","unstructured":"Bolei Zhou, Agata Lapedriza, Aditya Khosla, Aude Oliva, and Antonio Torralba. 2017. Places: A 10 million image database for scene recognition. IEEE transactions on pattern analysis and machine intelligence 40, 6 (2017), 1452--1464."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680579","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3680579","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:56Z","timestamp":1750295876000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680579"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":62,"alternative-id":["10.1145\/3664647.3680579","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3680579","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}