{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T22:00:05Z","timestamp":1776117605100,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681057","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"156-165","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["IconDM: Text-Guided Icon Set Expansion Using Diffusion Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-0104-7497","authenticated-orcid":false,"given":"Jiawei","family":"Lin","sequence":"first","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0133-9228","authenticated-orcid":false,"given":"Zhaoyun","family":"Jiang","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3240-5921","authenticated-orcid":false,"given":"Jiaqi","family":"Guo","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0931-2687","authenticated-orcid":false,"given":"Shizhao","family":"Sun","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7600-0934","authenticated-orcid":false,"given":"Ting","family":"Liu","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5437-0253","authenticated-orcid":false,"given":"Zijiang","family":"Yang","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8496-033X","authenticated-orcid":false,"given":"Jian-Guang","family":"Lou","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9230-2799","authenticated-orcid":false,"given":"Dongmei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"James Betker Gabriel Goh Li Jing Tim Brooks Jianfeng Wang Linjie Li Long Ouyang Juntang Zhuang Joyce Lee Yufei Guo et al. 2023. Improving image generation with better captions. Computer Science. https:\/\/cdn. openai. com\/papers\/dall-e-3. pdf Vol. 2 3 (2023) 8."},{"key":"e_1_3_2_1_2_1","volume-title":"Large scale GAN training for high fidelity natural image synthesis. arXiv preprint arXiv:1809.11096","author":"Brock Andrew","year":"2018","unstructured":"Andrew Brock, Jeff Donahue, and Karen Simonyan. 2018. Large scale GAN training for high fidelity natural image synthesis. arXiv preprint arXiv:1809.11096 (2018)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"e_1_3_2_1_4_1","volume-title":"Muse: Text-to-image generation via masked generative transformers. arXiv preprint arXiv:2301.00704","author":"Chang Huiwen","year":"2023","unstructured":"Huiwen Chang, Han Zhang, Jarred Barber, AJ Maschinot, Jose Lezama, Lu Jiang, Ming-Hsuan Yang, Kevin Murphy, William T Freeman, Michael Rubinstein, et al. 2023. Muse: Text-to-image generation via masked generative transformers. arXiv preprint arXiv:2301.00704 (2023)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612524"},{"key":"e_1_3_2_1_6_1","volume-title":"The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=eAKmQPe3m1","author":"Chen Junsong","year":"2024","unstructured":"Junsong Chen, Jincheng YU, Chongjian GE, Lewei Yao, Enze Xie, Zhongdao Wang, James Kwok, Ping Luo, Huchuan Lu, and Zhenguo Li. 2024. PixArt-$textbackslashalpha$: Fast Training of Diffusion Transformer for Photorealistic Text-to-Image Synthesis. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=eAKmQPe3m1"},{"key":"e_1_3_2_1_7_1","volume-title":"SATO: Stable Text-to-Motion Framework. arXiv preprint arXiv:2405.01461","author":"Chen Wenshuo","year":"2024","unstructured":"Wenshuo Chen, Hongru Xiao, Erhang Zhang, Lijie Hu, Lei Wang, Mengyuan Liu, and Chen Chen. 2024. SATO: Stable Text-to-Motion Framework. arXiv preprint arXiv:2405.01461 (2024)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548109"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"e_1_3_2_1_10_1","volume-title":"An image is worth one word: Personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:2208.01618","author":"Gal Rinon","year":"2022","unstructured":"Rinon Gal, Yuval Alaluf, Yuval Atzmon, Or Patashnik, Amit H Bermano, Gal Chechik, and Daniel Cohen-Or. 2022. An image is worth one word: Personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:2208.01618 (2022)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3422622"},{"key":"e_1_3_2_1_12_1","volume-title":"Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626","author":"Hertz Amir","year":"2022","unstructured":"Amir Hertz, Ron Mokady, Jay Tenenbaum, Kfir Aberman, Yael Pritch, and Daniel Cohen-Or. 2022. Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626 (2022)."},{"key":"e_1_3_2_1_13_1","volume-title":"Style aligned image generation via shared attention. arXiv preprint arXiv:2312.02133","author":"Hertz Amir","year":"2023","unstructured":"Amir Hertz, Andrey Voynov, Shlomi Fruchter, and Daniel Cohen-Or. 2023. Style aligned image generation via shared attention. arXiv preprint arXiv:2312.02133 (2023)."},{"key":"e_1_3_2_1_14_1","unstructured":"Jonathan Ho William Chan Chitwan Saharia Jay Whang Ruiqi Gao Alexey Gritsenko Diederik P Kingma Ben Poole Mohammad Norouzi David J Fleet et al. 2022. Imagen video: High definition video generation with diffusion models. arXiv preprint arXiv:2210.02303 (2022)."},{"key":"e_1_3_2_1_15_1","volume-title":"Denoising diffusion probabilistic models. Advances in neural information processing systems","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems, Vol. 33 (2020), 6840--6851."},{"key":"e_1_3_2_1_16_1","volume-title":"Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598","author":"Ho Jonathan","year":"2022","unstructured":"Jonathan Ho and Tim Salimans. 2022. Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)."},{"key":"e_1_3_2_1_17_1","first-page":"8633","article-title":"Video diffusion models","volume":"35","author":"Ho Jonathan","year":"2022","unstructured":"Jonathan Ho, Tim Salimans, Alexey Gritsenko, William Chan, Mohammad Norouzi, and David J Fleet. 2022. Video diffusion models. Advances in Neural Information Processing Systems, Vol. 35 (2022), 8633--8646.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_18_1","volume-title":"International conference on machine learning. PMLR, 2790--2799","author":"Houlsby Neil","year":"2019","unstructured":"Neil Houlsby, Andrei Giurgiu, Stanislaw Jastrzebski, Bruna Morrone, Quentin De Laroussilhe, Andrea Gesmundo, Mona Attariyan, and Sylvain Gelly. 2019. Parameter-efficient transfer learning for NLP. In International conference on machine learning. PMLR, 2790--2799."},{"key":"e_1_3_2_1_19_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.167"},{"key":"e_1_3_2_1_21_1","volume-title":"Recognizing image style. arXiv preprint arXiv:1311.3715","author":"Karayev Sergey","year":"2013","unstructured":"Sergey Karayev, Matthew Trentacoste, Helen Han, Aseem Agarwala, Trevor Darrell, Aaron Hertzmann, and Holger Winnemoeller. 2013. Recognizing image style. arXiv preprint arXiv:1311.3715 (2013)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00582"},{"key":"e_1_3_2_1_24_1","volume-title":"Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114","author":"Kingma Diederik P","year":"2013","unstructured":"Diederik P Kingma and Max Welling. 2013. Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Nupur Kumari Bingliang Zhang Richard Zhang Eli Shechtman and Jun-Yan Zhu. 2023. Multi-Concept Customization of Text-to-Image Diffusion. (2023).","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2017.2730822"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"e_1_3_2_1_28_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems","author":"Liu Haotian","year":"2024","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2024. Visual instruction tuning. Advances in neural information processing systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_29_1","volume-title":"Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101","author":"Loshchilov Ilya","year":"2017","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00984"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01583"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2018.00157"},{"key":"e_1_3_2_1_34_1","volume-title":"Conditional generative adversarial nets. arXiv preprint arXiv:1411.1784","author":"Mirza Mehdi","year":"2014","unstructured":"Mehdi Mirza and Simon Osindero. 2014. Conditional generative adversarial nets. arXiv preprint arXiv:1411.1784 (2014)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"e_1_3_2_1_36_1","volume-title":"Glide: Towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741","author":"Nichol Alex","year":"2021","unstructured":"Alex Nichol, Prafulla Dhariwal, Aditya Ramesh, Pranav Shyam, Pamela Mishkin, Bob McGrew, Ilya Sutskever, and Mark Chen. 2021. Glide: Towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741 (2021)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2019.00086"},{"key":"e_1_3_2_1_38_1","volume-title":"Sdxl: Improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952","author":"Podell Dustin","year":"2023","unstructured":"Dustin Podell, Zion English, Kyle Lacey, Andreas Blattmann, Tim Dockhorn, Jonas M\u00fcller, Joe Penna, and Robin Rombach. 2023. Sdxl: Improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)."},{"key":"e_1_3_2_1_39_1","volume-title":"Dreamfusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988","author":"Poole Ben","year":"2022","unstructured":"Ben Poole, Ajay Jain, Jonathan T Barron, and Ben Mildenhall. 2022. Dreamfusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988 (2022)."},{"key":"e_1_3_2_1_40_1","volume-title":"DEADiff: An Efficient Stylization Diffusion Model with Disentangled Representations. arXiv preprint arXiv:2403.06951","author":"Qi Tianhao","year":"2024","unstructured":"Tianhao Qi, Shancheng Fang, Yanze Wu, Hongtao Xie, Jiawei Liu, Lang Chen, Qian He, and Yongdong Zhang. 2024. DEADiff: An Efficient Stylization Diffusion Model with Disentangled Representations. arXiv preprint arXiv:2403.06951 (2024)."},{"key":"e_1_3_2_1_41_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_42_1","volume-title":"Unsupervised representation learning with deep convolutional generative adversarial networks. arXiv preprint arXiv:1511.06434","author":"Radford Alec","year":"2015","unstructured":"Alec Radford, Luke Metz, and Soumith Chintala. 2015. Unsupervised representation learning with deep convolutional generative adversarial networks. arXiv preprint arXiv:1511.06434 (2015)."},{"key":"e_1_3_2_1_43_1","volume-title":"Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125, Vol. 1, 2 (2022), 3."},{"key":"e_1_3_2_1_44_1","volume-title":"International conference on machine learning. Pmlr, 8821--8831","author":"Ramesh Aditya","year":"2021","unstructured":"Aditya Ramesh, Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford, Mark Chen, and Ilya Sutskever. 2021. Zero-shot text-to-image generation. In International conference on machine learning. Pmlr, 8821--8831."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_46_1","volume-title":"U-net: Convolutional networks for biomedical image segmentation. In Medical image computing and computer-assisted intervention--MICCAI 2015: 18th international conference","author":"Ronneberger Olaf","year":"2015","unstructured":"Olaf Ronneberger, Philipp Fischer, and Thomas Brox. 2015. U-net: Convolutional networks for biomedical image segmentation. In Medical image computing and computer-assisted intervention--MICCAI 2015: 18th international conference, Munich, Germany, October 5--9, 2015, proceedings, part III 18. Springer, 234--241."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"e_1_3_2_1_48_1","volume-title":"Burcu Karagol Ayan, Tim Salimans, et al.","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily L Denton, Kamyar Ghasemipour, Raphael Gontijo Lopes, Burcu Karagol Ayan, Tim Salimans, et al. 2022. Photorealistic text-to-image diffusion models with deep language understanding. Advances in neural information processing systems, Vol. 35 (2022), 36479--36494."},{"key":"e_1_3_2_1_49_1","first-page":"25278","article-title":"Laion-5b: An open large-scale dataset for training next generation image-text models","volume":"35","author":"Schuhmann Christoph","year":"2022","unstructured":"Christoph Schuhmann, Romain Beaumont, Richard Vencu, Cade Gordon, Ross Wightman, Mehdi Cherti, Theo Coombes, Aarush Katta, Clayton Mullis, Mitchell Wortsman, et al. 2022. Laion-5b: An open large-scale dataset for training next generation image-text models. Advances in Neural Information Processing Systems, Vol. 35 (2022), 25278--25294.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_50_1","volume-title":"Laion-400m: Open dataset of clip-filtered 400 million image-text pairs. arXiv preprint arXiv:2111.02114","author":"Schuhmann Christoph","year":"2021","unstructured":"Christoph Schuhmann, Richard Vencu, Romain Beaumont, Robert Kaczmarczyk, Clayton Mullis, Aarush Katta, Theo Coombes, Jenia Jitsev, and Aran Komatsuzaki. 2021. Laion-400m: Open dataset of clip-filtered 400 million image-text pairs. arXiv preprint arXiv:2111.02114 (2021)."},{"key":"e_1_3_2_1_51_1","unstructured":"Animesh Sinha Bo Sun Anmol Kalia Arantxa Casanova Elliot Blanchard David Yan Winnie Zhang Tony Nelli Jiahui Chen Hardik Shah et al. 2023. Text-to-Sticker: Style Tailoring Latent Diffusion Models for Human Expression. arXiv preprint arXiv:2311.10794 (2023)."},{"key":"e_1_3_2_1_52_1","volume-title":"Irina Blok, Huiwen Chang, Jarred Barber, Lu Jiang, Glenn Entis, Yuanzhen Li, et al.","author":"Sohn Kihyuk","year":"2023","unstructured":"Kihyuk Sohn, Nataniel Ruiz, Kimin Lee, Daniel Castro Chin, Irina Blok, Huiwen Chang, Jarred Barber, Lu Jiang, Glenn Entis, Yuanzhen Li, et al. 2023. Styledrop: Text-to-image generation in any style. arXiv preprint arXiv:2306.00983 (2023)."},{"key":"e_1_3_2_1_53_1","volume-title":"Denoising Diffusion Implicit Models. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=St1giarCHLP","author":"Song Jiaming","year":"2021","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2021. Denoising Diffusion Implicit Models. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=St1giarCHLP"},{"key":"e_1_3_2_1_54_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_55_1","volume-title":"InstantStyle: Free Lunch towards Style-Preserving in Text-to-Image Generation. arXiv preprint arXiv:2404.02733","author":"Wang Haofan","year":"2024","unstructured":"Haofan Wang, Qixun Wang, Xu Bai, Zekui Qin, and Anthony Chen. 2024. InstantStyle: Free Lunch towards Style-Preserving in Text-to-Image Generation. arXiv preprint arXiv:2404.02733 (2024)."},{"key":"e_1_3_2_1_56_1","volume-title":"Styleadapter: A single-pass lora-free model for stylized image generation. arXiv preprint arXiv:2309.01770","author":"Wang Zhouxia","year":"2023","unstructured":"Zhouxia Wang, Xintao Wang, Liangbin Xie, Zhongang Qi, Ying Shan, Wenping Wang, and Ping Luo. 2023. Styleadapter: A single-pass lora-free model for stylized image generation. arXiv preprint arXiv:2309.01770 (2023)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19787-1_41"},{"key":"e_1_3_2_1_58_1","volume-title":"IconShop: Text-Based Vector Icon Synthesis with Autoregressive Transformers. arXiv preprint arXiv:2304.14400","author":"Wu Ronghuan","year":"2023","unstructured":"Ronghuan Wu, Wanchao Su, Kede Ma, and Jing Liao. 2023. IconShop: Text-Based Vector Icon Synthesis with Autoregressive Transformers. arXiv preprint arXiv:2304.14400 (2023)."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01763"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.3390\/app11177890"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00978"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681057","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681057","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:57:52Z","timestamp":1750294672000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681057"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":62,"alternative-id":["10.1145\/3664647.3681057","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681057","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}