{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T01:10:25Z","timestamp":1755825025224,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":66,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1145\/3731715.3734580","type":"proceedings-article","created":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T18:31:04Z","timestamp":1750876264000},"page":"2140-2142","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Visual Content Generation in the Era of Large Foundation Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-6555-3834","authenticated-orcid":false,"given":"Leigang","family":"Qu","sequence":"first","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8073-9168","authenticated-orcid":false,"given":"Fei","family":"Shen","sequence":"additional","affiliation":[{"name":"Nanjing University of Science and Technology, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5886-0906","authenticated-orcid":false,"given":"Zhenglin","family":"Zhou","sequence":"additional","affiliation":[{"name":"Zhejiang University, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8517-3346","authenticated-orcid":false,"given":"Jiayi","family":"Lyu","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5199-1428","authenticated-orcid":false,"given":"Wenjie","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0286-8439","authenticated-orcid":false,"given":"Lu","family":"Jiang","sequence":"additional","affiliation":[{"name":"ByteDance, San Jose, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,6,30]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 5968--5976","author":"Bhunia Ankan Kumar","year":"2023","unstructured":"Ankan Kumar Bhunia, Salman Khan, Hisham Cholakkal, Rao Muhammad Anwer, Jorma Laaksonen, Mubarak Shah, and Fahad Shahbaz Khan. 2023. Person image synthesis via denoising diffusion model. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 5968--5976."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00834"},{"key":"e_1_3_2_1_3_1","unstructured":"Hejia Chen Haoxian Zhang Shoulong Zhang Xiaoqiang Liu Sisi Zhuang Yuan Zhang Pengfei Wan Di Zhang and Shuai Li. 2025 b. Cafe-Talk: Generating 3D Talking Face Animation with Multimodal Coarse- and Fine-grained Control. arxiv: 2503.14517 [cs.CV] https:\/\/arxiv.org\/abs\/2503.14517"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680691"},{"key":"e_1_3_2_1_5_1","volume-title":"2025 a. Janus-pro: Unified multimodal understanding and generation with data and model scaling. arXiv preprint arXiv:2501.17811","author":"Chen Xiaokang","year":"2025","unstructured":"Xiaokang Chen, Zhiyu Wu, Xingchao Liu, Zizheng Pan, Wen Liu, Zhenda Xie, Xingkai Yu, and Chong Ruan. 2025 a. Janus-pro: Unified multimodal understanding and generation with data and model scaling. arXiv preprint arXiv:2501.17811 (2025)."},{"key":"e_1_3_2_1_6_1","volume-title":"Luciddreamer: Domain-free generation of 3d gaussian splatting scenes. In CVPR.","author":"Chung Jaeyoung","year":"2024","unstructured":"Jaeyoung Chung, Suyoung Lee, Hyeongjin Nam, Jaerin Lee, and Kyoung Mu Lee. 2024. Luciddreamer: Domain-free generation of 3d gaussian splatting scenes. In CVPR."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2410.07718"},{"key":"e_1_3_2_1_8_1","volume-title":"Samir Yitzhak Gadre, et al","author":"Deitke Matt","year":"2024","unstructured":"Matt Deitke, Ruoshi Liu, Matthew Wallingford, Huong Ngo, Oscar Michel, Aditya Kusupati, Alan Fan, Christian Laforte, Vikram Voleti, Samir Yitzhak Gadre, et al. 2024. Objaverse-xl: A universe of 10m 3d objects. In NeurIPS, Vol. 36."},{"key":"e_1_3_2_1_9_1","volume-title":"Objaverse: A universe of annotated 3d objects. In CVPR. 13142--13153.","author":"Deitke Matt","year":"2023","unstructured":"Matt Deitke, Dustin Schwenk, Jordi Salvador, Luca Weihs, Oscar Michel, Eli VanderBilt, Ludwig Schmidt, Kiana Ehsani, Aniruddha Kembhavi, and Ali Farhadi. 2023. Objaverse: A universe of annotated 3d objects. In CVPR. 13142--13153."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02069"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME57554.2024.10687416"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3-031--25072--9_23"},{"key":"e_1_3_2_1_13_1","unstructured":"Jianzhu Guo Xiangyu Zhu and Zhen Lei. 2018. 3DDFA. https:\/\/github.com\/cleardusk\/3DDFA."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58529-7_10"},{"key":"e_1_3_2_1_15_1","unstructured":"Jonathan Ho Ajay Jain and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. In NeurIPS."},{"key":"e_1_3_2_1_16_1","volume-title":"Lrm: Large reconstruction model for single image to 3d. arXiv preprint arXiv:2311.04400","author":"Hong Yicong","year":"2023","unstructured":"Yicong Hong, Kai Zhang, Jiuxiang Gu, Sai Bi, Yang Zhou, Difan Liu, Feng Liu, Kalyan Sunkavalli, Trung Bui, and Hao Tan. 2023. Lrm: Large reconstruction model for single image to 3d. arXiv preprint arXiv:2311.04400 (2023)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Ajay Jain Ben Mildenhall Jonathan T. Barron Pieter Abbeel and Ben Poole. 2022. Zero-Shot Text-Guided Object Generation with Dream Fields. In CVPR.","DOI":"10.1109\/CVPR52688.2022.00094"},{"key":"e_1_3_2_1_18_1","volume-title":"Noise-free score distillation. arXiv preprint arXiv:2310.17590","author":"Katzir Oren","year":"2023","unstructured":"Oren Katzir, Or Patashnik, Daniel Cohen-Or, and Dani Lischinski. 2023. Noise-free score distillation. arXiv preprint arXiv:2310.17590 (2023)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3592433"},{"key":"e_1_3_2_1_20_1","volume-title":"AnyDressing: Customizable Multi-Garment Virtual Dressing via Latent Diffusion Models. arXiv preprint arXiv:2412.04146","author":"Li Xinghui","year":"2024","unstructured":"Xinghui Li, Qichao Sun, Pengze Zhang, Fulong Ye, Zhichao Liao, Wanquan Feng, Songtao Zhao, and Qian He. 2024. AnyDressing: Customizable Multi-Garment Virtual Dressing via Latent Diffusion Models. arXiv preprint arXiv:2412.04146 (2024)."},{"key":"e_1_3_2_1_21_1","volume-title":"NeurIPS","volume":"36","author":"Liu Minghua","year":"2024","unstructured":"Minghua Liu, Chao Xu, Haian Jin, Linghao Chen, Mukund Varma T, Zexiang Xu, and Hao Su. 2024. One-2--3--45: Any single image to 3d mesh in 45 seconds without per-shape optimization. In NeurIPS, Vol. 36."},{"key":"e_1_3_2_1_22_1","volume-title":"Pavel Tokmakov, Sergey Zakharov, and Carl Vondrick.","author":"Liu Ruoshi","year":"2023","unstructured":"Ruoshi Liu, Rundi Wu, Basile Van Hoorick, Pavel Tokmakov, Sergey Zakharov, and Carl Vondrick. 2023b. Zero-1-to-3: Zero-shot one image to 3d object. In ICCV. 9298--9309."},{"key":"e_1_3_2_1_23_1","volume-title":"Syncdreamer: Generating multiview-consistent images from a single-view image. arXiv preprint arXiv:2309.03453","author":"Liu Yuan","year":"2023","unstructured":"Yuan Liu, Cheng Lin, Zijiao Zeng, Xiaoxiao Long, Lingjie Liu, Taku Komura, and Wenping Wang. 2023a. Syncdreamer: Generating multiview-consistent images from a single-view image. arXiv preprint arXiv:2309.03453 (2023)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Xiaoxiao Long Yuan-Chen Guo Cheng Lin Yuan Liu Zhiyang Dou Lingjie Liu Yuexin Ma Song-Hai Zhang Marc Habermann Christian Theobalt et al. 2024. Wonder3d: Single image to 3d using cross-domain diffusion. In CVPR. 9970--9980.","DOI":"10.1109\/CVPR52733.2024.00951"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME57554.2024.10687525"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19836-6_5"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Ben Mildenhall Pratul P. Srinivasan Matthew Tancik Jonathan T. Barron Ravi Ramamoorthi and Ren Ng. 2020. NeRF: Representing Scenes as Neural Radiance Fields for View Synthesis. In ECCV.","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00290"},{"key":"e_1_3_2_1_29_1","volume-title":"Deepsdf: Learning continuous signed distance functions for shape representation. In CVPR. 165--174.","author":"Park Jeong Joon","year":"2019","unstructured":"Jeong Joon Park, Peter Florence, Julian Straub, Richard Newcombe, and Steven Lovegrove. 2019. Deepsdf: Learning continuous signed distance functions for shape representation. In CVPR. 165--174."},{"key":"e_1_3_2_1_30_1","volume-title":"Dreamfusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988","author":"Poole Ben","year":"2022","unstructured":"Ben Poole, Ajay Jain, Jonathan T Barron, and Ben Mildenhall. 2022. Dreamfusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988 (2022)."},{"key":"e_1_3_2_1_31_1","volume-title":"TIGeR: Unifying Text-to-Image Generation and Retrieval with Large Multimodal Models. In The Thirteenth International Conference on Learning Representations.","author":"Qu Leigang","year":"2024","unstructured":"Leigang Qu, Haochuan Li, Tan Wang, Wenjie Wang, Yongqi Li, Liqiang Nie, and Tat-Seng Chua. 2024b. TIGeR: Unifying Text-to-Image Generation and Retrieval with Large Multimodal Models. In The Thirteenth International Conference on Learning Representations."},{"key":"e_1_3_2_1_32_1","volume-title":"SILMM: Self-Improving Large Multimodal Models for Compositional Text-to-Image Generation. arXiv preprint arXiv:2412.05818","author":"Qu Leigang","year":"2024","unstructured":"Leigang Qu, Haochuan Li, Wenjie Wang, Xiang Liu, Juncheng Li, Liqiang Nie, and Tat-Seng Chua. 2024a. SILMM: Self-Improving Large Multimodal Models for Compositional Text-to-Image Generation. arXiv preprint arXiv:2412.05818 (2024)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00710"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612012"},{"key":"e_1_3_2_1_35_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i7.32729"},{"key":"e_1_3_2_1_38_1","volume-title":"Imagpose: A unified conditional framework for pose-guided person generation. Advances in neural information processing systems","author":"Shen Fei","year":"2024","unstructured":"Fei Shen and Jinhui Tang. 2024. Imagpose: A unified conditional framework for pose-guided person generation. Advances in neural information processing systems, Vol. 37 (2024), 6246--6266."},{"key":"e_1_3_2_1_39_1","volume-title":"2025 b. Long-Term TalkingFace Generation via Motion-Prior Conditional Diffusion Model. arXiv preprint arXiv:2502.09533","author":"Shen Fei","year":"2025","unstructured":"Fei Shen, Cong Wang, Junyao Gao, Qin Guo, Jisheng Dang, Jinhui Tang, and Tat-Seng Chua. 2025 b. Long-Term TalkingFace Generation via Motion-Prior Conditional Diffusion Model. arXiv preprint arXiv:2502.09533 (2025)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i7.32728"},{"key":"e_1_3_2_1_41_1","volume-title":"Advancing pose-guided image synthesis with progressive conditional diffusion models. arXiv preprint arXiv:2310.06313","author":"Shen Fei","year":"2023","unstructured":"Fei Shen, Hu Ye, Jun Zhang, Cong Wang, Xiao Han, and Wei Yang. 2023. Advancing pose-guided image synthesis with progressive conditional diffusion models. arXiv preprint arXiv:2310.06313 (2023)."},{"key":"e_1_3_2_1_42_1","volume-title":"2025 d. IMAGGarment-1: Fine-Grained Garment Generation for Controllable Fashion Design. arXiv preprint arXiv:2504.13176","author":"Shen Fei","year":"2025","unstructured":"Fei Shen, Jian Yu, Cong Wang, Xin Jiang, Xiaoyu Du, and Jinhui Tang. 2025 d. IMAGGarment-1: Fine-Grained Garment Generation for Controllable Fashion Design. arXiv preprint arXiv:2504.13176 (2025)."},{"key":"e_1_3_2_1_43_1","volume-title":"Mvdream: Multi-view diffusion for 3d generation. arXiv preprint arXiv:2308.16512","author":"Shi Yichun","year":"2023","unstructured":"Yichun Shi, Peng Wang, Jianglong Ye, Mai Long, Kejie Li, and Xiao Yang. 2023. Mvdream: Multi-view diffusion for 3d generation. arXiv preprint arXiv:2308.16512 (2023)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28313"},{"key":"e_1_3_2_1_45_1","volume-title":"LGM: Large Multi-View Gaussian Model for High-Resolution 3D Content Creation. arXiv preprint arXiv:2402.05054","author":"Tang Jiaxiang","year":"2024","unstructured":"Jiaxiang Tang, Zhaoxi Chen, Xiaokang Chen, Tengfei Wang, Gang Zeng, and Ziwei Liu. 2024. LGM: Large Multi-View Gaussian Model for High-Resolution 3D Content Creation. arXiv preprint arXiv:2402.05054 (2024)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2503.14295"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","unstructured":"Haochen Wang Xiaodan Du Jiahao Li Raymond A. Yeh and Greg Shakhnarovich. 2022. Score Jacobian Chaining: Lifting Pretrained 2D Diffusion Models for 3D Generation. In CVPR.","DOI":"10.1109\/CVPR52729.2023.01214"},{"key":"e_1_3_2_1_48_1","volume-title":"ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation. arXiv preprint arXiv:2305.16213","author":"Wang Zhengyi","year":"2023","unstructured":"Zhengyi Wang, Cheng Lu, Yikai Wang, Fan Bao, Chongxuan Li, Hang Su, and Jun Zhu. 2023. ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation. arXiv preprint arXiv:2305.16213 (2023)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00165"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00165"},{"key":"e_1_3_2_1_51_1","unstructured":"Huawei Wei Zejun Yang and Zhisheng Wang. 2024. AniPortrait: Audio-Driven Synthesis of Photorealistic Portrait Animations. arxiv: 2403.17694 [cs.CV]"},{"key":"e_1_3_2_1_52_1","volume-title":"Janus: Decoupling visual encoding for unified multimodal understanding and generation. arXiv preprint arXiv:2410.13848","author":"Wu Chengyue","year":"2024","unstructured":"Chengyue Wu, Xiaokang Chen, Zhiyu Wu, Yiyang Ma, Xingchao Liu, Zizheng Pan, Wen Liu, Zhenda Xie, Xingkai Yu, Chong Ruan, et al. 2024a. Janus: Decoupling visual encoding for unified multimodal understanding and generation. arXiv preprint arXiv:2410.13848 (2024)."},{"key":"e_1_3_2_1_53_1","unstructured":"Zike Wu Pan Zhou Xuanyu Yi Xiaoding Yuan and Hanwang Zhang. 2024b. Consistent3d: Towards consistent high-fidelity text-to-3d generation with deterministic sampling prior. In CVPR."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"crossref","unstructured":"Jianfeng Xiang Zelong Lv Sicheng Xu Yu Deng Ruicheng Wang Bowen Zhang Dong Chen Xin Tong and Jiaolong Yang. 2025. Structured 3d latents for scalable and versatile 3d generation. In CVPR.","DOI":"10.1109\/CVPR52734.2025.02000"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2406.08801"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3592442"},{"key":"e_1_3_2_1_57_1","volume-title":"AnyLogo: Symbiotic Subject-Driven Diffusion System with Gemini Status. arXiv preprint arXiv:2409.17740","author":"Zhang Jinghao","year":"2024","unstructured":"Jinghao Zhang, Wen Qian, Hao Luo, Fan Wang, and Feng Zhao. 2024a. AnyLogo: Symbiotic Subject-Driven Diffusion System with Gemini Status. arXiv preprint arXiv:2409.17740 (2024)."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3658146"},{"key":"e_1_3_2_1_59_1","volume-title":"Posediffusion: a Pose-Guided Human Image Generator Via Cascaded Diffusion Models. Available at SSRN 4552596","author":"Zhang Songchuan","year":"2023","unstructured":"Songchuan Zhang and Yiwei Zhou. 2023. Posediffusion: a Pose-Guided Human Image Generator Via Cascaded Diffusion Models. Available at SSRN 4552596 (2023)."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00829"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00836"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02116"},{"key":"e_1_3_2_1_63_1","volume-title":"Michelangelo: Conditional 3D Shape Generation based on Shape-Image-Text Aligned Latent Representation. In NeurIPS.","author":"Zhao Zibo","year":"2023","unstructured":"Zibo Zhao, Wen Liu, Xin Chen, Xianfang Zeng, Rui Wang, Pei Cheng, BIN FU, Tao Chen, Gang YU, and Shenghua Gao. 2023. Michelangelo: Conditional 3D Shape Generation based on Shape-Image-Text Aligned Latent Representation. In NeurIPS."},{"key":"e_1_3_2_1_64_1","first-page":"110315","article-title":"Storydiffusion: Consistent self-attention for long-range image and video generation","volume":"37","author":"Zhou Yupeng","year":"2024","unstructured":"Yupeng Zhou, Daquan Zhou, Ming-Ming Cheng, Jiashi Feng, and Qibin Hou. 2024. Storydiffusion: Consistent self-attention for long-range image and video generation. Advances in Neural Information Processing Systems, Vol. 37 (2024), 110315--110340.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_65_1","unstructured":"Zhenglin Zhou Xiaobo Xia Fan Ma Hehe Fan Yi Yang and Tat-Seng Chua. 2025. DreamDPO: Aligning Text-to-3D Generation with Human Preferences via Direct Preference Optimization."},{"key":"e_1_3_2_1_66_1","volume-title":"LogoSticker: Inserting Logos Into Diffusion Models for Customized Generation. In European Conference on Computer Vision. Springer, 363--378","author":"Zhu Mingkang","year":"2024","unstructured":"Mingkang Zhu, Xi Chen, Zhongdao Wang, Hengshuang Zhao, and Jiaya Jia. 2024. LogoSticker: Inserting Logos Into Diffusion Models for Customized Generation. In European Conference on Computer Vision. Springer, 363--378."}],"event":{"name":"ICMR '25: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Chicago IL USA","acronym":"ICMR '25"},"container-title":["Proceedings of the 2025 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731715.3734580","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T04:03:53Z","timestamp":1755749033000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731715.3734580"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":66,"alternative-id":["10.1145\/3731715.3734580","10.1145\/3731715"],"URL":"https:\/\/doi.org\/10.1145\/3731715.3734580","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]},"assertion":[{"value":"2025-06-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}