{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:17:26Z","timestamp":1778080646789,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":64,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3758287","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:26:55Z","timestamp":1761377215000},"page":"13289-13296","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["SVGenius: Benchmarking LLMs in SVG Understanding, Editing and Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-1511-0405","authenticated-orcid":false,"given":"Siqi","family":"Chen","sequence":"first","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4023-056X","authenticated-orcid":false,"given":"Xinyu","family":"Dong","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5397-8422","authenticated-orcid":false,"given":"Haolei","family":"Xu","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6486-0041","authenticated-orcid":false,"given":"Xingyu","family":"Wu","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0019-1034","authenticated-orcid":false,"given":"Fei","family":"Tang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5991-0651","authenticated-orcid":false,"given":"Hang","family":"Zhang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7903-8102","authenticated-orcid":false,"given":"Yuchen","family":"Yan","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2168-3045","authenticated-orcid":false,"given":"Linjuan","family":"Wu","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8312-0184","authenticated-orcid":false,"given":"Wenqi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9163-0633","authenticated-orcid":false,"given":"Guiyang","family":"Hou","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0975-3554","authenticated-orcid":false,"given":"Yongliang","family":"Shen","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0200-9215","authenticated-orcid":false,"given":"Weiming","family":"Lu","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9017-2508","authenticated-orcid":false,"given":"Yueting","family":"Zhuang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Anthropic. 2023. Claude 2. https:\/\/www.anthropic.com\/index\/claude-2. Accessed: 2025-05-29."},{"key":"e_1_3_2_1_2_1","unstructured":"Shuai Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge Sibo Song Kai Dang Peng Wang Shijie Wang Jun Tang et al. 2025. Qwen2.5-vl technical report. arXiv preprint arXiv:2502.13923 (2025)."},{"key":"e_1_3_2_1_3_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems Vol. 33 (2020) 1877-1901."},{"key":"e_1_3_2_1_4_1","volume-title":"Leveraging large language models for scalable vector graphics-driven image understanding. arXiv preprint arXiv:2306.06094","author":"Cai Mu","year":"2023","unstructured":"Mu Cai, Zeyi Huang, Yuheng Li, Utkarsh Ojha, Haohan Wang, and Yong Jae Lee. 2023. Leveraging large language models for scalable vector graphics-driven image understanding. arXiv preprint arXiv:2306.06094 (2023)."},{"key":"e_1_3_2_1_5_1","first-page":"16351","article-title":"Deepsvg: A hierarchical generative network for vector graphics animation","volume":"33","author":"Carlier Alexandre","year":"2020","unstructured":"Alexandre Carlier, Martin Danelljan, Alexandre Alahi, and Radu Timofte. 2020. Deepsvg: A hierarchical generative network for vector graphics animation. Advances in Neural Information Processing Systems, Vol. 33 (2020), 16351-16361.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_6_1","volume-title":"Jared Kaplan, Harri Edwards, Yuri Burda, Nicholas Joseph, Greg Brockman, et al.","author":"Chen Mark","year":"2021","unstructured":"Mark Chen, Jerry Tworek, Heewoo Jun, Qiming Yuan, Henrique Ponde De Oliveira Pinto, Jared Kaplan, Harri Edwards, Yuri Burda, Nicholas Joseph, Greg Brockman, et al., 2021. Evaluating large language models trained on code. arXiv preprint arXiv:2107.03374 (2021)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i3.32236"},{"key":"e_1_3_2_1_8_1","first-page":"1","article-title":"Palm: Scaling language modeling with pathways","volume":"24","author":"Chowdhery Aakanksha","year":"2023","unstructured":"Aakanksha Chowdhery, Sharan Narang, Jacob Devlin, Maarten Bosma, Gaurav Mishra, Adam Roberts, Paul Barham, Hyung Won Chung, Charles Sutton, Sebastian Gehrmann, et al., 2023. Palm: Scaling language modeling with pathways. Journal of Machine Learning Research, Vol. 24, 240 (2023), 1-113.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_9_1","volume-title":"Figr: Few-shot image generation with reptile. arXiv preprint arXiv:1901.02199","author":"Clou\u00e2tre Louis","year":"2019","unstructured":"Louis Clou\u00e2tre and Marc Demers. 2019. Figr: Few-shot image generation with reptile. arXiv preprint arXiv:1901.02199 (2019)."},{"key":"e_1_3_2_1_10_1","volume-title":"SVGO: Node.js tool for optimizing SVG files. https:\/\/github.com\/svg\/svgo Accessed: 2024-12-XX.","author":"Contributors SVG","year":"2024","unstructured":"SVG Contributors. 2024. SVGO: Node.js tool for optimizing SVG files. https:\/\/github.com\/svg\/svgo Accessed: 2024-12-XX."},{"key":"e_1_3_2_1_11_1","unstructured":"Aaron Grattafiori Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Alex Vaughan et al. 2024. The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)."},{"key":"e_1_3_2_1_12_1","unstructured":"Daya Guo Dejian Yang Haowei Zhang Junxiao Song Ruoyu Zhang Runxin Xu Qihao Zhu Shirong Ma Peiyi Wang Xiao Bi et al. 2025. Deepseek-r1: Incentivizing reasoning capability in llms via reinforcement learning. arXiv preprint arXiv:2501.12948 (2025)."},{"key":"e_1_3_2_1_13_1","volume-title":"A neural representation of sketch drawings. arXiv preprint arXiv:1704.03477","author":"Ha David","year":"2017","unstructured":"David Ha and Douglas Eck. 2017. A neural representation of sketch drawings. arXiv preprint arXiv:1704.03477 (2017)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02351"},{"key":"e_1_3_2_1_15_1","unstructured":"Binyuan Hui Jian Yang Zeyu Cui Jiaxi Yang Dayiheng Liu Lei Zhang Tianyu Liu Jiajun Zhang Bowen Yu Keming Lu et al. 2024. Qwen2. 5-coder technical report. arXiv preprint arXiv:2409.12186 (2024)."},{"key":"e_1_3_2_1_16_1","unstructured":"Aaron Hurst Adam Lerer Adam P Goucher Adam Perelman Aditya Ramesh Aidan Clark AJ Ostrow Akila Welihinda Alan Hayes Alec Radford et al. 2024. Gpt-4o system card. arXiv preprint arXiv:2410.21276 (2024)."},{"key":"e_1_3_2_1_17_1","unstructured":"IconFont. 2024. IconFont - Alibaba Vector Icon Library. https:\/\/www.iconfont.cn\/. Accessed: 2025-05-29."},{"key":"e_1_3_2_1_18_1","first-page":"838","article-title":"Layerwise Image Vectorization via Bayesain-Optimized Contour","volume":"831","author":"Jabour Ghfran","year":"2025","unstructured":"Ghfran Jabour, Sergey Muravyov, and Valeria Efimova. 2025. Layerwise Image Vectorization via Bayesain-Optimized Contour. Proceedings Copyright, Vol. 831 (2025), 838.","journal-title":"Proceedings Copyright"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00190"},{"key":"e_1_3_2_1_20_1","volume-title":"Swe-bench: Can language models resolve real-world github issues? arXiv preprint arXiv:2310.06770","author":"Jimenez Carlos E","year":"2023","unstructured":"Carlos E Jimenez, John Yang, Alexander Wettig, Shunyu Yao, Kexin Pei, Ofir Press, and Karthik Narasimhan. 2023. Swe-bench: Can language models resolve real-world github issues? arXiv preprint arXiv:2310.06770 (2023)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2010.88"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2015.2454513"},{"key":"e_1_3_2_1_23_1","volume-title":"International conference on machine learning. PMLR, 12888-12900","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In International conference on machine learning. PMLR, 12888-12900."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3414685.3417763"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3135932.3135941"},{"key":"e_1_3_2_1_26_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2023. Visual instruction tuning. Advances in neural information processing systems, Vol. 36 (2023), 34892-34916."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00802"},{"key":"e_1_3_2_1_28_1","volume-title":"Codexglue: A machine learning benchmark dataset for code understanding and generation. arXiv preprint arXiv:2102.04664","author":"Lu Shuai","year":"2021","unstructured":"Shuai Lu, Daya Guo, Shuo Ren, Junjie Huang, Alexey Svyatkovskiy, Ambrosio Blanco, Colin Clement, Dawn Drain, Daxin Jiang, Duyu Tang, et al., 2021. Codexglue: A machine learning benchmark dataset for code understanding and generation. arXiv preprint arXiv:2102.04664 (2021)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01583"},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 8142-8147","author":"Nishina Kunato","year":"2024","unstructured":"Kunato Nishina and Yusuke Matsui. 2024. SVGEditBench: A Benchmark Dataset for Quantitative Assessment of LLM's SVG Editing Capabilities. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 8142-8147."},{"key":"e_1_3_2_1_31_1","volume-title":"SVGEditBench V2: A Benchmark for Instruction-based SVG Editing. arXiv preprint arXiv:2502.19453","author":"Nishina Kunato","year":"2025","unstructured":"Kunato Nishina and Yusuke Matsui. 2025. SVGEditBench V2: A Benchmark for Instruction-based SVG Editing. arXiv preprint arXiv:2502.19453 (2025)."},{"key":"e_1_3_2_1_32_1","unstructured":"Maxime Oquab Timoth\u00e9e Darcet Th\u00e9o Moutakanni Huy Vo Marc Szafraniec Vasil Khalidov Pierre Fernandez Daniel Haziza Francisco Massa Alaaeldin El-Nouby et al. 2023. Dinov2: Learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)."},{"key":"e_1_3_2_1_33_1","volume-title":"Can Large Language Models Understand Symbolic Graphics Programs? arXiv preprint arXiv:2408.08313","author":"Qiu Zeju","year":"2024","unstructured":"Zeju Qiu, Weiyang Liu, Haiwen Feng, Zhen Liu, Tim Z Xiao, Katherine M Collins, Joshua B Tenenbaum, Adrian Weller, Michael J Black, and Bernhard Sch\u00f6lkopf. 2024. Can Large Language Models Understand Symbolic Graphics Programs? arXiv preprint arXiv:2408.08313 (2024)."},{"key":"e_1_3_2_1_34_1","volume-title":"arXiv:2412.15115 [cs.CL] https:\/\/arxiv.org\/abs\/2412.15115","author":"Yang An","year":"2025","unstructured":"Qwen, :, An Yang, Baosong Yang, Beichen Zhang, Binyuan Hui, Bo Zheng, Bowen Yu, Chengyuan Li, Dayiheng Liu, Fei Huang, Haoran Wei, Huan Lin, Jian Yang, Jianhong Tu, Jianwei Zhang, Jianxin Yang, Jiaxi Yang, Jingren Zhou, Junyang Lin, Kai Dang, Keming Lu, Keqin Bao, Kexin Yang, Le Yu, Mei Li, Mingfeng Xue, Pei Zhang, Qin Zhu, Rui Men, Runji Lin, Tianhao Li, Tianyi Tang, Tingyu Xia, Xingzhang Ren, Xuancheng Ren, Yang Fan, Yang Su, Yichang Zhang, Yu Wan, Yuqiong Liu, Zeyu Cui, Zhenru Zhang, and Zihan Qiu. 2025. Qwen2.5 Technical Report. (2025). arXiv:2412.15115 [cs.CL] https:\/\/arxiv.org\/abs\/2412.15115"},{"key":"e_1_3_2_1_35_1","volume-title":"International conference on machine learning. PmLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748-8763."},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 7342-7351","author":"Reddy Pradyumna","year":"2021","unstructured":"Pradyumna Reddy, Michael Gharbi, Michal Lukac, and Niloy J Mitra. 2021. Im2vec: Synthesizing vector graphics without vector supervision. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 7342-7351."},{"key":"e_1_3_2_1_37_1","volume-title":"Starvector: Generating scalable vector graphics code from images. arXiv preprint arXiv:2312.11556","author":"Rodriguez Juan A","year":"2023","unstructured":"Juan A Rodriguez, Shubham Agarwal, Issam H Laradji, Pau Rodriguez, David Vazquez, Christopher Pal, and Marco Pedersoli. 2023. Starvector: Generating scalable vector graphics code from images. arXiv preprint arXiv:2312.11556 (2023)."},{"key":"e_1_3_2_1_38_1","unstructured":"Christoph Schuhmann. 2022. Improved Aesthetic Predictor. https:\/\/github.com\/christophschuhmann\/improved-aesthetic-predictor. Accessed: 2025-05-29."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2021.3084944"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25326"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3309786"},{"key":"e_1_3_2_1_42_1","volume-title":"Strokenuwa: Tokenizing strokes for vector graphic synthesis. arXiv preprint arXiv:2401.17093","author":"Tang Zecheng","year":"2024","unstructured":"Zecheng Tang, Chenfei Wu, Zekai Zhang, Mingheng Ni, Shengming Yin, Yu Liu, Zhengyuan Yang, Lijuan Wang, Zicheng Liu, Juntao Li, et al., 2024. Strokenuwa: Tokenizing strokes for vector graphic synthesis. arXiv preprint arXiv:2401.17093 (2024)."},{"key":"e_1_3_2_1_43_1","volume-title":"Ryan Burnell, Libin Bai, Anmol Gulati, Garrett Tanzer, Damien Vincent, Zhufeng Pan, Shibo Wang, et al.","author":"Team Gemini","year":"2024","unstructured":"Gemini Team, Petko Georgiev, Ving Ian Lei, Ryan Burnell, Libin Bai, Anmol Gulati, Garrett Tanzer, Damien Vincent, Zhufeng Pan, Shibo Wang, et al., 2024. Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context. arXiv preprint arXiv:2403.05530 (2024)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00439"},{"key":"e_1_3_2_1_45_1","volume-title":"A note on the evaluation of generative models. arXiv preprint arXiv:1511.01844","author":"Theis Lucas","year":"2015","unstructured":"Lucas Theis, A\u00e4ron van den Oord, and Matthias Bethge. 2015. A note on the evaluation of generative models. arXiv preprint arXiv:1511.01844 (2015)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-03789-4_18"},{"key":"e_1_3_2_1_47_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_48_1","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research, Vol. 9, 11 (2008).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3528223.3530068"},{"key":"e_1_3_2_1_50_1","first-page":"61501","article-title":"Visionllm: Large language model is also an open-ended decoder for vision-centric tasks","volume":"36","author":"Wang Wenhai","year":"2023","unstructured":"Wenhai Wang, Zhe Chen, Xiaokang Chen, Jiannan Wu, Xizhou Zhu, Gang Zeng, Ping Luo, Tong Lu, Jie Zhou, Yu Qiao, et al., 2023. Visionllm: Large language model is also an open-ended decoder for vision-centric tasks. Advances in Neural Information Processing Systems, Vol. 36 (2023), 61501-61513.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_51_1","volume-title":"Image quality assessment: from error visibility to structural similarity","author":"Wang Zhou","year":"2004","unstructured":"Zhou Wang, Alan C Bovik, Hamid R Sheikh, and Eero P Simoncelli. 2004. Image quality assessment: from error visibility to structural similarity. IEEE transactions on image processing, Vol. 13, 4 (2004), 600-612."},{"key":"e_1_3_2_1_52_1","volume-title":"Chat2SVG: Vector Graphics Generation with Large Language Models and Image Diffusion Models. arXiv preprint arXiv:2411.16602","author":"Wu Ronghuan","year":"2024","unstructured":"Ronghuan Wu, Wanchao Su, and Jing Liao. 2024. Chat2SVG: Vector Graphics Generation with Large Language Models and Image Diffusion Models. arXiv preprint arXiv:2411.16602 (2024)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3618364"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00200"},{"key":"e_1_3_2_1_55_1","volume-title":"Empowering LLMs to Understand and Generate Complex Vector Graphics. arXiv preprint arXiv:2412.11102","author":"Xing Ximing","year":"2024","unstructured":"Ximing Xing, Juncheng Hu, Guotao Liang, Jing Zhang, Dong Xu, and Qian Yu. 2024a. Empowering LLMs to Understand and Generate Complex Vector Graphics. arXiv preprint arXiv:2412.11102 (2024)."},{"key":"e_1_3_2_1_56_1","first-page":"15869","article-title":"Diffsketcher: Text guided vector sketch synthesis through latent diffusion models","volume":"36","author":"Xing Ximing","year":"2023","unstructured":"Ximing Xing, Chuang Wang, Haitao Zhou, Jing Zhang, Qian Yu, and Dong Xu. 2023. Diffsketcher: Text guided vector sketch synthesis through latent diffusion models. Advances in Neural Information Processing Systems, Vol. 36 (2023), 15869-15889.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00435"},{"key":"e_1_3_2_1_58_1","first-page":"126","article-title":"Exploring the capability of llms in performing low-level visual analytic tasks on svg data visualizations. In 2024 IEEE Visualization and Visual Analytics (VIS)","author":"Xu Zhongzheng","year":"2024","unstructured":"Zhongzheng Xu and Emily Wall. 2024. Exploring the capability of llms in performing low-level visual analytic tasks on svg data visualizations. In 2024 IEEE Visualization and Visual Analytics (VIS). IEEE, 126-130.","journal-title":"IEEE"},{"key":"e_1_3_2_1_59_1","unstructured":"An Yang Anfeng Li Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chang Gao Chengen Huang Chenxu Lv et al. 2025b. Qwen3 technical report. arXiv preprint arXiv:2505.09388 (2025)."},{"key":"e_1_3_2_1_60_1","volume-title":"OmniSVG: A Unified Scalable Vector Graphics Generation Model. arXiv preprint arXiv:2504.06263","author":"Yang Yiying","year":"2025","unstructured":"Yiying Yang, Wei Cheng, Sijin Chen, Xianfang Zeng, Jiaxu Zhang, Liao Wang, Gang Yu, Xingjun Ma, and Yu-Gang Jiang. 2025a. OmniSVG: A Unified Scalable Vector Graphics Generation Model. arXiv preprint arXiv:2504.06263 (2025)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3658204"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_2_1_63_1","unstructured":"Jinguo Zhu Weiyun Wang Zhe Chen Zhaoyang Liu Shenglong Ye Lixin Gu Yuchen Duan Hao Tian Weijie Su Jie Shao et al. 2025. Internvl3: Exploring advanced training and test-time recipes for open-source multimodal models. arXiv preprint arXiv:2504.10479 (2025)."},{"key":"e_1_3_2_1_64_1","volume-title":"Vgbench: Evaluating large language models on vector graphics understanding and generation. arXiv preprint arXiv:2407.10972.","author":"Zou Bocheng","year":"2024","unstructured":"Bocheng Zou, Mu Cai, Jianrui Zhang, and Yong Jae Lee. 2024. Vgbench: Evaluating large language models on vector graphics understanding and generation. arXiv preprint arXiv:2407.10972."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3758287","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T05:09:03Z","timestamp":1765343343000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3758287"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":64,"alternative-id":["10.1145\/3746027.3758287","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3758287","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}