{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:41:28Z","timestamp":1765309288188,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","funder":[{"name":"Beijing Natural Science Foundation","award":["L233008"],"award-info":[{"award-number":["L233008"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755714","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:56:44Z","timestamp":1761375404000},"page":"5001-5009","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["ChartM\n                    <sup>3<\/sup>\n                    : Benchmarking Chart Editing with Multimodal Instructions"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-7763-0094","authenticated-orcid":false,"given":"Donglu","family":"Yang","sequence":"first","affiliation":[{"name":"Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6187-3628","authenticated-orcid":false,"given":"Liang","family":"Zhang","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3470-5442","authenticated-orcid":false,"given":"Zihao","family":"Yue","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2099-4025","authenticated-orcid":false,"given":"Liangyu","family":"Chen","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8113-6464","authenticated-orcid":false,"given":"Yichen","family":"Xu","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9803-8204","authenticated-orcid":false,"given":"Wenxuan","family":"Wang","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6486-6020","authenticated-orcid":false,"given":"Qin","family":"Jin","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2503.04095"},{"key":"e_1_3_2_1_3_1","unstructured":"Zhe Chen Weiyun Wang Yue Cao Yangzhou Liu Zhangwei Gao Erfei Cui Jinguo Zhu Shenglong Ye Hao Tian Zhaoyang Liu et al. 2024a. Expanding Performance Boundaries of Open-Source Multimodal Models with Model Data and Test-Time Scaling. arXiv preprint arXiv:2412.05271 (2024)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Zhe Chen Weiyun Wang Hao Tian Shenglong Ye Zhangwei Gao Erfei Cui Wenwen Tong Kongzhi Hu Jiapeng Luo Zheng Ma et al. 2024b. How Far Are We to GPT-4V? Closing the Gap to Commercial Multimodal Models with Open-Source Suites. arXiv preprint arXiv:2404.16821 (2024).","DOI":"10.1007\/s11432-024-4231-5"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1705.06830"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1406.2661"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2501.11233"},{"key":"e_1_3_2_1_8_1","volume-title":"Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626","author":"Hertz Amir","year":"2022","unstructured":"Amir Hertz, Ron Mokady, Jay Tenenbaum, Kfir Aberman, Yael Pritch, and Daniel Cohen-Or. 2022. Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626 (2022). Sections: 2, 3, 5, 6, 7."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2210.06628"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2210.09276"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00246"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2504.00557"},{"key":"e_1_3_2_1_13_1","unstructured":"Haotian Liu Chunyuan Li Yuheng Li Bo Li Yuanhan Zhang Sheng Shen and Yong Jae Lee. 2024. LLaVA-NeXT: Improved reasoning OCR and world knowledge. https:\/\/llava-vl.github.io\/blog\/2024-01-30-llava-next\/"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2304.08485"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2403.05525"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2203.10244"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1909.00997"},{"key":"e_1_3_2_1_18_1","unstructured":"OpenAI. 2023. GPT-4V(ision) system card. https:\/\/openai.com\/index\/gpt-4v-system-card\/"},{"key":"e_1_3_2_1_19_1","volume-title":"Drag Your GAN: Interactive Point-based Manipulation on the Generative Image Manifold. In ACM SIGGRAPH 2023 Conference Proceedings.","author":"Pan Xingang","year":"2023","unstructured":"Xingang Pan, Ayush Tewari, Thomas Leimk\u00fchler, Lingjie Liu, Abhimitra Meka, and Christian Theobalt. 2023. Drag Your GAN: Interactive Point-based Manipulation on the Generative Image Manifold. In ACM SIGGRAPH 2023 Conference Proceedings."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1604.07379"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2410.04064"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2311.10089"},{"key":"e_1_3_2_1_23_1","volume-title":"Vincent YF Tan, and Song Bai","author":"Shi Yujun","year":"2023","unstructured":"Yujun Shi, Chuhui Xue, Jiachun Pan, Wenqing Zhang, Vincent YF Tan, and Song Bai. 2023. DragDiffusion: Harnessing Diffusion Models for Interactive Point-based Image Editing. arXiv preprint arXiv:2306.14435 (2023)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1503.03585"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2503.23131"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2409.12191"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2003.819861"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2403.11703"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2403.00209"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2406.09961"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2501.06598"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755714","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:38:36Z","timestamp":1765309116000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755714"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":31,"alternative-id":["10.1145\/3746027.3755714","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755714","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}