{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T05:10:04Z","timestamp":1765343404382,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":77,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["Grant 62271312 and Grant 62132006"],"award-info":[{"award-number":["Grant 62271312 and Grant 62132006"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"STCSM","award":["Grant 22DZ2229005"],"award-info":[{"award-number":["Grant 22DZ2229005"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754726","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:26:55Z","timestamp":1761377215000},"page":"6761-6770","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards Explainable Partial-AIGC Image Quality Assessment"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8783-4942","authenticated-orcid":false,"given":"Jiaying","family":"Qian","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2623-4756","authenticated-orcid":false,"given":"Ziheng","family":"Jia","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7247-7938","authenticated-orcid":false,"given":"Zicheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, China, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4931-7006","authenticated-orcid":false,"given":"Zeyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8165-9322","authenticated-orcid":false,"given":"Guangtao","family":"Zhai","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5693-0416","authenticated-orcid":false,"given":"Xiongkuo","family":"Min","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Quality-aware image-text alignment for real-world image quality assessment. arXiv preprint arXiv:2403.11176 5, 6","author":"Agnolucci Lorenzo","year":"2024","unstructured":"Lorenzo Agnolucci, Leonardo Galteri, and Marco Bertini. 2024. Quality-aware image-text alignment for real-world image quality assessment. arXiv preprint arXiv:2403.11176 5, 6 (2024)."},{"key":"e_1_3_2_1_2_1","volume-title":"Kyong Hwan Jin, and Seungryong Kim.","author":"Ahn Donghoon","year":"2024","unstructured":"Donghoon Ahn, Hyoungwon Cho, Jaewon Min, Wooseok Jang, Jungwoo Kim, SeonHwa Kim, Hyun Hee Park, Kyong Hwan Jin, and Seungryong Kim. 2024. Self-rectifying diffusion sampling with perturbed-attention guidance. In ECCV. 1--17."},{"volume-title":"Feb 24, 2025.","year":"2025","key":"e_1_3_2_1_3_1","unstructured":"Anthropic. 2025. Claude 3.7 Sonnet and Claude Code. https:\/\/www.anthropic. com\/blog Announcement on Anthropic blog, Feb 24, 2025."},{"key":"e_1_3_2_1_4_1","volume-title":"Blended latent diffusion. ACM transactions on graphics (TOG) 42, 4","author":"Avrahami Omri","year":"2023","unstructured":"Omri Avrahami, Ohad Fried, and Dani Lischinski. 2023. Blended latent diffusion. ACM transactions on graphics (TOG) 42, 4 (2023), 1--11."},{"key":"e_1_3_2_1_5_1","unstructured":"Shuai Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge Sibo Song Kai Dang Peng Wang Shijie Wang Jun Tang et al. 2025. Qwen2. 5-vl technical report. arXiv preprint arXiv:2502.13923 (2025)."},{"key":"e_1_3_2_1_6_1","volume-title":"Ledits: Limitless image editing using text-to-image models. In CVPR. 8861--8870.","author":"Brack Manuel","year":"2024","unstructured":"Manuel Brack, Felix Friedrich, Katharia Kornmeier, Linoy Tsaban, Patrick Schramowski, Kristian Kersting, and Apolin\u00e1rio Passos. 2024. Ledits: Limitless image editing using text-to-image models. In CVPR. 8861--8870."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Tim Brooks Aleksander Holynski and Alexei A Efros. 2023. Instructpix2pix: Learning to follow image editing instructions. In CVPR. 18392--18402.","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"e_1_3_2_1_8_1","volume-title":"Topiq: A top-down approach from semantics to distortions for image quality assessment","author":"Chen Chaofeng","year":"2024","unstructured":"Chaofeng Chen, Jiadi Mo, Jingwen Hou, Haoning Wu, Liang Liao, Wenxiu Sun, Qiong Yan, and Weisi Lin. 2024. Topiq: A top-down approach from semantics to distortions for image quality assessment. IEEE TIP (2024)."},{"key":"e_1_3_2_1_9_1","volume-title":"andWenjun Zhang","author":"Chen Zijian","year":"2024","unstructured":"Zijian Chen,Wei Sun, HaoningWu, Zicheng Zhang, Jun Jia, Ru Huang, Xiongkuo Min, Guangtao Zhai, andWenjun Zhang. 2024. Study of Subjective and Objective Naturalness Assessment of AI-Generated Images. IEEE TIP (2024)."},{"key":"e_1_3_2_1_10_1","volume-title":"Diffedit: Diffusion-based semantic image editing with mask guidance. arXiv preprint arXiv:2210.11427","author":"Couairon Guillaume","year":"2022","unstructured":"Guillaume Couairon, Jakob Verbeek, Holger Schwenk, and Matthieu Cord. 2022. Diffedit: Diffusion-based semantic image editing with mask guidance. arXiv preprint arXiv:2210.11427 (2022)."},{"key":"e_1_3_2_1_11_1","unstructured":"Sci-kit Learn Developers. 2019. scikit-learn: machine learning in Python."},{"key":"e_1_3_2_1_12_1","volume-title":"Instructdiffusion: A generalist modeling interface for vision tasks. In CVPR. 12709--12720.","author":"Geng Zigang","year":"2024","unstructured":"Zigang Geng, Binxin Yang, Tiankai Hang, Chen Li, Shuyang Gu, Ting Zhang, Jianmin Bao, Zheng Zhang, Houqiang Li, Han Hu, et al. 2024. Instructdiffusion: A generalist modeling interface for vision tasks. In CVPR. 12709--12720."},{"key":"e_1_3_2_1_13_1","volume-title":"NTIRE 2022 challenge on perceptual image quality assessment. In CVPR. 951--967","author":"Gu Jinjin","year":"2022","unstructured":"Jinjin Gu, Haoming Cai, Chao Dong, Jimmy S Ren, Radu Timofte, Yuan Gong, Shanshan Lao, Shuwei Shi, Jiahao Wang, Sidi Yang, et al. 2022. NTIRE 2022 challenge on perceptual image quality assessment. In CVPR. 951--967."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2024.106777"},{"key":"e_1_3_2_1_15_1","volume-title":"Diffusion model-based image editing: A survey. arXiv preprint arXiv:2402.17525","author":"Huang Yi","year":"2024","unstructured":"Yi Huang, Jiancheng Huang, Yifan Liu, Mingfu Yan, Jiaxi Lv, Jianzhuang Liu, Wei Xiong, He Zhang, Liangliang Cao, and Shifeng Chen. 2024. Diffusion model-based image editing: A survey. arXiv preprint arXiv:2402.17525 (2024)."},{"key":"e_1_3_2_1_16_1","volume-title":"Aesexpert: Towards multimodality foundation model for image aesthetics perception. In ACM MM. 5911-- 5920.","author":"Huang Yipo","year":"2024","unstructured":"Yipo Huang, Xiangfei Sheng, Zhichao Yang, Quan Yuan, Zhichao Duan, Pengfei Chen, Leida Li, Weisi Lin, and Guangming Shi. 2024. Aesexpert: Towards multimodality foundation model for image aesthetics perception. In ACM MM. 5911-- 5920."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01185"},{"key":"e_1_3_2_1_18_1","unstructured":"Aaron Hurst Adam Lerer Adam P Goucher Adam Perelman Aditya Ramesh Aidan Clark AJ Ostrow Akila Welihinda Alan Hayes Alec Radford et al. 2024. Gpt-4o system card. arXiv preprint arXiv:2410.21276 (2024)."},{"key":"e_1_3_2_1_19_1","volume-title":"Musiq: Multi-scale image quality transformer. In ICCV. 5148--5157.","author":"Ke Junjie","year":"2021","unstructured":"Junjie Ke, QifeiWang, YilinWang, Peyman Milanfar, and Feng Yang. 2021. Musiq: Multi-scale image quality transformer. In ICCV. 5148--5157."},{"volume-title":"Natural image deblurring using recursive deep convolutional neural network (R-DbCNN) and secondgeneration wavelets","author":"Khmag Asem","key":"e_1_3_2_1_20_1","unstructured":"Asem Khmag and Noraziahtulhidayu Kamarudin. 2019. Natural image deblurring using recursive deep convolutional neural network (R-DbCNN) and secondgeneration wavelets. In IEEE ICSIPA. 285--290."},{"key":"e_1_3_2_1_21_1","first-page":"11","article-title":"Deep CNN-Based Blind Image Quality Predictor","volume":"30","author":"Kim Jongyoo","year":"2019","unstructured":"Jongyoo Kim, Anh-Duc Nguyen, and Sanghoon Lee. 2019. Deep CNN-Based Blind Image Quality Predictor. IEEE TNNLS 30, 1 (2019), 11--24.","journal-title":"IEEE TNNLS"},{"key":"e_1_3_2_1_22_1","first-page":"36652","article-title":"Pick-a-pic: An open dataset of user preferences for text-toimage generation","volume":"36","author":"Kirstain Yuval","year":"2023","unstructured":"Yuval Kirstain, Adam Polyak, Uriel Singer, Shahbuland Matiana, Joe Penna, and Omer Levy. 2023. Pick-a-pic: An open dataset of user preferences for text-toimage generation. NeurIPS 36 (2023), 36652--36663.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_23_1","first-page":"011006","article-title":"Most apparent distortion: fullreference image quality assessment and the role of strategy","volume":"19","author":"Larson Eric C","year":"2010","unstructured":"Eric C Larson and Damon M Chandler. 2010. Most apparent distortion: fullreference image quality assessment and the role of strategy. JEI 19, 1 (2010), 011006--011006.","journal-title":"JEI"},{"key":"e_1_3_2_1_24_1","volume-title":"Llava-onevision: Easy visual task transfer. arXiv preprint arXiv:2408.03326","author":"Li Bo","year":"2024","unstructured":"Bo Li, Yuanhan Zhang, Dong Guo, Renrui Zhang, Feng Li, Hao Zhang, Kaichen Zhang, Peiyuan Zhang, Yanwei Li, Ziwei Liu, et al. 2024. Llava-onevision: Easy visual task transfer. arXiv preprint arXiv:2408.03326 (2024)."},{"key":"e_1_3_2_1_25_1","first-page":"6833","article-title":"Agiqa-3k: An open database for ai-generated image quality assessment","volume":"34","author":"Li Chunyi","year":"2023","unstructured":"Chunyi Li, Zicheng Zhang, Haoning Wu, Wei Sun, Xiongkuo Min, Xiaohong Liu, Guangtao Zhai, andWeisi Lin. 2023. Agiqa-3k: An open database for ai-generated image quality assessment. IEEE TCSVT 34, 8 (2023), 6833--6846.","journal-title":"IEEE TCSVT"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00598"},{"key":"e_1_3_2_1_27_1","volume-title":"Hunyuandit: A powerful multi-resolution diffusion transformer with fine-grained chinese understanding. arXiv preprint arXiv:2405.08748","author":"Li Zhimin","year":"2024","unstructured":"Zhimin Li, Jianwei Zhang, Qin Lin, Jiangfeng Xiong, Yanxin Long, Xinchi Deng, Yingfang Zhang, Xingchao Liu, Minbin Huang, Zedong Xiao, et al. 2024. Hunyuandit: A powerful multi-resolution diffusion transformer with fine-grained chinese understanding. arXiv preprint arXiv:2405.08748 (2024)."},{"volume-title":"Flow Matching for Generative Modeling. In The Eleventh International Conference on Learning Representations.","author":"Lipman Yaron","key":"e_1_3_2_1_28_1","unstructured":"Yaron Lipman, Ricky TQ Chen, Heli Ben-Hamu, Maximilian Nickel, and Matthew Le. [n. d.]. Flow Matching for Generative Modeling. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_1_29_1","first-page":"1500","article-title":"Image quality assessment based on gradient similarity","volume":"21","author":"Liu Anmin","year":"2011","unstructured":"Anmin Liu, Weisi Lin, and Manish Narwaria. 2011. Image quality assessment based on gradient similarity. IEEE TIP 21, 4 (2011), 1500--1512.","journal-title":"IEEE TIP"},{"key":"e_1_3_2_1_30_1","volume-title":"Joost Van DeWeijer, and Andrew D Bagdanov","author":"Liu Xialei","year":"2017","unstructured":"Xialei Liu, Joost Van DeWeijer, and Andrew D Bagdanov. 2017. Rankiqa: Learning from rankings for no-reference image quality assessment. In ICCV. 1040--1049."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00218"},{"key":"e_1_3_2_1_32_1","volume-title":"European Conference on Computer Vision. Springer, 111--129","author":"Mirzaei Ashkan","year":"2024","unstructured":"Ashkan Mirzaei, Tristan Aumentado-Armstrong, Marcus A Brubaker, Jonathan Kelly, Alex Levinshtein, Konstantinos G Derpanis, and Igor Gilitschenski. 2024. Watch your steps: Local image and scene editing by text instructions. In European Conference on Computer Vision. Springer, 111--129."},{"key":"e_1_3_2_1_33_1","first-page":"4695","article-title":"Noreference image quality assessment in the spatial domain","volume":"21","author":"Mittal Anish","year":"2012","unstructured":"Anish Mittal, Anush Krishna Moorthy, and Alan Conrad Bovik. 2012. Noreference image quality assessment in the spatial domain. IEEE TIP 21, 12 (2012), 4695--4708.","journal-title":"IEEE TIP"},{"key":"e_1_3_2_1_34_1","first-page":"209","article-title":"Making a ''completely blind'' image quality analyzer","volume":"20","author":"Mittal Anish","year":"2012","unstructured":"Anish Mittal, Rajiv Soundararajan, and Alan C Bovik. 2012. Making a ''completely blind'' image quality analyzer. IEEE SPL 20, 3 (2012), 209--212.","journal-title":"IEEE SPL"},{"key":"e_1_3_2_1_35_1","first-page":"513","article-title":"A two-step framework for constructing blind image quality indices","volume":"17","author":"Moorthy Anush Krishna","year":"2010","unstructured":"Anush Krishna Moorthy and Alan Conrad Bovik. 2010. A two-step framework for constructing blind image quality indices. IEEE SPL 17, 5 (2010), 513--516.","journal-title":"IEEE SPL"},{"key":"e_1_3_2_1_36_1","first-page":"3350","article-title":"Blind image quality assessment: From natural scene statistics to perceptual quality","volume":"20","author":"Moorthy Anush Krishna","year":"2011","unstructured":"Anush Krishna Moorthy and Alan Conrad Bovik. 2011. Blind image quality assessment: From natural scene statistics to perceptual quality. IEEE TIP 20, 12 (2011), 3350--3364.","journal-title":"IEEE TIP"},{"key":"e_1_3_2_1_37_1","volume-title":"AVA: A large-scale database for aesthetic visual analysis. In CVPR. 2408--2415.","author":"Murray Naila","year":"2012","unstructured":"Naila Murray, Luca Marchesotti, and Florent Perronnin. 2012. AVA: A large-scale database for aesthetic visual analysis. In CVPR. 2408--2415."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Fei Peng Huiyuan Fu Anlong Ming Chuanming Wang Huadong Ma Shuai He Zifei Dou and Shu Chen. 2024. Aigc image quality assessment via image-prompt correspondence. In CVPR. 6432--6441.","DOI":"10.1109\/CVPRW63382.2024.00644"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Fei Peng Huiyuan Fu Anlong Ming Chuanming Wang Huadong Ma Shuai He Zifei Dou and Shu Chen. 2024. Aigc image quality assessment via image-prompt correspondence. In CVPR. 6432--6441.","DOI":"10.1109\/CVPRW63382.2024.00644"},{"key":"e_1_3_2_1_40_1","volume-title":"andWei Gao","author":"Qu Bowen","year":"2024","unstructured":"Bowen Qu, Haohui Li, andWei Gao. 2024. Bringing textual prompt to ai-generated image quality assessment. In IEEE ICME. 1--6."},{"key":"e_1_3_2_1_41_1","unstructured":"Alec Radford JeffreyWu Rewon Child David Luan Dario Amodei Ilya Sutskever et al. 2019. Language models are unsupervised multitask learners. OpenAI blog 1 8 (2019) 9."},{"key":"e_1_3_2_1_42_1","volume-title":"Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 1, 2","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 1, 2 (2022), 3."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Jian Ren Xiaohui Shen Zhe Lin Radomir Mech and David J Foran. 2017. Personalized image aesthetics. In ICCV. 638--647.","DOI":"10.1109\/ICCV.2017.76"},{"key":"e_1_3_2_1_44_1","volume-title":"Dino-x: A unified vision model for open-world object detection and understanding. arXiv preprint arXiv:2411.14347","author":"Ren Tianhe","year":"2024","unstructured":"Tianhe Ren, Yihao Chen, Qing Jiang, Zhaoyang Zeng, Yuda Xiong, Wenlong Liu, Zhengyu Ma, Junyi Shen, Yuan Gao, Xiaoke Jiang, et al. 2024. Dino-x: A unified vision model for open-world object detection and understanding. arXiv preprint arXiv:2411.14347 (2024)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_46_1","first-page":"3339","article-title":"Blind image quality assessment: A natural scene statistics approach in the DCT domain","volume":"21","author":"Saad Michele A","year":"2012","unstructured":"Michele A Saad, Alan C Bovik, and Christophe Charrier. 2012. Blind image quality assessment: A natural scene statistics approach in the DCT domain. IEEE TIP 21, 8 (2012), 3339--3352.","journal-title":"IEEE TIP"},{"key":"e_1_3_2_1_47_1","first-page":"2117","article-title":"A visual information fidelity approach to video quality assessment","volume":"7","author":"Sheikh Hamid R","year":"2005","unstructured":"Hamid R Sheikh and Alan C Bovik. 2005. A visual information fidelity approach to video quality assessment. In VPQM, Vol. 7. 2117--2128.","journal-title":"VPQM"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"crossref","unstructured":"Shuwei Shi Qingyan Bai Mingdeng Cao Weihao Xia Jiahao Wang Yifan Chen and Yujiu Yang. 2021. Region-adaptive deformable network for image quality assessment. In CVPR. 324--333.","DOI":"10.1109\/CVPRW53098.2021.00042"},{"key":"e_1_3_2_1_49_1","unstructured":"Shaolin Su Qingsen Yan Yu Zhu Cheng Zhang Xin Ge Jinqiu Sun and Yanning Zhang. 2020. Blindly assess image quality in the wild guided by a self-adaptive hyper network. In CVPR. 3667--3676."},{"key":"e_1_3_2_1_50_1","first-page":"1178","article-title":"Blind quality assessment for in-the-wild images via hierarchical feature fusion and iterative mixed database training","volume":"17","author":"Sun Wei","year":"2023","unstructured":"Wei Sun, Xiongkuo Min, Danyang Tu, Siwei Ma, and Guangtao Zhai. 2023. Blind quality assessment for in-the-wild images via hierarchical feature fusion and iterative mixed database training. IEEE JSTSP 17, 6 (2023), 1178--1192.","journal-title":"IEEE JSTSP"},{"key":"e_1_3_2_1_51_1","first-page":"3998","article-title":"NIMA: Neural image assessment","volume":"27","author":"Talebi Hossein","year":"2018","unstructured":"Hossein Talebi and Peyman Milanfar. 2018. NIMA: Neural image assessment. IEEE TIP 27, 8 (2018), 3998--4011.","journal-title":"IEEE TIP"},{"key":"e_1_3_2_1_52_1","unstructured":"Gemini Team Rohan Anil Sebastian Borgeaud Jean-Baptiste Alayrac Jiahui Yu Radu Soricut Johan Schalkwyk Andrew M Dai Anja Hauth Katie Millican et al. 2023. Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805 (2023)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00191"},{"key":"e_1_3_2_1_54_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_55_1","volume-title":"CAAI International Conference on Artificial Intelligence. Springer, 46--57","author":"Duan Huiyu","year":"2023","unstructured":"JiaruiWang, Huiyu Duan, Jing Liu, Shi Chen, Xiongkuo Min, and Guangtao Zhai. 2023. Aigciqa2023: A large-scale image quality assessment database for ai generated images: from the perspectives of quality, authenticity and correspondence. In CAAI International Conference on Artificial Intelligence. Springer, 46--57."},{"key":"e_1_3_2_1_56_1","unstructured":"Peng Wang Shuai Bai Sinan Tan Shijie Wang Zhihao Fan Jinze Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge et al. 2024. Qwen2-vl: Enhancing vision-language model's perception of the world at any resolution. arXiv preprint arXiv:2409.12191 (2024)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"Puyi Wang Wei Sun Zicheng Zhang Jun Jia Yanwei Jiang Zhichao Zhang Xiongkuo Min and Guangtao Zhai. 2024. Large multi-modality model assisted ai-generated image quality assessment. In ACM MM. 7803--7812.","DOI":"10.1145\/3664647.3681471"},{"key":"e_1_3_2_1_58_1","first-page":"600","article-title":"Image quality assessment: from error visibility to structural similarity","volume":"13","author":"Wang Zhou","year":"2004","unstructured":"Zhou Wang, A.C. Bovik, H.R. Sheikh, and E.P. Simoncelli. 2004. Image quality assessment: from error visibility to structural similarity. IEEE TIP 13, 4 (2004), 600--612.","journal-title":"IEEE TIP"},{"key":"e_1_3_2_1_59_1","volume-title":"Image quality assessment: from error visibility to structural similarity","author":"Wang Zhou","year":"2004","unstructured":"Zhou Wang, Alan C Bovik, Hamid R Sheikh, and Eero P Simoncelli. 2004. Image quality assessment: from error visibility to structural similarity. IEEE transactions on image processing 13, 4 (2004), 600--612."},{"key":"e_1_3_2_1_60_1","first-page":"1185","article-title":"Information Content Weighting for Perceptual Image Quality Assessment","volume":"20","author":"Wang Zhou","year":"2011","unstructured":"Zhou Wang and Qiang Li. 2011. Information Content Weighting for Perceptual Image Quality Assessment. IEEE TIP 20, 5 (2011), 1185--1198.","journal-title":"IEEE TIP"},{"key":"e_1_3_2_1_61_1","volume-title":"The Thrity-Seventh ACSSC","volume":"2","author":"Wang Z.","year":"2003","unstructured":"Z.Wang, E.P. Simoncelli, and A.C. Bovik. 2003. Multiscale structural similarity for image quality assessment. In The Thrity-Seventh ACSSC, 2003, Vol. 2. 1398--1402 Vol.2."},{"key":"e_1_3_2_1_62_1","volume-title":"Diffusiondb: A large-scale prompt gallery dataset for text-to-image generative models. arXiv preprint arXiv:2210.14896","author":"Wang Zijie","year":"2022","unstructured":"Zijie JWang, Evan Montoya, David Munechika, Haoyang Yang, Benjamin Hoover, and Duen Horng Chau. 2022. Diffusiondb: A large-scale prompt gallery dataset for text-to-image generative models. arXiv preprint arXiv:2210.14896 (2022)."},{"key":"e_1_3_2_1_63_1","volume-title":"Q-instruct: Improving low-level visual abilities for multi-modality foundation models. In CVPR. 25490--25500.","author":"Wu Haoning","year":"2024","unstructured":"Haoning Wu, Zicheng Zhang, Erli Zhang, Chaofeng Chen, Liang Liao, Annan Wang, Kaixin Xu, Chunyi Li, Jingwen Hou, Guangtao Zhai, et al. 2024. Q-instruct: Improving low-level visual abilities for multi-modality foundation models. In CVPR. 25490--25500."},{"key":"e_1_3_2_1_64_1","volume-title":"Q-align: Teaching lmms for visual scoring via discrete text-defined levels. ICML","author":"Zhang Zicheng","year":"2023","unstructured":"HaoningWu, Zicheng Zhang,Weixia Zhang, Chaofeng Chen, Liang Liao, Chunyi Li, Yixuan Gao, Annan Wang, Erli Zhang, Wenxiu Sun, et al. 2023. Q-align: Teaching lmms for visual scoring via discrete text-defined levels. ICML (2023)."},{"key":"e_1_3_2_1_65_1","unstructured":"Zhiyu Wu Xiaokang Chen Zizheng Pan Xingchao Liu Wen Liu Damai Dai Huazuo Gao Yiyang Ma Chengyue Wu Bingxuan Wang et al. 2024. Deepseekvl2: Mixture-of-experts vision-language models for advanced multimodal understanding. arXiv preprint arXiv:2412.10302 (2024)."},{"key":"e_1_3_2_1_66_1","volume-title":"Yandong Li, Yanwu Xu, Kun Zhang, and Tingbo Hou.","author":"Xie Shaoan","year":"2023","unstructured":"Shaoan Xie, Yang Zhao, Zhisheng Xiao, Kelvin CK Chan, Yandong Li, Yanwu Xu, Kun Zhang, and Tingbo Hou. 2023. Dreaminpainter: Text-guided subject-driven image inpainting with diffusion models. arXiv preprint arXiv:2312.03771 (2023)."},{"key":"e_1_3_2_1_67_1","first-page":"15903","article-title":"Imagereward: Learning and evaluating human preferences for text-to-image generation","volume":"36","author":"Xu Jiazheng","year":"2023","unstructured":"Jiazheng Xu, Xiao Liu, Yuchen Wu, Yuxuan Tong, Qinkai Li, Ming Ding, Jie Tang, and Yuxiao Dong. 2023. Imagereward: Learning and evaluating human preferences for text-to-image generation. NeurIPS 36 (2023), 15903--15935.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_68_1","first-page":"684","article-title":"Gradient magnitude similarity deviation: A highly efficient perceptual image quality index","volume":"23","author":"Xue Wufeng","year":"2013","unstructured":"Wufeng Xue, Lei Zhang, Xuanqin Mou, and Alan C Bovik. 2013. Gradient magnitude similarity deviation: A highly efficient perceptual image quality index. IEEE TIP 23, 2 (2013), 684--695.","journal-title":"IEEE TIP"},{"key":"e_1_3_2_1_69_1","volume-title":"Moe-agiqa: Mixture-of-experts boosted visual perception-driven and semanticaware quality assessment for ai-generated images. In CVPR. 6395--6404.","author":"Yang Junfeng","year":"2024","unstructured":"Junfeng Yang, Jing Fu, Wei Zhang, Wenzhi Cao, Limei Liu, and Han Peng. 2024. Moe-agiqa: Mixture-of-experts boosted visual perception-driven and semanticaware quality assessment for ai-generated images. In CVPR. 6395--6404."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612200"},{"volume-title":"Transformer for image quality assessment","author":"You Junyong","key":"e_1_3_2_1_71_1","unstructured":"Junyong You and Jari Korhonen. 2021. Transformer for image quality assessment. In IEEE ICIP. 1389--1393."},{"key":"e_1_3_2_1_72_1","volume-title":"Inpaint anything: Segment anything meets image inpainting. arXiv preprint arXiv:2304.06790","author":"Yu Tao","year":"2023","unstructured":"Tao Yu, Runseng Feng, Ruoyu Feng, Jinming Liu, Xin Jin, Wenjun Zeng, and Zhibo Chen. 2023. Inpaint anything: Segment anything meets image inpainting. arXiv preprint arXiv:2304.06790 (2023)."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-019-2757-1"},{"key":"e_1_3_2_1_74_1","first-page":"4270","article-title":"VSI: A visual saliency-induced index for perceptual image quality assessment","volume":"23","author":"Zhang Lin","year":"2014","unstructured":"Lin Zhang, Ying Shen, and Hongyu Li. 2014. VSI: A visual saliency-induced index for perceptual image quality assessment. IEEE TIP 23, 10 (2014), 4270--4281.","journal-title":"IEEE TIP"},{"key":"e_1_3_2_1_75_1","first-page":"2378","article-title":"FSIM: A feature similarity index for image quality assessment","volume":"20","author":"Zhang Lin","year":"2011","unstructured":"Lin Zhang, Lei Zhang, Xuanqin Mou, and David Zhang. 2011. FSIM: A feature similarity index for image quality assessment. IEEE TIP 20, 8 (2011), 2378--2386.","journal-title":"IEEE TIP"},{"key":"e_1_3_2_1_76_1","unstructured":"Zicheng Zhang Ziheng Jia Haoning Wu Chunyi Li Zijian Chen Yingjie Zhou Wei Sun Xiaohong Liu Xiongkuo Min Weisi Lin et al. 2024. Q-Bench-Video: Benchmarking the Video Quality Understanding of LMMs. arXiv preprint arXiv:2409.20063 (2024)."},{"volume-title":"A perceptual quality assessment exploration for aigc images","author":"Zhang Zicheng","key":"e_1_3_2_1_77_1","unstructured":"Zicheng Zhang, Chunyi Li,Wei Sun, Xiaohong Liu, Xiongkuo Min, and Guangtao Zhai. 2023. A perceptual quality assessment exploration for aigc images. In IEEE ICMEW. 440--445."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754726","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T05:06:21Z","timestamp":1765343181000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754726"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":77,"alternative-id":["10.1145\/3746027.3754726","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754726","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}