{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T09:08:41Z","timestamp":1765357721275,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":58,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"the OPPO Research Fund"},{"name":"the National Natural Science Foundation of China","award":["62171340, 61991451 and61771473"],"award-info":[{"award-number":["62171340, 61991451 and61771473"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3680972","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:41Z","timestamp":1729925981000},"page":"2632-2641","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Semantics-Aware Image Aesthetics Assessment using Tag Matching and Contrastive Ranking"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3398-1286","authenticated-orcid":false,"given":"Zhichao","family":"Yang","sequence":"first","affiliation":[{"name":"School of Artificial Intelligence, Xidian University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9069-8796","authenticated-orcid":false,"given":"Leida","family":"Li","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Xidian University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0509-3782","authenticated-orcid":false,"given":"Pengfei","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Xidian University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7501-0009","authenticated-orcid":false,"given":"Jinjian","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Xidian University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9632-985X","authenticated-orcid":false,"given":"Weisheng","family":"Dong","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Xidian University, Xi'an, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3191853"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3130536"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2018.2875357"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/11744078_23"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2696576"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/132"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.3389\/frai.2022.976235"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2016.2514499"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00960"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3308852"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3389452"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3225728"},{"key":"e_1_3_2_1_13_1","volume-title":"AesExpert: Towards Multi-modality Foundation Model for Image Aesthetics Perception. arXiv preprint arXiv:2404.09624","author":"Huang Yipo","year":"2024","unstructured":"Yipo Huang, Xiangfei Sheng, Zhichao Yang, Quan Yuan, Zhichao Duan, Pengfei Chen, Leida Li, Weisi Lin, and Guangming Shi. 2024. AesExpert: Towards Multi-modality Foundation Model for Image Aesthetics Perception. arXiv preprint arXiv:2404.09624 (2024)."},{"key":"e_1_3_2_1_14_1","volume-title":"AesBench: An Expert Benchmark for Multimodal Large Language Models on Image Aesthetics Perception. arXiv preprint arXiv:2401.08276","author":"Huang Yipo","year":"2024","unstructured":"Yipo Huang, Quan Yuan, Xiangfei Sheng, Zhichao Yang, Haoning Wu, Pengfei Chen, Yuzhe Yang, Leida Li, and Weisi Lin. 2024. AesBench: An Expert Benchmark for Multimodal Large Language Models on Image Aesthetics Perception. arXiv preprint arXiv:2401.08276 (2024)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3151787"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2016.7532767"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cogr.2022.07.003"},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the International Conference on Learning Representations.","author":"Kang Bingyi","year":"2021","unstructured":"Bingyi Kang, Yu Li, Sa Xie, Zehuan Yuan, and Jiashi Feng. 2021. Exploring balanced feature spaces for representation learning. In Proceedings of the International Conference on Learning Representations."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00510"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00968"},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. IEEE, 419--426","author":"Ke Yan","year":"2006","unstructured":"Yan Ke, Xiaoou Tang, and Feng Jing. 2006. The design of high-level features for photo quality assessment. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. IEEE, 419--426."},{"key":"e_1_3_2_1_22_1","first-page":"18661","article-title":"Supervised contrastive learning","volume":"33","author":"Khosla Prannay","year":"2020","unstructured":"Prannay Khosla, Piotr Teterwak, Chen Wang, Aaron Sarna, Yonglong Tian, Phillip Isola, Aaron Maschinot, Ce Liu, and Dilip Krishnan. 2020. Supervised contrastive learning. Advances in Neural Information Processing Systems, Vol. 33 (2020), 18661--18673.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_40"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2845100"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3031549"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00128"},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of the International Conference on Learning Representations.","author":"Li Junnan","year":"2021","unstructured":"Junnan Li, Pan Zhou, Caiming Xiong, and Steven CH Hoi. 2021. Prototypical contrastive learning of unsupervised representations. In Proceedings of the International Conference on Learning Representations."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3249185"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2968285"},{"key":"e_1_3_2_1_30_1","volume-title":"Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101","author":"Loshchilov Ilya","year":"2017","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654927"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.119"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.84"},{"key":"e_1_3_2_1_34_1","volume-title":"A deep architecture for unified aesthetic prediction. arXiv preprint arXiv:1708.04890","author":"Murray Naila","year":"2017","unstructured":"Naila Murray and Albert Gordo. 2017. A deep architecture for unified aesthetic prediction. arXiv preprint arXiv:1708.04890 (2017)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.5555\/2354409.2354807"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3201510"},{"key":"e_1_3_2_1_37_1","unstructured":"Adam Paszke Sam Gross Soumith Chintala Gregory Chanan Edward Yang Zachary DeVito Zeming Lin Alban Desmaison Luca Antiga and Adam Lerer. 2017. Automatic differentiation in pytorch. In Advances in Neural Information Processing Systems. 1--4."},{"key":"e_1_3_2_1_38_1","volume-title":"Proceedings of the International Conference on Machine Learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In Proceedings of the International Conference on Machine Learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_39_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans Ilya Sutskever et al. 2018. Improving language understanding by generative pre-training. (2018)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611969"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2017.2688929"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2831899"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2013.2269899"},{"key":"e_1_3_2_1_44_1","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research, Vol. 9, 11 (2008), 2579--2605.","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00393"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2393347.2393400"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2964298"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502083"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2975798"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01924"},{"key":"e_1_3_2_1_51_1","volume-title":"Proceedings of the International Conference on Machine Learning. PMLR, 11842--11851","author":"Yang Yuzhe","year":"2021","unstructured":"Yuzhe Yang, Kaiwen Zha, Yingcong Chen, Hao Wang, and Dina Katabi. 2021. Delving into deep imbalanced regression. In Proceedings of the International Conference on Machine Learning. PMLR, 11842--11851."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3290479"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2941778"},{"key":"e_1_3_2_1_54_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Zha Kaiwen","year":"2024","unstructured":"Kaiwen Zha, Peng Cao, Jeany Son, Yuzhe Yang, and Dina Katabi. 2024. Rank-N-Contrast: Learning Continuous Representations for Regression. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_55_1","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 22302--22313","author":"Zhao Kai","year":"2023","unstructured":"Kai Zhao, Kun Yuan, Ming Sun, Mading Li, and Xing Wen. 2023. Quality-aware pre-trained models for blind image quality assessment. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 22302--22313."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i3.25485"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2967223"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00678"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680972","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3680972","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:35Z","timestamp":1750295855000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680972"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":58,"alternative-id":["10.1145\/3664647.3680972","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3680972","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}