{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:55:26Z","timestamp":1781538926026,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810710","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"2656-2665","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Focal-RegionFace: Generating Fine-Grained Multi-attribute Descriptions for Arbitrarily Selected Face Focal Regions"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-2516-8407","authenticated-orcid":false,"given":"Kaiwen","family":"Zheng","sequence":"first","affiliation":[{"name":"University of Glasgow, Glasgow, United Kingdom"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4759-2042","authenticated-orcid":false,"given":"Junchen","family":"Fu","sequence":"additional","affiliation":[{"name":"University of Glasgow, Glasgow, United Kingdom"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5735-8674","authenticated-orcid":false,"given":"Songpei","family":"Xu","sequence":"additional","affiliation":[{"name":"Shandong University, Jinan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-4662-8037","authenticated-orcid":false,"given":"Yaoqin","family":"He","sequence":"additional","affiliation":[{"name":"University of Glasgow, Glasgow, United Kingdom"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9228-1759","authenticated-orcid":false,"given":"Joemon","family":"Jose","sequence":"additional","affiliation":[{"name":"University of Glasgow, Glasgow, United Kingdom"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6010-1792","authenticated-orcid":false,"given":"Hu","family":"Han","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3925-4951","authenticated-orcid":false,"given":"Xuri","family":"Ge","sequence":"additional","affiliation":[{"name":"Shandong University, Jinan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","unstructured":"Ilkoo Ahn Younghwa Baek Bok-Nam Seo Su\u00a0Eun Lim Kyoungsik Jung Ho\u00a0Seok Kim Jeongkyun Kim Sukyung Lee and Siwoo Lee. 2024. Perceived age estimation from facial image and demographic data in young and middle-aged South Korean adults. Scientific Reports 14 1 (Dec 2024) 30084. 10.1038\/s41598-024-78695-7","DOI":"10.1038\/s41598-024-78695-7"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-24670-1_36"},{"key":"e_1_3_3_2_4_2","unstructured":"Shuai Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge Sibo Song Kai Dang Peng Wang Shijie Wang Jun Tang Humen Zhong Yuanzhi Zhu Mingkun Yang Zhaohai Li Jianqiang Wan Pengfei Wang Wei Ding Zheren Fu Yiheng Xu Jiabo Ye Xi Zhang Tianbao Xie Zesen Cheng Hang Zhang Zhibo Yang Haiyang Xu and Junyang Lin. 2025. Qwen2.5-VL Technical Report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.13923 (2025)."},{"key":"e_1_3_3_2_5_2","unstructured":"Ashutosh Chaubey Xulang Guan and Mohammad Soleymani. 2025. Face-LLaVA: Facial Expression and Attribute Understanding through Instruction Tuning. arxiv:https:\/\/arXiv.org\/abs\/2504.07198\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2504.07198"},{"key":"e_1_3_3_2_6_2","unstructured":"Xiaokang Chen Zhiyu Wu Xingchao Liu Zizheng Pan Wen Liu Zhenda Xie Xingkai Yu and Chong Ruan. 2025. Janus-pro: Unified multimodal understanding and generation with data and model scaling. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.17811 (2025)."},{"key":"e_1_3_3_2_7_2","unstructured":"Zebang Cheng Zhi-Qi Cheng Jun-Yan He Jingdong Sun Kai Wang Yuxiang Lin Zheng Lian Xiaojiang Peng and Alexander Hauptmann. 2024. Emotion-LLaMA: Multimodal Emotion Recognition and Reasoning with Instruction Tuning. arxiv:https:\/\/arXiv.org\/abs\/2406.11161\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2406.11161"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"crossref","unstructured":"Zebang Cheng Zhi-Qi Cheng Jun-Yan He Kai Wang Yuxiang Lin Zheng Lian Xiaojiang Peng and Alexander Hauptmann. 2024. Emotion-llama: Multimodal emotion recognition and reasoning with instruction tuning. Advances in Neural Information Processing Systems 37 (2024) 110805\u2013110853.","DOI":"10.52202\/079017-3518"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/3444685.3446259"},{"key":"e_1_3_3_2_10_2","unstructured":"Junchen Fu Wenhao Deng Kaiwen Zheng Ioannis Arapakis Yu Ye Yongxin Ni Joemon\u00a0M Jose and Xuri Ge. 2026. Benchmarking Multimodal Large Language Models for Missing Modality Completion in Product Catalogues. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2601.19750 (2026)."},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657725"},{"key":"e_1_3_3_2_12_2","unstructured":"Junchen Fu Xuri Ge Xin Xin Alexandros Karatzoglou Ioannis Arapakis Kaiwen Zheng Yongxin Ni and Joemon M\u00a0Jose Joemon. 2025. Efficient and effective adaptation of multimodal foundation models in sequential recommendation. IEEE Transactions on Knowledge and Data Engineering (2025)."},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","unstructured":"Tzvi Ganel Carmel Sofer and Melvyn\u00a0A. Goodale. 2022. Biases in human perception of facial age are present and more exaggerated in current AI technology. Scientific Reports 12 1 (December 2022) 22519. 10.1038\/s41598-022-27009-w","DOI":"10.1038\/s41598-022-27009-w"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681443"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Xuri Ge Joemon\u00a0M Jose Pengcheng Wang Arunachalam Iyer Xiao Liu and Hu Han. 2023. ALGRNet: Multi-relational adaptive facial action unit modelling for face representation and relevant recognitions. IEEE Transactions on Biometrics Behavior and Identity Science 5 4 (2023) 566\u2013578.","DOI":"10.1109\/TBIOM.2023.3306810"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/FG52635.2021.9666961"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/IEMBS.2008.4650288"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"crossref","unstructured":"Dimitrios Kollias Panagiotis Tzirakis Alice Baird Alan Cowen and Stefanos Zafeiriou. 2023. ABAW: Valence-Arousal Estimation Expression Recognition Action Unit Detection & Emotional Reaction Intensity Estimation Challenges. arxiv:https:\/\/arXiv.org\/abs\/2303.01498\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2303.01498","DOI":"10.1109\/CVPRW59228.2023.00626"},{"key":"e_1_3_3_2_19_2","unstructured":"Jewon Lee Ki-Ung Song Seungmin Yang Donguk Lim Jaeyeon Kim Wooksu Shin Bo-Kyeong Kim Yong\u00a0Jae Lee and Tae-Ho Kim. 2025. Efficient LLaMA-3.2-Vision by Trimming Cross-attended Visual Features. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2504.00557 (2025)."},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","unstructured":"Chaoyu Lei Kang Dang Sifan Song et\u00a0al. 2025. AI-assisted facial analysis in healthcare: From disease detection to comprehensive management. Patterns 6 2 (2025) 101175. 10.1016\/j.patter.2025.101175","DOI":"10.1016\/j.patter.2025.101175"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01118"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2010.5543262"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","unstructured":"Ali Mollahosseini Behzad Hasani and Mohammad\u00a0H. Mahoor. 2019. AffectNet: A Database for Facial Expression Valence and Arousal Computing in the Wild. IEEE TAC 10 1 (Jan. 2019) 18\u201331. 10.1109\/TAFFC.2017.2740923","DOI":"10.1109\/TAFFC.2017.2740923"},{"key":"e_1_3_3_2_24_2","unstructured":"Kartik Narayan Vibashan VS and Vishal\u00a0M. Patel. 2025. FaceXBench: Evaluating Multimodal LLMs on Face Understanding. arxiv:https:\/\/arXiv.org\/abs\/2501.10360\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2501.10360"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/2818346.2830593"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"crossref","unstructured":"Arjun Panickssery Samuel Bowman and Shi Feng. 2024. Llm evaluators recognize and favor their own generations. Advances in Neural Information Processing Systems 37 (2024) 68772\u201368802.","DOI":"10.52202\/079017-2197"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","unstructured":"Partha\u00a0Pratim Ray. 2023. ChatGPT: a Comprehensive Review on background applications Key challenges bias ethics Limitations and Future Scope. Internet of Things and Cyber-Physical Systems 3 (04 2023) 121\u2013154. 10.1016\/j.iotcps.2023.04.003","DOI":"10.1016\/j.iotcps.2023.04.003"},{"key":"e_1_3_3_2_28_2","unstructured":"Andres Romero Juan Leon and Pablo Arbelaez. 2018. Multi-View Dynamic Facial Action Unit Detection. arxiv:https:\/\/arXiv.org\/abs\/1704.07863\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/1704.07863"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","unstructured":"Rasmus Rothe Radu Timofte and Luc Van\u00a0Gool. 2018. Deep Expectation of Real and Apparent Age from a Single Image Without Facial Landmarks. Int. J. Comput. Vision 126 2\u20134 (April 2018) 144\u2013157. 10.1007\/s11263-016-0940-3","DOI":"10.1007\/s11263-016-0940-3"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","unstructured":"Cynthia Rudin. 2019. Stop explaining black box machine learning models for high stakes decisions and use interpretable models instead. Nature Machine Intelligence 1 5 (May 2019) 206\u2013215. 10.1038\/s42256-019-0048-x","DOI":"10.1038\/s42256-019-0048-x"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2013.59"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"crossref","unstructured":"Yuntao Shou Xiangyong Cao Huan Liu and Deyu Meng. 2025. Masked contrastive graph representation learning for age estimation. Pattern Recognition 158 (2025) 110974.","DOI":"10.1016\/j.patcog.2024.110974"},{"key":"e_1_3_3_2_33_2","unstructured":"Gemma Team Aishwarya Kamath Johan Ferret Shreya Pathak Nino Vieillard et\u00a0al. 2025. Gemma 3 Technical Report. arxiv:https:\/\/arXiv.org\/abs\/2503.19786\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2503.19786"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","unstructured":"Carina Trojahn Gabor Dobos Andrea Lichterfeld et\u00a0al. 2015. Characterizing facial skin ageing in humans: disentangling extrinsic from intrinsic biological phenomena. BioMed Research International 2015 (Feb 2015) 318586. 10.1155\/2015\/318586","DOI":"10.1155\/2015\/318586"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2001.990517"},{"key":"e_1_3_3_2_36_2","unstructured":"Gaojian Wang Feng Lin Tong Wu Zhenguang Liu Zhongjie Ba and Kui Ren. 2025. FSFM: A Generalizable Face Security Foundation Model via Self-Supervised Facial Representation Learning. arxiv:https:\/\/arXiv.org\/abs\/2412.12032\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2412.12032"},{"key":"e_1_3_3_2_37_2","unstructured":"Xiaoqin Wang Xusen Ma Xianxu Hou Meidan Ding Yudong Li Junliang Chen Wenting Chen Xiaoyang Peng and Linlin Shen. 2025. FaceBench: A Multi-View Multi-Level Facial Attribute VQA Dataset for Benchmarking Face Perception MLLMs. arxiv:https:\/\/arXiv.org\/abs\/2503.21457\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2503.21457"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611999"},{"key":"e_1_3_3_2_39_2","unstructured":"Zhiyu Wu and Jinshi Cui. 2023. LA-Net: Landmark-Aware Learning for Reliable Facial Expression Recognition under Label Noise. arxiv:https:\/\/arXiv.org\/abs\/2307.09023\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2307.09023"},{"key":"e_1_3_3_2_40_2","volume-title":"ACCV","author":"Yan Wen-Jing","year":"2020","unstructured":"Wen-Jing Yan, Shan Li, Chengtao Que, Jiquan Pei, and Weihong Deng. 2020. RAF-AU Database: In-the-Wild Facial Expressions with Subjective Emotion Judgement and Objective AU Annotations. In ACCV."},{"key":"e_1_3_3_2_41_2","unstructured":"Nikolay Yushev. 2019. language_tool_python: A Python Wrapper for LanguageTool. https:\/\/github.com\/jxmorris12\/language_tool_python Accessed: 2025-05-15."},{"key":"e_1_3_3_2_42_2","unstructured":"Tianyi Zhang Varsha Kishore Felix Wu Kilian\u00a0Q Weinberger and Yoav Artzi. 2019. Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1904.09675 (2019)."},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","unstructured":"Xing Zhang Lijun Yin Jeffrey\u00a0F. Cohn Shaun Canavan Michael Reale Andy Horowitz Peng Liu and Jeffrey\u00a0M. Girard. 2014. BP4D-Spontaneous: a high-resolution spontaneous 3D dynamic facial expression database. IVC 32 10 (2014) 692\u2013706. 10.1016\/j.imavis.2014.06.002Best of Automatic Face and Gesture Recognition 2013.","DOI":"10.1016\/j.imavis.2014.06.002"},{"key":"e_1_3_3_2_44_2","unstructured":"Yuhang Zhang Xiuqi Zheng Chenyi Liang Jiani Hu and Weihong Deng. 2024. Generalizable Facial Expression Recognition. arxiv:https:\/\/arXiv.org\/abs\/2408.10614\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2408.10614"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.463"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICME59968.2025.11208908"},{"key":"e_1_3_3_2_47_2","unstructured":"Yinglin Zheng Hao Yang Ting Zhang Jianmin Bao Dongdong Chen Yangyu Huang Lu Yuan Dong Chen Ming Zeng and Fang Wen. 2022. General Facial Representation Learning in a Visual-Linguistic Manner. arxiv:https:\/\/arXiv.org\/abs\/2112.03109\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2112.03109"},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-demos.38"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00360"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:21:07Z","timestamp":1781536867000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810710"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":48,"alternative-id":["10.1145\/3805622.3810710","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810710","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}