{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,27]],"date-time":"2026-06-27T15:33:23Z","timestamp":1782574403724,"version":"3.54.5"},"reference-count":138,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100011038","name":"Office of the Director of National Intelligence (ODNI)","doi-asserted-by":"publisher","award":["2022-21102100005"],"award-info":[{"award-number":["2022-21102100005"]}],"id":[{"id":"10.13039\/100011038","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100011039","name":"Intelligence Advanced Research Projects Activity (IARPA)","doi-asserted-by":"publisher","award":["2022-21102100005"],"award-info":[{"award-number":["2022-21102100005"]}],"id":[{"id":"10.13039\/100011039","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iccv51701.2025.01058","type":"proceedings-article","created":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:45:49Z","timestamp":1777491949000},"page":"11369-11382","source":"Crossref","is-referenced-by-count":6,"title":["FaceXFormer: A Unified Transformer for Facial Analysis"],"prefix":"10.1109","author":[{"given":"Kartik","family":"Narayan","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Vibashan","family":"VS","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Rama","family":"Chellappa","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Vishal M.","family":"Patel","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.matpr.2020.08.350"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00077"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00753"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00166"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR48806.2021.9412608"},{"key":"ref6","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.191"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2020.11.008"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00123"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.396"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.110263"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00074"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00699"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00482"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01173"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00598"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00047"},{"key":"ref19","article-title":"An image is worth $16 \\times 16$ words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020","journal-title":"arXiv preprint arXiv"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-012-0549-0"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00245"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2018.00238"},{"key":"ref23","article-title":"Learning expectation of label distribution for facial age and attractiveness estimation","author":"Gao","year":"2020","journal-title":"arXiv preprint arXiv"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ATSIP49331.2020.9231835"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46487-9_6"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58565-5_20"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2738004"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.11229"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952703"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2018.2866770"},{"key":"ref31","article-title":"Labeled faces in the wild: A database forstudying face recognition in unconstrained environments","volume-title":"Workshop on faces in\u2019Real-Life\u2019Images: detection, alignment, and recognition","author":"Huang","year":"2008"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-49409-8_14"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01521-4"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.450"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00159"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00159"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01819"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00826"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-54534-4_15"},{"key":"ref41","article-title":"Hih: Towards more accurate face alignment via heatmap in heatmap","author":"Lan","year":"2021","journal-title":"arXiv preprint arXiv"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00559"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2015.7301352"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3049955"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.277"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01368"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02640"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2886767"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3082319"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00580"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3081873"},{"key":"ref52","article-title":"Query2label: A simple transformer way to multi-label classification. arxiv 2021","author":"Liu","year":"2021","journal-title":"arXiv preprint arXiv"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.425"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.425"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.3390\/app10093135"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2018.2820048"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2020.2969189"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICRIS52159.2020.00041"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02185"},{"key":"ref60","article-title":"Dynamic multitask learning for face recognition with facial expression","author":"Ming","year":"2019","journal-title":"arXiv preprint arXiv"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2858821"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2017.2740923"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.250"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2007.4357803"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00939"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/wacv61041.2025.00088"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i6.32661"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.532"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46466-4_5"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00120"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3304724"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72754-2_14"},{"key":"ref73","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"International conference on machine learning","author":"Radford","year":"2021"},{"key":"ref74","first-page":"8821","article-title":"Zero-shot text-to-image generation","volume-title":"International Conference on Machine Learning","author":"Ramesh","year":"2021"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2781233"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2017.137"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_2"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/cvprw.2018.00281"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2013.59"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02009"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2016.7477558"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00618"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2015.132"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01820"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01174"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.3390\/app11125366"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-67558-9_28"},{"key":"ref88","article-title":"Task-adaptive q-face","author":"Sun","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref89","article-title":"Deep learning face representation by joint identificationverification","volume":"27","author":"Sun","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.220"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58610-2_16"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3113780"},{"key":"ref93","article-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2019.102846"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3046323"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00552"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2983686"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.52202\/068431-0412"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00693"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2956143"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01813"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01100"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.420"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2909652"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3167743"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2010.230"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19778-9_10"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00227"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.417"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.606"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00402"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2105.15203"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00162"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_47"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00118"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/FG57933.2023.10042570"},{"key":"ref118","article-title":"Florence: A new foundation model for computer vision","author":"Yuan","year":"2021","journal-title":"arXiv preprint arXiv"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01190"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_14"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00859"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2018.2800901"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.212"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10599-4_7"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.463"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR48806.2021.9413000"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00412"},{"issue":"7","key":"ref128","first-page":"5","article-title":"Cross-pose lfw: A database for studying cross-pose face recognition in unconstrained environments","volume":"5","author":"Zheng","year":"2018","journal-title":"Beijing University of Posts and Telecommunications, Tech. Rep"},{"key":"ref129","article-title":"Crossage lfw: A database for studying cross-age face recognition in unconstrained environments","author":"Zheng","year":"2017","journal-title":"arXiv preprint arXiv"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01814"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.5244\/C.34.189"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01485"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00918"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00515"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.23"},{"key":"ref136","article-title":"Deformable detr: Deformable transformers for end-to-end object detection","author":"Zhu","year":"2020","journal-title":"arXiv preprint arXiv"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2018.8545271"},{"key":"ref138","article-title":"Segment everything everywhere all at once","volume":"36","author":"Zou","year":"2024","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"2025 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11443115\/11443287\/11444244.pdf?arnumber=11444244","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T06:18:24Z","timestamp":1777529904000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11444244\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":138,"URL":"https:\/\/doi.org\/10.1109\/iccv51701.2025.01058","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}