{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T08:40:47Z","timestamp":1771922447337,"version":"3.50.1"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100007706","name":"Ministry of Economic Development","doi-asserted-by":"publisher","award":["139-15-2025-010"],"award-info":[{"award-number":["139-15-2025-010"]}],"id":[{"id":"10.13039\/501100007706","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iccvw69036.2025.00013","type":"proceedings-article","created":{"date-parts":[[2026,2,23]],"date-time":"2026-02-23T20:44:02Z","timestamp":1771879442000},"page":"71-79","source":"Crossref","is-referenced-by-count":0,"title":["Zero-Shot Multimodal Compound Expression Recognition Approach Using Off-the-Shelf Large Visual-Language Models"],"prefix":"10.1109","author":[{"given":"Elena","family":"Ryumina","sequence":"first","affiliation":[{"name":"St. Petersburg Federal Research Center of the Russian Academy of Sciences,St. Petersburg,Russia"}]},{"given":"Maxim","family":"Markitantov","sequence":"additional","affiliation":[{"name":"St. Petersburg Federal Research Center of the Russian Academy of Sciences,St. Petersburg,Russia"}]},{"given":"Alexandr","family":"Axyonov","sequence":"additional","affiliation":[{"name":"St. Petersburg Federal Research Center of the Russian Academy of Sciences,St. Petersburg,Russia"}]},{"given":"Dmitry","family":"Ryumin","sequence":"additional","affiliation":[{"name":"St. Petersburg Federal Research Center of the Russian Academy of Sciences,St. Petersburg,Russia"}]},{"given":"Mikhail","family":"Dolgushin","sequence":"additional","affiliation":[{"name":"St. Petersburg Federal Research Center of the Russian Academy of Sciences,St. Petersburg,Russia"}]},{"given":"Alexey","family":"Karpov","sequence":"additional","affiliation":[{"name":"ITMO University,St. Petersburg,Russia"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"ref2","first-page":"10041","article-title":"Transformers are ssms: generalized models and efficient algorithms through structured state space dualit","volume-title":"International Conference on Machine Learning","author":"Dao"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3340555.3355710"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00259"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-25075-0_12"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00541"},{"key":"ref7","first-page":"1","article-title":"Expression, affect, action unit recognition: Aff-Wild2, multi-task learning and arcfac","author":"Kollias","journal-title":"arXiv preprint"},{"key":"ref8","first-page":"1","article-title":"Affect analysis in-the-wild: Valence-arousal, expressions, action units and a unified framewor","author":"Kollias","year":"2021","journal-title":"arXiv preprint"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/iccvw54120.2021.00408"},{"key":"ref10","first-page":"1","article-title":"Face behavior a la carte: Expressions, affect and action units in a single networ","author":"Kollias","year":"2019","journal-title":"arXiv preprint"},{"key":"ref11","first-page":"1","article-title":"Deep affect prediction in-the-wild: Aff-wild database and challenge, deep architectures, and beyo","author":"Kollias","year":"2019","journal-title":"IJCV"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/fg47880.2020.00126"},{"key":"ref13","first-page":"1","article-title":"Distribution matching for heterogeneous multi-task learning: a large-scale face stud","author":"Kollias","year":"2021","journal-title":"arXiv preprint"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00626"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i3.28061"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/cvprw63382.2024.00461"},{"key":"ref17","first-page":"1","article-title":"7th abaw competition: Multi-task learning and compound expression recognitio","author":"Kollias","year":"2024","journal-title":"arXiv preprint"},{"key":"ref18","first-page":"1","article-title":"Dvd: A comprehensive dataset for advancing violence detection in real-world scenario","author":"Kollias","year":"2025","journal-title":"arXiv preprint"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW67362.2025.00554"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-91581-9_20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-91581-9_19"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW67362.2025.00576"},{"key":"ref23","first-page":"1","article-title":"Mediapipe: A framework for building perception pipeline","author":"Lugaresi","year":"2019","journal-title":"arXiv preprint"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00476"},{"key":"ref25","first-page":"8748","article-title":"Learning transferable visual models from natural language supervisio","volume-title":"International Conference on Machine Learning","author":"Radford","year":"2021"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-91581-9_5"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.10.013"},{"key":"ref28","first-page":"1","article-title":"Team ras in 9th abaw competition: Multimodal compound expression recognition approac","author":"Ryumina","year":"2025","journal-title":"arXiv preprint"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00473"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-91581-9_18"},{"key":"ref31","first-page":"1","article-title":"jina-embeddings-v3: Multilingual embeddings with task lor","author":"Sturua","year":"2024","journal-title":"arXiv preprint"},{"key":"ref32","first-page":"1","article-title":"Attention is all you nee","author":"Vaswani","year":"2017","journal-title":"NeurIPS"},{"key":"ref33","first-page":"1","article-title":"Zero-shot compound expression recognition with visual language model at the 6th abaw challeng","author":"Wang","year":"2024","journal-title":"arXiv preprint"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.12.102"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00491"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.248"}],"event":{"name":"2025 IEEE\/CVF International Conference on Computer Vision Workshops (ICCVW)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,20]]}},"container-title":["2025 IEEE\/CVF International Conference on Computer Vision Workshops (ICCVW)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11373940\/11374285\/11375783.pdf?arnumber=11375783","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T07:35:59Z","timestamp":1771918559000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11375783\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/iccvw69036.2025.00013","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}