{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T16:00:48Z","timestamp":1772553648477,"version":"3.50.1"},"reference-count":44,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,1,5]],"date-time":"2023-01-05T00:00:00Z","timestamp":1672876800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,5]],"date-time":"2023-01-05T00:00:00Z","timestamp":1672876800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100000761","name":"Imperial College London","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000761","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,1,5]]},"DOI":"10.1109\/fg57933.2023.10042638","type":"proceedings-article","created":{"date-parts":[[2023,2,16]],"date-time":"2023-02-16T23:02:10Z","timestamp":1676588530000},"page":"1-8","source":"Crossref","is-referenced-by-count":6,"title":["SS-VAERR: Self-Supervised Apparent Emotional Reaction Recognition from Video"],"prefix":"10.1109","author":[{"given":"Marija","family":"Jegorova","sequence":"first","affiliation":[{"name":"Meta Reality Labs,London,United Kingdom"}]},{"given":"Stavros","family":"Petridis","sequence":"additional","affiliation":[{"name":"Meta Reality Labs,London,United Kingdom"}]},{"given":"Maja","family":"Pantic","sequence":"additional","affiliation":[{"name":"Meta Reality Labs,London,United Kingdom"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCTEC.2017.00022"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00896"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2021.3063609"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2944808"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00610"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2013.6553805"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2017.2764438"},{"key":"ref8","article-title":"Pre-training strategies and datasets for facial representation learning","volume":"abs\/2103.16554","author":"Bulat","year":"2021","journal-title":"CoRR"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ACII52823.2021.9597460"},{"key":"ref10","article-title":"Affectnet: A database for facial expression, valence, and arousal computing in the wild","author":"Mollahosseini","year":"2017","journal-title":"arXiv"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2011.07.002"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01158-4"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.5555\/3495724.3497510"},{"key":"ref14","article-title":"Emerging properties in self-supervised vision transformers","volume":"abs\/2104.14294","author":"Caron","year":"2021","journal-title":"CoRR"},{"key":"ref15","first-page":"297","article-title":"Expression, affect, action unit recognition: Aff-wild2, multi-task learning and arcface","volume-title":"30th British Machine Vision Conference 2019, BMVC 2019","author":"Kollias","year":"2019"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/T-AFFC.2010.1"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/2808196.2811642"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3266302.3266316"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3242969.3242972"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-020-00280-0"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.222"},{"key":"ref22","article-title":"Very deep convolutional networks for large-scale image recognition","volume-title":"International Conference on Learning Representations, ICLR 2015, Conference Track Proceedings","author":"Simonyan","year":"2015"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2009.191"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-019-05855-6"},{"key":"ref27","article-title":"Self-supervised representation learning: Introduction, advances and challenges","volume":"abs\/2110.09327","author":"Ericsson","year":"2021","journal-title":"CoRR"},{"key":"ref28","article-title":"Unsupervised representation learning by predicting image rotations","volume-title":"International Conference on Learning Representations, ICLR 2018","author":"Gidaris"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.278"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46487-9_40"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/s00371-018-1613-8"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46466-4_5"},{"key":"ref33","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","volume-title":"Proceedings of the 37th International Conference on Machine Learning, ICML 2020","volume":"119","author":"Chen"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01118"},{"key":"ref36","article-title":"Lira: Learning visual speech representations from audio through self-supervision","volume":"abs\/2106.09171","author":"Ma","year":"2021","journal-title":"CoRR"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3462244.3479955"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref40","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","volume-title":"International Conference on Learning Representations, ICLR","author":"Dosovitskiy","year":"2021"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-94"},{"key":"ref42","article-title":"LRS3-TED: a large-scale dataset for visual speech recognition","volume":"abs\/1809.00496","author":"Afouras","year":"2018","journal-title":"CoRR"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00525"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.116"}],"event":{"name":"2023 IEEE 17th International Conference on Automatic Face and Gesture Recognition (FG)","location":"Waikoloa Beach, HI, USA","start":{"date-parts":[[2023,1,5]]},"end":{"date-parts":[[2023,1,8]]}},"container-title":["2023 IEEE 17th International Conference on Automatic Face and Gesture Recognition (FG)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10042469\/10042499\/10042638.pdf?arnumber=10042638","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T13:15:45Z","timestamp":1707830145000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10042638\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1,5]]},"references-count":44,"URL":"https:\/\/doi.org\/10.1109\/fg57933.2023.10042638","relation":{},"subject":[],"published":{"date-parts":[[2023,1,5]]}}}