{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T14:46:31Z","timestamp":1770821191865,"version":"3.50.1"},"reference-count":74,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2211550"],"award-info":[{"award-number":["2211550"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000183","name":"Army Research Office","doi-asserted-by":"publisher","award":["W911NF-20-2-0053"],"award-info":[{"award-number":["W911NF-20-2-0053"]}],"id":[{"id":"10.13039\/100000183","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1925001"],"award-info":[{"award-number":["1925001"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Affective Comput."],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1109\/taffc.2025.3568024","type":"journal-article","created":{"date-parts":[[2025,5,8]],"date-time":"2025-05-08T13:39:13Z","timestamp":1746711553000},"page":"3000-3013","source":"Crossref","is-referenced-by-count":2,"title":["SetPeER: Set-Based Personalized Emotion Recognition With Weak Supervision"],"prefix":"10.1109","volume":"16","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5883-5954","authenticated-orcid":false,"given":"Minh","family":"Tran","sequence":"first","affiliation":[{"name":"Institute for Creative Technologies, Department of Computer Science, University of Southern California, Los Angeles, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5558-2421","authenticated-orcid":false,"given":"Yufeng","family":"Yin","sequence":"additional","affiliation":[{"name":"Institute for Creative Technologies, Department of Computer Science, University of Southern California, Los Angeles, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5873-1434","authenticated-orcid":false,"given":"Mohammad","family":"Soleymani","sequence":"additional","affiliation":[{"name":"Institute for Creative Technologies, Department of Computer Science, University of Southern California, Los Angeles, CA, USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-2170"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.comcom.2021.09.013"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747754"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/FG52635.2021.9666982"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1037\/14669-004"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3263585"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-020-09405-4"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2022.3187336"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2019.09.005"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2016.02.018"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/SMAP.2018.8501881"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"ref13","first-page":"12449","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Baevski","year":"2020"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2017.2736999"},{"key":"ref15","first-page":"10078","article-title":"VideoMAE: Masked autoencoders are data-efficient learners for self-supervised video pre-training","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Tong","year":"2022"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2016.2515617"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.1188976"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/MMUL.2012.26"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-74889-2_43"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2013.6553805"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2944808"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2010.5583006"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2014.2336244"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2008.4607572"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-008-9076-6"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3382507.3418872"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1208"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/230"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3233184"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414496"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.3390\/s18113744"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2016.2625250"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/2663204.2663247"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874246"},{"key":"ref35","first-page":"485","article-title":"A personalized affective memory model for improving emotion recognition","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Barros","year":"2019"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2020.3002657"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ACII55700.2022.9953863"},{"key":"ref38","first-page":"3391","article-title":"Deep sets","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Zaheer","year":"2017"},{"key":"ref39","first-page":"3744","article-title":"Set transformer: A framework for attention-based permutation-invariant neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lee","year":"2019"},{"key":"ref40","first-page":"1410","article-title":"Rep the Set: Neural networks for learning set representations","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Skianis","year":"2020"},{"key":"ref41","article-title":"Learning prototype-oriented set representations for meta-learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Guo","year":"2021"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240578"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1929"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.600"},{"key":"ref46","article-title":"Expression, affect, action unit recognition: Aff-wild2, multi-task learning and arcface","volume-title":"Brit. Mach. Vis. Conf.","author":"Kollias","year":"2019"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"ref48","first-page":"28492","article-title":"Robust speech recognition via large-scale weak supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford","year":"2023"},{"key":"ref49","article-title":"RoBERTa: A robustly optimized bert pretraining approach","author":"Liu","year":"2019"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.372"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/S18-1001"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1050"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1404"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1080\/02699939208411068"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10447678"},{"key":"ref56","first-page":"1298","article-title":"Data2vec: A general framework for self-supervised learning in speech, vision and language","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Baevski","year":"2022"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00259"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00626"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00461"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00479"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00469"},{"key":"ref62","first-page":"1180","article-title":"Unsupervised domain adaptation by backpropagation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ganin","year":"2015"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.2307\/2223319"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref65","first-page":"6309","article-title":"Neural discrete representation learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Oord","year":"2017"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acllong.353"},{"key":"ref67","article-title":"Representation learning with contrastive predictive coding","author":"Oord","year":"2018"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.2307\/j.ctvcm4g18.8"},{"key":"ref69","article-title":"Decoupled weight decay regularization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Loshchilov","year":"2019"},{"issue":"11","key":"ref70","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"Maaten","year":"2008","journal-title":"J. Mach. Learn. Res."},{"key":"ref71","first-page":"61","article-title":"Could speaker, gender or age awareness be beneficial in speech-based emotion recognition?","volume-title":"Proc. 10th Int. Conf. Lang. Resour. Eval.","author":"Sidorov","year":"2016"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1656"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.746"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1145\/3437359.3465586"}],"container-title":["IEEE Transactions on Affective Computing"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/5165369\/11269911\/10993348-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/5165369\/11269911\/10993348.pdf?arnumber=10993348","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:58:49Z","timestamp":1764269929000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10993348\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":74,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/taffc.2025.3568024","relation":{},"ISSN":["1949-3045","2371-9850"],"issn-type":[{"value":"1949-3045","type":"electronic"},{"value":"2371-9850","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10]]}}}