{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T05:26:11Z","timestamp":1780637171790,"version":"3.54.1"},"reference-count":74,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,8,21]],"date-time":"2022-08-21T00:00:00Z","timestamp":1661040000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,8,21]],"date-time":"2022-08-21T00:00:00Z","timestamp":1661040000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,8,21]]},"DOI":"10.1109\/icpr56361.2022.9956589","type":"proceedings-article","created":{"date-parts":[[2022,11,29]],"date-time":"2022-11-29T14:34:13Z","timestamp":1669732453000},"page":"2589-2596","source":"Crossref","is-referenced-by-count":21,"title":["Multimodal Emotion Recognition with Modality-Pairwise Unsupervised Contrastive Loss"],"prefix":"10.1109","author":[{"given":"Riccardo","family":"Franceschini","sequence":"first","affiliation":[{"name":"Eurecat, Centre Tecnol&#x00F2;gic de Catalunya,Cerdanyola del Valles,Spain"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Enrico","family":"Fini","sequence":"additional","affiliation":[{"name":"University of Trento,Department of Information Engineering and Computer Science,Trento,Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Cigdem","family":"Beyan","sequence":"additional","affiliation":[{"name":"University of Trento,Department of Information Engineering and Computer Science,Trento,Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Alessandro","family":"Conti","sequence":"additional","affiliation":[{"name":"University of Trento,Department of Information Engineering and Computer Science,Trento,Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Federica","family":"Arrigoni","sequence":"additional","affiliation":[{"name":"University of Trento,Department of Information Engineering and Computer Science,Trento,Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Elisa","family":"Ricci","sequence":"additional","affiliation":[{"name":"University of Trento,Department of Information Engineering and Computer Science,Trento,Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref73","article-title":"CMU Multimodal SDK","year":"0"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2019.104886"},{"key":"ref71","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1371\/journal.pone.0196391","article-title":"The ryerson audio-visual database of emotional speech and song (ravdess): A dynamic, multimodal set of facial and vocal expressions in north american english","volume":"13","author":"livingstone","year":"2018","journal-title":"PLoS ONE"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00689"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1142"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-85099-1_8"},{"key":"ref38","first-page":"35","author":"sharma","year":"2021","journal-title":"A Survey on Automatic Multimodal Emotion Recognition in the Wild"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00156"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/s00779-019-01235-y"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414996"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682541"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2020.532279"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.3390\/s20195559"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/SISY52375.2021.9582508"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01258-8_7"},{"key":"ref60","article-title":"Attention is all you need","volume":"30","author":"vaswani","year":"2017","journal-title":"NeurIPS"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00675"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2016.2603342"},{"key":"ref63","article-title":"The kinetics human action video dataset","volume":"abs 1705 6950","author":"kay","year":"2017","journal-title":"CoRR"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/MWSCAS47672.2021.9531812"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.116"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3116530"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/505"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461829"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i02.5492"},{"key":"ref67","doi-asserted-by":"crossref","first-page":"18","DOI":"10.25080\/Majora-7b98e3ed-003","article-title":"librosa: Audio and music signal analysis in python","volume":"8","author":"mcfee","year":"2015","journal-title":"the 14th Python in Science Conference"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1673"},{"key":"ref69","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"radford","year":"2021","journal-title":"ICML"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2006-157"},{"key":"ref1","article-title":"SPRING: Socially pertinent robots in gerontological healthcare","year":"0"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1891"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2021.04.023"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.417"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413910"},{"key":"ref23","first-page":"22 243","article-title":"Big self-supervised models are strong semi-supervised learners","volume":"33","author":"chen","year":"2020","journal-title":"NeurIPS"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.challengehml-1.3"},{"key":"ref25","article-title":"Unsupervised multimodal language representations using convolutional autoencoders","volume":"abs 2110 3007","author":"koromilas","year":"2021","journal-title":"ArXiv"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.3390\/s21165452"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/MMUL.2021.3080305"},{"key":"ref59","article-title":"3d human action representation learning via cross-view consistency pursuit","author":"linguo","year":"2021","journal-title":"CVPR"},{"key":"ref58","article-title":"Unsupervised Human Action Recognition with Skeletal Graph Laplacian and Self-Supervised Viewpoints Invariance","author":"paoletti","year":"2021","journal-title":"BMVC"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240578"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1929"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.nlpbt-1.1"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICSEC47112.2019.8974707"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1007\/s12652-021-03529-7"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2798607"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2021.3095425"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/2522848.2531741"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICEIC49074.2020.9051332"},{"key":"ref13","first-page":"1","article-title":"Dynamic emotion modeling with learnable graphs and graph inception network","author":"shirian","year":"2021","journal-title":"IEEE Trans on Multimedia"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.143"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2019.00130"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2021.103178"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2984368"},{"key":"ref18","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","author":"chen","year":"2020","journal-title":"ICML"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00377"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICMCS.2018.8525872"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/IWCMC51323.2021.9498861"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1080\/0361073X.2014.882210"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/MCE.2021.3062802"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2013.00118"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1080\/02699931.2018.1454403"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383618"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2019.00184"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1566"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-3304"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP40778.2020.9191019"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/3380744"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2019.8925444"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.challengehml-1.1"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/K18-1025"},{"key":"ref43","article-title":"Multimodal language analysis in the wild: Cmu-mosei dataset and interpretable dynamic fusion graph","author":"zadeh","year":"2018","journal-title":"ACL"}],"event":{"name":"2022 26th International Conference on Pattern Recognition (ICPR)","location":"Montreal, QC, Canada","start":{"date-parts":[[2022,8,21]]},"end":{"date-parts":[[2022,8,25]]}},"container-title":["2022 26th International Conference on Pattern Recognition (ICPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9956007\/9955631\/09956589.pdf?arnumber=9956589","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,19]],"date-time":"2022-12-19T15:04:12Z","timestamp":1671462252000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9956589\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,21]]},"references-count":74,"URL":"https:\/\/doi.org\/10.1109\/icpr56361.2022.9956589","relation":{},"subject":[],"published":{"date-parts":[[2022,8,21]]}}}