{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,10]],"date-time":"2025-04-10T05:13:50Z","timestamp":1744262030299,"version":"3.37.3"},"reference-count":75,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2023,4,1]],"date-time":"2023-04-01T00:00:00Z","timestamp":1680307200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"},{"start":{"date-parts":[[2023,4,1]],"date-time":"2023-04-01T00:00:00Z","timestamp":1680307200000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100008982","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-2016719","CAREER IIS-1453781"],"award-info":[{"award-number":["CNS-2016719","CAREER IIS-1453781"]}],"id":[{"id":"10.13039\/501100008982","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Affective Comput."],"published-print":{"date-parts":[[2023,4,1]]},"DOI":"10.1109\/taffc.2021.3127390","type":"journal-article","created":{"date-parts":[[2021,11,11]],"date-time":"2021-11-11T20:30:35Z","timestamp":1636662635000},"page":"1376-1390","source":"Crossref","is-referenced-by-count":8,"title":["Quantifying Emotional Similarity in Speech"],"prefix":"10.1109","volume":"14","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3633-6756","authenticated-orcid":false,"given":"John","family":"Harvill","sequence":"first","affiliation":[{"name":"Department of Electrical and Computer Engineering, The University of Texas at Dallas, Richardson, TX, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1175-1577","authenticated-orcid":false,"given":"Seong-Gyun","family":"Leem","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, The University of Texas at Dallas, Richardson, TX, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1316-6141","authenticated-orcid":false,"given":"Mohammed","family":"AbdelWahab","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, The University of Texas at Dallas, Richardson, TX, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1891-0267","authenticated-orcid":false,"given":"Reza","family":"Lotfian","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, The University of Texas at Dallas, Richardson, TX, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4075-4072","authenticated-orcid":false,"given":"Carlos","family":"Busso","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, The University of Texas at Dallas, Richardson, TX, USA"}]}],"member":"263","reference":[{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2064164"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2898816"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2009.4959919"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2010.10.001"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2014-60"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2017.8273608"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/1460676.1460684"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2018.2879512"},{"key":"ref53","first-page":"1","article-title":"A siamese neural network with modified distance loss for transfer learning in speech emotion recognition","author":"feng","year":"2020","journal-title":"Proc AAAI-20 Workshop Affective Content Anal Interactive Affect Response"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2019.2928297"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2017.2695460"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1391"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICMEW.2014.6890568"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"1103","DOI":"10.21437\/Interspeech.2017-1494","article-title":"Jointly predicting arousal, valence and dominance with multi-task learning","author":"parthasarathy","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2014.2352268"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2478"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953107"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472670"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.3813\/AAA.919214"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR48806.2021.9413144"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953194"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1209"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-62365-4_48"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2278"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC45102.2020.9294562"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2991484"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1608"},{"key":"ref43","article-title":"Deep speaker: An end-to-end neural speaker embedding system","author":"li","year":"2017","journal-title":"arXiv 1705 02304"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1432"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1109\/TSA.2004.838534","article-title":"Toward detecting emotions in spoken dialogs","volume":"13","author":"lee","year":"2005","journal-title":"IEEE Trans Speech Audio Process"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2010-643"},{"key":"ref9","first-page":"4157","article-title":"Analyzing the memory of BLSTM neural networks for enhanced emotion classification in dyadic spoken interactions","author":"w\u00f6llmer","year":"2012","journal-title":"Proc Int Conf Acoust Speech Signal Process"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/s10044-006-0025-y"},{"key":"ref3","article-title":"Big data analytics and AI in mental healthcare","author":"rosenfeld","year":"2019","journal-title":"arXiv 1903 12071"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-47987-2_93"},{"key":"ref5","first-page":"7","article-title":"Integrating affect sensors in an intelligent tutoring system","author":"d\u2019mello","year":"2005","journal-title":"Proc Int Conf Intell User Interfaces Affect Interact"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2017.194"},{"key":"ref35","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","author":"chen","year":"2020","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472194"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00393"},{"key":"ref36","article-title":"Speech SIMCLR: Combining contrastive and reconstruction objective for self-supervised speech representation learning","author":"jiang","year":"2020","journal-title":"arXiv 2010 13991"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1142\/S0218001493000339"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1037\/\/0022-3514.76.5.805"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3031549"},{"key":"ref74","first-page":"12449","article-title":"Wav2vec 2.0: A framework for self-supervised learning of speech representations","author":"baevski","year":"2020","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.202"},{"key":"ref32","first-page":"1","article-title":"Siamese neural networks for one-shot image recognition","author":"koch","year":"2015","journal-title":"Proc ICML Deep Learn Workshop"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/S0272-7358(02)00130-7"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TBME.2010.2091640"},{"key":"ref39","first-page":"354","article-title":"In defense of the triplet loss for person re-identification","author":"hermans","year":"0","journal-title":"in Proc IEEE\/CVF Conf Comput Vis Pattern Recognit Workshops"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_28"},{"key":"ref71","article-title":"MobileNets: Efficient convolutional neural networks for mobile vision applications","author":"howard","year":"2017","journal-title":"arXiv 1704 04861"},{"article-title":"YAMNet","year":"2020","author":"plakal","key":"ref70"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1242"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952132"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2015-330"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3023632"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472668"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(03)00138-2"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-429"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2018.2878715"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2015.2457417"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2593944"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2015.2493525"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2015.7344648"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2014.01.003"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874246"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-1052"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2013-56"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683273"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2017.2736999"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1145\/3242969.3243019"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-24600-5_47"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2015.7344627"}],"container-title":["IEEE Transactions on Affective Computing"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/5165369\/10138707\/9612052-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5165369\/10138707\/09612052.pdf?arnumber=9612052","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,19]],"date-time":"2023-06-19T18:10:12Z","timestamp":1687198212000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9612052\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,1]]},"references-count":75,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/taffc.2021.3127390","relation":{},"ISSN":["1949-3045","2371-9850"],"issn-type":[{"type":"electronic","value":"1949-3045"},{"type":"electronic","value":"2371-9850"}],"subject":[],"published":{"date-parts":[[2023,4,1]]}}}