{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:25:47Z","timestamp":1775229947269,"version":"3.50.1"},"reference-count":73,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Affective Comput."],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1109\/taffc.2025.3547218","type":"journal-article","created":{"date-parts":[[2025,3,3]],"date-time":"2025-03-03T13:34:01Z","timestamp":1741008841000},"page":"1929-1941","source":"Crossref","is-referenced-by-count":3,"title":["Testing Correctness, Fairness, and Robustness of Speech Emotion Recognition Models"],"prefix":"10.1109","volume":"16","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-2493-2182","authenticated-orcid":false,"given":"Anna","family":"Derington","sequence":"first","affiliation":[{"name":"audEERING GmbH, Gilching, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2628-8261","authenticated-orcid":false,"given":"Hagen","family":"Wierstorf","sequence":"additional","affiliation":[{"name":"audEERING GmbH, Gilching, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0930-2948","authenticated-orcid":false,"given":"Ali","family":"\u00d6zkil","sequence":"additional","affiliation":[{"name":"Jabra, GN Audio, Ballerup, Denmark"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0330-8545","authenticated-orcid":false,"given":"Florian","family":"Eyben","sequence":"additional","affiliation":[{"name":"audEERING GmbH, Gilching, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2689-0545","authenticated-orcid":false,"given":"Felix","family":"Burkhardt","sequence":"additional","affiliation":[{"name":"audEERING GmbH, Gilching, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6478-8699","authenticated-orcid":false,"given":"Bj\u00f6rn W.","family":"Schuller","sequence":"additional","affiliation":[{"name":"audEERING GmbH, Gilching, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/s42254-022-00441-7"},{"key":"ref2","first-page":"1","article-title":"Underspecification presents challenges for credibility in modern machine learning","volume":"23","author":"D\u2019Amour","year":"2022","journal-title":"J. Mach. Learn. Res."},{"key":"ref3","article-title":"On the opportunities and risks of foundation models","author":"Bommasani","year":"2021"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3287560.3287596"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/tse.2019.2962027"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1017\/9781316771273"},{"key":"ref7","first-page":"167","article-title":"An approach to software testing of machine learning applications","volume-title":"Proc. 19th Int. Conf. Softw. Eng. Knowl. Eng.","author":"Murphy","year":"2007"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3180155.3180220"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.442"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"347","DOI":"10.1145\/3377811.3380422","article-title":"DeepBillboard: Systematic physical-world testing of autonomous driving systems","volume-title":"Proc. ACM\/IEEE 42nd Int. Conf. Softw. Eng.","author":"Zhou","year":"2020"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2018.02.004"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/icassp43922.2022.9747348"},{"key":"ref13","article-title":"Best practices for noise-based augmentation to improve the performance of deployable speech-based emotion recognition systems","author":"Jaiswal","year":"2021"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2022-10371"},{"key":"ref15","article-title":"Bias and fairness on multimodal emotion detection algorithms","author":"Schmitz","year":"2022"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1147\/jrd.2019.2942287"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"ref18","first-page":"12449","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Baevski","year":"2020"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"issue":"PMLR","key":"ref20","first-page":"1298","article-title":"Data2vec: A general framework for self-supervised learning in speech, vision and language","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Baevski","year":"2022"},{"key":"ref21","first-page":"107547","article-title":"xLSTM: extended long short-term memory","volume-title":"Adv. Neural Inf. Process. Syst.","volume":"37","author":"Beck","year":"2024"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10094895"},{"key":"ref23","article-title":"Towards testing of deep learning systems with training set reduction","author":"Spieker","year":"2019"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2014.2336244"},{"key":"ref25","article-title":"Test splits for CREMA-D, emoDB, IEMOCAP, MELD, RAVDESS","author":"Wierstorf","year":"2023"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/eurospeech.1997-482"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2005-446"},{"key":"ref28","first-page":"3501","article-title":"EMOVO corpus: An italian emotional speech database","volume-title":"Proc. 9th Int. Conf. Lang. Resour. Eval.","author":"Costantini","year":"2014"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-008-9076-6"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1050"},{"key":"ref31","first-page":"1597","article-title":"EmotionLines: An emotion corpus of multi-party conversations","volume-title":"Proc. 11th Int. Conf. Lang. Resour. Eval.","author":"Hsu","year":"2018"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2017.2736999"},{"key":"ref33","article-title":"Polish emotional speech database","year":"2019"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.1188976"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1111\/j.1467-9280.2007.02024.x"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1024\/1421-0185\/a000180"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICSMC.2012.6378303"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-015-3119-y"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2003.813506"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3457607"},{"issue":"1","key":"ref41","first-page":"14730","article-title":"The measure and mismeasure of fairness: A critical review of fair machine learning","volume":"24","author":"Corbett-Davies","year":"2023","journal-title":"J. Mach. Learn. Res."},{"key":"ref42","article-title":"Review of mathematical frameworks for fairness in machine learning","author":"Barrio","year":"2020"},{"key":"ref43","first-page":"120","article-title":"Fair regression: Quantitative definitions and reduction-based algorithms","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Agarwal","year":"2019"},{"key":"ref44","article-title":"Speech accent archive","author":"Weinberger","year":"2015"},{"key":"ref45","first-page":"4211","article-title":"Common voice: A massively-multilingual speech corpus","volume-title":"Proc. 12th Conf. Lang. Resour. Eval.","author":"Ardila","year":"2020"},{"key":"ref46","first-page":"479","article-title":"OPUS-MT \u2014 Building open translation services for the World","volume-title":"Proc. 22nd Annu. Conf. Eur. Assoc. Mach. Transl.","author":"Tiedemann","year":"2020"},{"key":"ref47","article-title":"Argos translate","author":"Finlay","year":"2019"},{"key":"ref48","article-title":"coqui-ai\/TTS, version 0.6.1","author":"Eren","year":"2021"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053512"},{"key":"ref50","article-title":"PRAAT: Doing phonetics by computer [computer program]","author":"Boersma","year":"2023"},{"key":"ref51","article-title":"Intriguing properties of neural networks","volume-title":"Int. Conf. Learn. Representations (ICLR)","author":"Szegedy","year":"2024"},{"key":"ref52","article-title":"Measuring neural net robustness with constraints","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Bastani","year":"2016"},{"key":"ref53","article-title":"MUSAN: A music, speech, and noise corpus","author":"Snyder","year":"2015"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/acii.2017.8273622"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2019-1525"},{"key":"ref56","first-page":"1","article-title":"Evaluation of speech dereverberation algorithms using the mardy database","volume-title":"Proc. Int. Workshop Acoustic Echo Noise Control","author":"Wen","year":"2006"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICDSP.2009.5201259"},{"key":"ref58","article-title":"Timit acoustic-phonetic continuous speech corpus","author":"Garofolo","year":"1993"},{"key":"ref59","first-page":"7669","article-title":"Libri-light: A benchmark for ASR with limited or no supervision","volume-title":"Proc. ICASSP 2020 IEEE Int. Conf. Acoustics, Speech Signal Process.","author":"Kahn","year":"2020"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1965"},{"key":"ref61","first-page":"993","article-title":"VoxPopuli: A large-scale multilingual speech corpus for representation learning, semi-supervised learning and interpretation","volume-title":"Proc. 59th Annu. Meeting Assoc. Comput. Linguistics, 11th Int. Joint Conf. Natural Lang. Process. (Volume 1: Long Papers)","author":"Wang","year":"2021"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3263585"},{"key":"ref64","first-page":"34","article-title":"Switchboard-1 release 2 LDC97S62","author":"Godfrey","year":"1993","journal-title":"Linguistic Data Consortium"},{"key":"ref65","article-title":"Fisher english training speech part 1 transcript","author":"Cieri","year":"2004"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2826"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383459"},{"key":"ref68","first-page":"16","article-title":"Speech recognition and keyword spotting for low-resource languages: Babel project research at cued","volume-title":"Proc. 4th Int. Workshop Spoken Lang. Technol. Under-Resourced Lang.","author":"Gales","year":"2014"},{"key":"ref69","article-title":"Audio xLSTM: Learning self-supervised audio representations with xLSTM","author":"Yadav","year":"2024"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.21437\/odyssey.2024-35"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3030497"},{"key":"ref73","article-title":"Distilling the knowledge in a neural network","volume-title":"Proc. Adv. Neural Inf. Process. Syst. deep learn. workshop","author":"Hinton","year":"2015"}],"container-title":["IEEE Transactions on Affective Computing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/5165369\/11152495\/10908859.pdf?arnumber=10908859","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T05:18:32Z","timestamp":1757135912000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10908859\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7]]},"references-count":73,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/taffc.2025.3547218","relation":{},"ISSN":["1949-3045","2371-9850"],"issn-type":[{"value":"1949-3045","type":"electronic"},{"value":"2371-9850","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7]]}}}