{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T03:13:53Z","timestamp":1778728433113,"version":"3.51.4"},"reference-count":54,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T00:00:00Z","timestamp":1762473600000},"content-version":"vor","delay-in-days":6,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100004371","name":"Tampereen Yliopisto","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004371","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004012","name":"Jane and Aatos Erkko Fundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004012","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Speech Communication"],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1016\/j.specom.2025.103327","type":"journal-article","created":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T23:20:20Z","timestamp":1762298420000},"page":"103327","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["FinnAffect: An affective speech corpus for spontaneous Finnish"],"prefix":"10.1016","volume":"175","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-3126-1026","authenticated-orcid":false,"given":"Kalle","family":"Lahtinen","sequence":"first","affiliation":[]},{"given":"Liisa","family":"Mustanoja","sequence":"additional","affiliation":[]},{"given":"Okko","family":"R\u00e4s\u00e4nen","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.specom.2025.103327_b1","doi-asserted-by":"crossref","first-page":"26","DOI":"10.1159\/000091405","article-title":"Emotions in vowel segments of continuous speech: Analysis of the glottal flow using the normalized amplitude quotient","volume":"63","author":"Airas","year":"2006","journal-title":"Phonetica"},{"key":"10.1016\/j.specom.2025.103327_b2","doi-asserted-by":"crossref","unstructured":"Amiriparian, S., Packa\u0144, F., Gerczuk, M., Schuller, B.W., 2024a. ExHuBERT: Enhancing HuBERT Through Block Extension and Fine-Tuning on 37 Emotion Datasets. In: Proc. INTERSPEECH. pp. 2635\u20132639.","DOI":"10.21437\/Interspeech.2024-280"},{"key":"10.1016\/j.specom.2025.103327_b3","series-title":"ExHuBERT huggingface model card","author":"Amiriparian","year":"2024"},{"key":"10.1016\/j.specom.2025.103327_b4","series-title":"Proc. of the 34th International Conference on Neural Information Processing Systems","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","author":"Baevski","year":"2020"},{"key":"10.1016\/j.specom.2025.103327_b5","doi-asserted-by":"crossref","unstructured":"Burkhardt, F., Paeschke, A., Rolfes, M., Sendlmeier, W.F., Weiss, B., 2005. A database of German emotional speech. In: INTERSPEECH 2005. pp. 1517\u20131520.","DOI":"10.21437\/Interspeech.2005-446"},{"key":"10.1016\/j.specom.2025.103327_b6","doi-asserted-by":"crossref","first-page":"335","DOI":"10.1007\/s10579-008-9076-6","article-title":"IEMOCAP: Interactive emotional dyadic motion capture database","volume":"42","author":"Busso","year":"2008","journal-title":"Lang. Resour. Eval."},{"key":"10.1016\/j.specom.2025.103327_b7","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1177\/001316446002000104","article-title":"A coefficient of agreement for nominal scales","volume":"20","author":"Cohen","year":"1960","journal-title":"Educ. Psychol. Meas."},{"key":"10.1016\/j.specom.2025.103327_b8","first-page":"1970","article-title":"Recognizing emotion in speech","volume":"vol. 3","author":"Dellaert","year":"1996"},{"key":"10.1016\/j.specom.2025.103327_b9","doi-asserted-by":"crossref","first-page":"134","DOI":"10.3758\/BRM.42.1.134","article-title":"Affective norms for 210 british english and finnish nouns","volume":"42","author":"Eilola","year":"2010","journal-title":"Behav. Res. Methods"},{"key":"10.1016\/j.specom.2025.103327_b10","doi-asserted-by":"crossref","first-page":"429","DOI":"10.1017\/S0140525X0999094X","article-title":"The myth of language universals: Language diversity and its importance for cognitive science","volume":"32","author":"Evans","year":"2009","journal-title":"Behav. Brain Sci."},{"key":"10.1016\/j.specom.2025.103327_b11","doi-asserted-by":"crossref","first-page":"190","DOI":"10.1109\/TAFFC.2015.2457417","article-title":"The geneva minimalistic acoustic parameter set (GeMAPS) for voice research and affective computing","volume":"7","author":"Eyben","year":"2016","journal-title":"IEEE Trans. Affect. Comput."},{"key":"10.1016\/j.specom.2025.103327_b12","series-title":"Proceedings of the 18th ACM International Conference on Multimedia","first-page":"1459","article-title":"Opensmile: The munich versatile and fast open-source audio feature extractor","author":"Eyben","year":"2010"},{"issue":"378","key":"10.1016\/j.specom.2025.103327_b13","article-title":"Measuring nominal scale agreement among many raters","volume":"76","author":"Fleiss","year":"1971","journal-title":"Psychol. Bull."},{"key":"10.1016\/j.specom.2025.103327_b14","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2023.127015","article-title":"A review on speech emotion recognition: A survey, recent advances, challenges, and the influence of noise","volume":"568","author":"George","year":"2024","journal-title":"Neurocomputing"},{"key":"10.1016\/j.specom.2025.103327_b15","series-title":"Odyssey 2024 emotion recognition competition baseline for arousal prediction","author":"Goncalves","year":"2024"},{"key":"10.1016\/j.specom.2025.103327_b16","series-title":"Odyssey 2024 emotion recognition competition baseline for valence prediction","author":"Goncalves","year":"2024"},{"key":"10.1016\/j.specom.2025.103327_b17","doi-asserted-by":"crossref","unstructured":"Goncalves, L., Salman, A.N., Reddy Naini, L., Thebaud, T., Paola Garcia, N., Sisman, B., Busso, C., 2024c. Odyssey2024 - speech emotion recognition challenge: Dataset, baseline framework, and results. In: Odyssey 2024: The Speaker and Language Recognition Workshop, Quebec, Canada.","DOI":"10.21437\/odyssey.2024-35"},{"key":"10.1016\/j.specom.2025.103327_b18","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1016\/0304-3975(85)90224-5","article-title":"Clustering to minimize the maximum intercluster distance","volume":"38","author":"Gonzalez","year":"1985","journal-title":"Theoret. Comput. Sci."},{"key":"10.1016\/j.specom.2025.103327_b19","doi-asserted-by":"crossref","first-page":"3451","DOI":"10.1109\/TASLP.2021.3122291","article-title":"HuBERT: Self-supervised speech representation learning by masked prediction of hidden units","volume":"29","author":"Hsu","year":"2021","journal-title":"IEEE\/ACM Trans. Audio, Speech, Lang. Process."},{"key":"10.1016\/j.specom.2025.103327_b20","series-title":"Finding Groups in Data: An Introduction to Cluster Analysis","author":"Kaufman","year":"2009"},{"key":"10.1016\/j.specom.2025.103327_b21","series-title":"Principles of Linguistic Change. Volume 1, Internal Factors","first-page":"73","author":"Labov","year":"1994"},{"key":"10.1016\/j.specom.2025.103327_b22","series-title":"Interspeech 2025","first-page":"3958","article-title":"Investigating affect mining techniques for annotation sample selection in the creation of finnish affective speech corpus","author":"Lahtinen","year":"2025"},{"key":"10.1016\/j.specom.2025.103327_b23","doi-asserted-by":"crossref","unstructured":"Lavechin, M., M\u00e9tais, M., Titeux, H., Boissonnet, A., Copet, J., Rivi\u00e8re, M., Bergelson, E., Cristia, A., Dupoux, E., Bredin, H., 2023. Brouhaha: Multi-Task Training for Voice Activity Detection, Speech-to-Noise Ratio, and C50 Room Acoustics Estimation. In: 2023 IEEE Automatic Speech Recognition and Understanding Workshop. ASRU, pp. 1\u20137.","DOI":"10.1109\/ASRU57964.2023.10389718"},{"key":"10.1016\/j.specom.2025.103327_b24","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1109\/TSA.2004.838534","article-title":"Toward detecting emotions in spoken dialogs","volume":"13","author":"Lee","year":"2005","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"10.1016\/j.specom.2025.103327_b25","series-title":"Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)","first-page":"345","article-title":"Grapheme-based cross-language forced alignment: Results with uralic languages","author":"Leinonen","year":"2021"},{"key":"10.1016\/j.specom.2025.103327_b26","series-title":"Contributions to Probability and Statistics: Essays in Honor of Harold Hotelling","first-page":"278","article-title":"Robust tests for the equality of variances","author":"Levene","year":"1960"},{"key":"10.1016\/j.specom.2025.103327_b27","doi-asserted-by":"crossref","first-page":"581","DOI":"10.1007\/s10579-023-09644-5","article-title":"FinnSentiment: a finnish social media corpus for sentiment polarity annotation","volume":"57","author":"Lind\u00e9n","year":"2023","journal-title":"Lang. Resour. Eval."},{"key":"10.1016\/j.specom.2025.103327_b28","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1371\/journal.pone.0196391","article-title":"The ryerson audio-visual database of emotional speech and song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American english","volume":"13","author":"Livingstone","year":"2018","journal-title":"PLoS One"},{"key":"10.1016\/j.specom.2025.103327_b29","doi-asserted-by":"crossref","first-page":"471","DOI":"10.1109\/TAFFC.2017.2736999","article-title":"Building naturalistic emotionally balanced speech corpus by retrieving emotional speech from existing podcast recordings","volume":"10","author":"Lotfian","year":"2019","journal-title":"IEEE Trans. Affect. Comput."},{"key":"10.1016\/j.specom.2025.103327_b30","doi-asserted-by":"crossref","first-page":"1295","DOI":"10.1007\/s10579-022-09606-3","article-title":"Lahjoita puhetta: a large-scale corpus of spoken finnish with some benchmarks","volume":"57","author":"Moisio","year":"2022","journal-title":"Lang. Resour. Eval."},{"key":"10.1016\/j.specom.2025.103327_b31","series-title":"PyTorch: An Imperative Style, High-Performance Deep Learning Library","author":"Paszke","year":"2019"},{"key":"10.1016\/j.specom.2025.103327_b32","first-page":"2825","article-title":"Scikit-learn: Machine learning in python","volume":"12","author":"Pedregosa","year":"2011","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.specom.2025.103327_b33","first-page":"88","article-title":"Which emotions are basic","volume":"vol. 69","author":"Prinz","year":"2004"},{"key":"10.1016\/j.specom.2025.103327_b34","doi-asserted-by":"crossref","DOI":"10.1016\/j.specom.2024.103102","article-title":"Analyzing the influence of different speech data corpora and speech features on speech emotion recognition: A review","volume":"162","author":"Rathi","year":"2024","journal-title":"Speech Commun."},{"key":"10.1016\/j.specom.2025.103327_b35","doi-asserted-by":"crossref","first-page":"848","DOI":"10.1037\/0022-3514.57.5.848","article-title":"A cross-cultural study of a circumplex model of affect","volume":"57","author":"Russell","year":"1989","journal-title":"J. Pers. Soc. Psychol."},{"key":"10.1016\/j.specom.2025.103327_b36","first-page":"379","article-title":"A cross-cultural investigation of emotion inferences from voice and speech: Implications for speech technology","volume":"vol. 2","author":"Scherer","year":"2000"},{"key":"10.1016\/j.specom.2025.103327_b37","doi-asserted-by":"crossref","unstructured":"Schuller, B., Steidl, S., Batliner, A., 2009. The INTERSPEECH 2009 emotion challenge. In: INTERSPEECH 2009. pp. 312\u2013315.","DOI":"10.21437\/Interspeech.2009-103"},{"key":"10.1016\/j.specom.2025.103327_b38","doi-asserted-by":"crossref","unstructured":"Schuller, B., Steidl, S., Batliner, A., Vinciarelli, A., Scherer, K., Ringeval, F., Chetouani, M., Weninger, F., Eyben, F., Marchi, E., Mortillaro, M., Salamin, H., Polychroniou, A., Valente, F., Kim, S., 2013. The INTERSPEECH 2013 computational paralinguistics challenge: social signals, conflict, emotion, autism. In: INTERSPEECH 2013. pp. 148\u2013152.","DOI":"10.21437\/Interspeech.2013-56"},{"key":"10.1016\/j.specom.2025.103327_b39","doi-asserted-by":"crossref","unstructured":"Seabold, S., Perktold, J., 2010. Statsmodels: Econometric and Statistical Modeling with Python. In: 9th Python in Science Conference.","DOI":"10.25080\/Majora-92bf1922-011"},{"key":"10.1016\/j.specom.2025.103327_b40","series-title":"MediaTeam speech corpus : a first large finnish emotional speech database","author":"Sepp\u00e4nen","year":"2003"},{"key":"10.1016\/j.specom.2025.103327_b41","doi-asserted-by":"crossref","DOI":"10.3389\/fpsyg.2016.01907","article-title":"Affective meaning, concreteness, and subjective frequency norms for Indonesian words","volume":"7","author":"Sianipar","year":"2016","journal-title":"Front. Psychol."},{"key":"10.1016\/j.specom.2025.103327_b42","unstructured":"Tampere University,, 2023. Longitudinal data of Tampere spoken language. http:\/\/urn.fi\/urn:nbn:fi:lb-2022090821. Tampere University, The Unit of Languages and Institute for the Languages of Finland and Liisa Mustanoja."},{"key":"10.1016\/j.specom.2025.103327_b43","doi-asserted-by":"crossref","first-page":"383","DOI":"10.1177\/00238309040470040301","article-title":"Automatic discrimination of emotion from spoken finnish","volume":"47","author":"Toivanen","year":"2004","journal-title":"Lang. Speech"},{"key":"10.1016\/j.specom.2025.103327_b44","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1080\/14015430500293926","article-title":"Emotions in [a]: A perceptual and acoustic study","volume":"31","author":"Toivanen","year":"2006","journal-title":"Logop. Phoniatr. Vocology"},{"key":"10.1016\/j.specom.2025.103327_b45","series-title":"The downloadable version of the longitudinal corpus of finnish spoken in helsinki (1970s, 1990s and 2010s)","author":"University of Helsinki","year":"2014"},{"key":"10.1016\/j.specom.2025.103327_b46","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1016\/j.specom.2023.02.001","article-title":"Development of a speech emotion recognizer for large-scale child-centered audio recordings from a hospital environment","volume":"148","author":"Vaaras","year":"2023","journal-title":"Speech Commun."},{"key":"10.1016\/j.specom.2025.103327_b47","doi-asserted-by":"crossref","unstructured":"Vaaras, E., Ahlqvist-Bj\u00f6rkroth, S., Drossos, K., R\u00e4s\u00e4nen, O., 2021. Automatic Analysis of the Emotional Content of Speech in Daylong Child-Centered Recordings from a Neonatal Intensive Care Unit. In: INTERSPEECH 2021. pp. 3380\u20133384.","DOI":"10.21437\/Interspeech.2021-303"},{"key":"10.1016\/j.specom.2025.103327_b48","doi-asserted-by":"crossref","unstructured":"Vaaras, E., Airaksinen, M., R\u00e4s\u00e4nen, O., 2022. Analysis of Self-Supervised Learning and Dimensionality Reduction Methods in Clustering-Based Active Learning for Speech Emotion Recognition. In: INTERSPEECH 2022. pp. 1143\u20131147.","DOI":"10.21437\/Interspeech.2022-329"},{"key":"10.1016\/j.specom.2025.103327_b49","doi-asserted-by":"crossref","first-page":"129","DOI":"10.3109\/14015439.2014.915982","article-title":"Perception of emotional nonsense sentences in China, Egypt, Estonia, Finland, Russia, Sweden, and the USA","volume":"40","author":"Waaramaa","year":"2015","journal-title":"Logop. Phoniatr. Vocology"},{"key":"10.1016\/j.specom.2025.103327_b50","doi-asserted-by":"crossref","first-page":"153","DOI":"10.1080\/14015430500456739","article-title":"The role of F3 in the vocal expression of emotions","volume":"31","author":"Waaramaa","year":"2006","journal-title":"Logop. Phoniatr. Vocology"},{"key":"10.1016\/j.specom.2025.103327_b51","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1016\/j.jvoice.2008.04.004","article-title":"Perception of emotional valences and activity levels from vowel segments of continuous speech","volume":"24","author":"Waaramaa","year":"2010","journal-title":"J. Voice"},{"key":"10.1016\/j.specom.2025.103327_b52","doi-asserted-by":"crossref","first-page":"249","DOI":"10.1159\/000151762","article-title":"Monopitched expression of emotions in different vowels","volume":"60","author":"Waaramaa","year":"2008","journal-title":"Folia Phoniatr. Logop."},{"key":"10.1016\/j.specom.2025.103327_b53","doi-asserted-by":"crossref","first-page":"584","DOI":"10.1525\/aa.1986.88.3.02a00030","article-title":"Human emotions: Universal or culture-specific?","volume":"88","author":"Wierzbicka","year":"1986","journal-title":"Am. Anthropol."},{"key":"10.1016\/j.specom.2025.103327_b54","series-title":"Speech analyzer for analyzing pitch or frequency perturbations in individual speech pattern to determine the emotional state of the person","author":"Williamson","year":"1978"}],"updated-by":[{"DOI":"10.1016\/j.specom.2026.103396","type":"erratum","label":"Erratum","source":"publisher","updated":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000}}],"container-title":["Speech Communication"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167639325001426?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167639325001426?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T03:01:02Z","timestamp":1778727662000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0167639325001426"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11]]},"references-count":54,"alternative-id":["S0167639325001426"],"URL":"https:\/\/doi.org\/10.1016\/j.specom.2025.103327","relation":{},"ISSN":["0167-6393"],"issn-type":[{"value":"0167-6393","type":"print"}],"subject":[],"published":{"date-parts":[[2025,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"FinnAffect: An affective speech corpus for spontaneous Finnish","name":"articletitle","label":"Article Title"},{"value":"Speech Communication","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.specom.2025.103327","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 The Authors. Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"103327"}}