{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T15:20:18Z","timestamp":1776093618831,"version":"3.50.1"},"reference-count":77,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,8,13]],"date-time":"2022-08-13T00:00:00Z","timestamp":1660348800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,8,13]],"date-time":"2022-08-13T00:00:00Z","timestamp":1660348800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100004681","name":"Higher Education Commission, Pakistan","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100004681","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Lang Resources &amp; Evaluation"],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1007\/s10579-022-09610-7","type":"journal-article","created":{"date-parts":[[2022,8,13]],"date-time":"2022-08-13T17:02:36Z","timestamp":1660410156000},"page":"915-944","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Speech emotion recognition for the Urdu language"],"prefix":"10.1007","volume":"57","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7074-2988","authenticated-orcid":false,"given":"Nimra","family":"Zaheer","sequence":"first","affiliation":[]},{"given":"Obaid Ullah","family":"Ahmad","sequence":"additional","affiliation":[]},{"given":"Mudassir","family":"Shabbir","sequence":"additional","affiliation":[]},{"given":"Agha Ali","family":"Raza","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,8,13]]},"reference":[{"key":"9610_CR1","unstructured":"Ali, H., Ahmad, N., Yahya, K. M., & Farooq, O. (2012). A medium vocabulary Urdu isolated words balanced corpus for automatic speech recognition. In 2012 international conference on electronics computer technology (ICECT 2012) (pp. 473\u2013476)."},{"key":"9610_CR2","doi-asserted-by":"crossref","unstructured":"Atta, F., van\u00a0de Weijer, J., & Zhu, L. (2020). Saraiki. Journal of the International Phonetic Association, 1\u201321.","DOI":"10.1017\/S0025100320000328"},{"key":"9610_CR3","unstructured":"Bahdanau, D., Cho, K., & Bengio, Y. (2015). Neural machine translation by jointly learning to align and translate. In 3rd international conference on learning representations (ICLR 2015). Computational and Biological Learning Society."},{"key":"9610_CR4","unstructured":"Batliner, A., Fischer, K., Huber, R., Spilker, J., &  N\u00f6th, E. (2000). Desperately seeking emotions or: Actors, wizards, and human beings. ISCA tutorial and research workshop (ITRW) on speech and emotion. International Speech Communication Association."},{"key":"9610_CR5","unstructured":"Batliner, A., Steidl, S., & N\u00f6th, E. (2008). Releasing a thoroughly annotated and processed spontaneous emotional database: The FAU Aibo emotion corpus. In Proceedings of a satellite workshop of IREC (p.\u00a028). European Language Resources Association."},{"key":"9610_CR6","doi-asserted-by":"crossref","unstructured":"Burkhardt, F., Paeschke, A., Rolfes, M., Sendlmeier, W. F., & Weiss, B. (2005). A database of German emotional speech. 
In Ninth European conference on speech communication and technology (pp.\u00a01517\u20131520). Lisbon, PortugalInternational Speech Communication Association.","DOI":"10.21437\/Interspeech.2005-446"},{"issue":"4","key":"9610_CR7","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/s10579-008-9076-6","volume":"42","author":"C Busso","year":"2008","unstructured":"Busso, C., Bulut, M., Lee, C.-C., Kazemzadeh, A., Mower, E., Kim, S., & Narayanan, S. S. (2008). IEMOCAP: Interactive emotional dyadic motion capture database. Language Resources and Evaluation, 42(4), 335.","journal-title":"Language resources and evaluation."},{"issue":"1","key":"9610_CR8","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1109\/TAFFC.2016.2515617","volume":"8","author":"C Busso","year":"2016","unstructured":"Busso, C., Parthasarathy, S., Burmania, A., AbdelWahab, M., Sadoughi, N., & Provost, E. M. (2016). MSP-IMPROV: An acted corpus of dyadic interactions to study emotion perception. IEEE Transactions on Affective Computing., 8(1), 67\u201380.","journal-title":"IEEE Transactions on Affective Computing."},{"key":"9610_CR9","doi-asserted-by":"publisher","unstructured":"Cai, X., Yuan, J., Zheng, R., Huang, L., & Church, K. (2021). Speech emotion recognition with multi-task learning. In Proceedings of interspeech 2021 (pp. 4508\u20134512). https:\/\/doi.org\/10.21437\/Interspeech.2021-1852","DOI":"10.21437\/Interspeech.2021-1852"},{"key":"9610_CR10","doi-asserted-by":"crossref","unstructured":"Castillo, J. C., Fern\u00e1ndez-Caballero, A., Castro-Gonz\u00e1lez, \u00c1., Salichs, M. A., & L\u00f3pez, M. T. (2014a). A framework for recognizing and regulating emotions in the elderly. In L.\u00a0Pecchia, L. L.\u00a0Chen, C.\u00a0Nugent, & J.\u00a0Bravo (Eds.), Ambient assisted living and daily activities (pp. 320\u2013327). Springer.","DOI":"10.1007\/978-3-319-13105-4_46"},{"key":"9610_CR11","doi-asserted-by":"crossref","unstructured":"Castillo, J.C., Fern\u00e1ndez-Caballero, A., Castro-Gonz\u00e1lez, \u00c1., Salichs, M.A., & L\u00f3pez, M. T. (2014b). A framework for recognizing and regulating emotions in the elderly. In L.\u00a0Pecchia, L. L.\u00a0Chen, C.\u00a0Nugent, & J.\u00a0Bravo (Eds.), Ambient assisted living and daily activities (pp. 320\u2013327). Springer.","DOI":"10.1007\/978-3-319-13105-4_46"},{"key":"9610_CR12","unstructured":"Cauldwell, R. T. (2000). Where did the anger go? The role of context in interpreting emotion in speech. In ISCA tutorial and research workshop (ITRW) on speech and emotion. International Speech Communication Association."},{"key":"9610_CR13","doi-asserted-by":"crossref","unstructured":"Chatziagapi, A., Paraskevopoulos, G., Sgouropoulos, D., Pantazopoulos, G., Nikandrou, M., Giannakopoulos, T., & Narayanan, S. (2019). Data augmentation using gans for speech emotion recognition. In Interspeech (pp. 171\u2013175). International Speech Communication Association.","DOI":"10.21437\/Interspeech.2019-2561"},{"key":"9610_CR15","doi-asserted-by":"crossref","unstructured":"Chen, J., She, Y., Zheng, M., Shu, Y., Wang, Y., & Xu, Y. (2019). A multimodal affective computing approach for children companion robots. In Proceedings of the seventh international symposium of Chinese CHI (pp. 57\u201364).","DOI":"10.1145\/3332169.3333569"},{"issue":"10","key":"9610_CR14","doi-asserted-by":"publisher","first-page":"1440","DOI":"10.1109\/LSP.2018.2860246","volume":"25","author":"M Chen","year":"2018","unstructured":"Chen, M., He, X., Yang, J., & Zhang, H. (2018). 
3-D convolutional recurrent neural networks with attention model for speech emotion recognition. IEEE Signal Processing Letters., 25(10), 1440\u20131444.","journal-title":"IEEE Signal Processing Letters."},{"key":"9610_CR16","unstructured":"Costantini, G., Iaderola, I., Paoloni, A., & Todisco, M. (2014). EMOVO Corpus: An italian emotional speech database. In International conference on language resources and evaluation (IREC 2014) (pp. 3501\u20133504). European Language Resources Association."},{"key":"9610_CR17","doi-asserted-by":"crossref","unstructured":"Cummins, N., Amiriparian, S., Hagerer, G., Batliner, A., Steidl, S., & Schuller, B. W. (2017). An image-based deep spectrum feature representation for the recognition of emotional speech. In  Proceedings of the 25th acm international conference on multimedia (pp. 478\u2013484).","DOI":"10.1145\/3123266.3123371"},{"issue":"1\u20132","key":"9610_CR18","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1016\/S0167-6393(02)00070-5","volume":"40","author":"E Douglas-Cowie","year":"2003","unstructured":"Douglas-Cowie, E., Campbell, N., Cowie, R., & Roach, P. (2003). Emotional speech: Towards a new generation of databases. Speech communication., 40(1\u20132), 33\u201360.","journal-title":"Speech communication."},{"key":"9610_CR19","doi-asserted-by":"crossref","unstructured":"Douglas-Cowie, E., Devillers, L., Martin, J.-C., Cowie, R., Savvidou, S., Abrilian, S., & Cox, C. (2005). Multimodal databases of everyday emotion: Facing up to complexity. In Ninth European conference on speech communication and technology (p.\u00a04). International Speech Communication Association.","DOI":"10.21437\/Interspeech.2005-381"},{"key":"9610_CR20","unstructured":"Eberhard, D. M., Simons, G. F., & Fennig, C. D. (2020). Ethnologue: Languages of the world, 23rd edn (Vol. 23). Dallas."},{"key":"9610_CR21","doi-asserted-by":"crossref","unstructured":"Engberg, I. S., Hansen, A. V., Andersen, O., & Dalsgaard, P. (1997). Design, recording and verification of a danish emotional speech database. In Fifth European conference on speech communication and technology.","DOI":"10.21437\/Eurospeech.1997-482"},{"key":"9610_CR22","doi-asserted-by":"crossref","unstructured":"Eyben, F., Scherer, K. R., Schuller, B. W., Sundberg, J., Andr\u00e9, E., Busso, C., Devillers, L. Y., Epps, J., Laukka, P., Narayanan, S. S., & Truong, K. P. (2015). The geneva minimalistic acoustic parameter set (GeMAPS) for voice research and affective computing. IEEE Transactions on Affective Computing, 7(2), 190\u2013202.","DOI":"10.1109\/TAFFC.2015.2457417"},{"key":"9610_CR23","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1016\/j.neunet.2017.02.013","volume":"92","author":"HM Fayek","year":"2017","unstructured":"Fayek, H. M., Lech, M., & Cavedon, L. (2017). Evaluating deep learning architectures for speech emotion recognition. Neural Networks., 92, 60\u201368.","journal-title":"Neural Networks."},{"key":"9610_CR24","doi-asserted-by":"publisher","unstructured":"Ghulam, S. M., & Soomro, T. R. (2018). Twitter and Urdu. In 2018 international conference on computing, mathematics and engineering technologies (ICOMET) (p.\u00a01-6). IEEE. https:\/\/doi.org\/10.1109\/ICOMET.2018.8346370","DOI":"10.1109\/ICOMET.2018.8346370"},{"key":"9610_CR25","doi-asserted-by":"crossref","unstructured":"Grimm, M., Kroschel, K., & Narayanan, S. (2008). The vera am mittag german audio-visual emotional speech database. In 2008 IEEE international conference on multimedia and expo (pp. 865\u2013868). 
IEEE.","DOI":"10.1109\/ICME.2008.4607572"},{"key":"9610_CR26","doi-asserted-by":"crossref","unstructured":"Han, W., Jiang, T., Li, Y., Schuller, B., & Ruan, H. (2020). Ordinal learning for emotion recognition in customer service calls. In ICASSP 2020-2020 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 6494\u20136498). IEEE.","DOI":"10.1109\/ICASSP40776.2020.9053648"},{"key":"9610_CR27","unstructured":"Ijaz, M., & Hussain, S. (2007). Corpus based Urdu lexicon development. In The proceedings of conference on language technology (CLT07), University of Peshawar, Pakistan (Vol.\u00a073, pp.\u00a012). Academia."},{"key":"9610_CR28","unstructured":"Jackson, P., & Haq, S. (2014). Surrey audio-visual expressed emotion (SAVEE) database."},{"issue":"3","key":"9610_CR29","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1007\/s10919-015-0209-5","volume":"39","author":"R J\u00fcrgens","year":"2015","unstructured":"J\u00fcrgens, R., Grass, A., Drolet, M., & Fischer, J. (2015). Effect of acting experience on emotion expression and recognition in voice: Non-actors provide better stimuli than expected. Journal of Nonverbal Behavior, 39(3), 195\u2013214.","journal-title":"Journal of nonverbal behavior."},{"key":"9610_CR30","unstructured":"Kabir, H., & Saleem, A. M. (2002). Speech assessment methods phonetic alphabet (SAMPA): Analysis of Urdu."},{"key":"9610_CR31","doi-asserted-by":"crossref","unstructured":"Koolagudi, S. G., Maity, S., Kumar, V. A., Chakrabarti, S., & Rao, K. S. (2009). IITKGP-SESC: Speech database for emotion analysis. In International conference on contemporary computing (pp. 485\u2013492).","DOI":"10.1007\/978-3-642-03547-0_46"},{"key":"9610_CR32","doi-asserted-by":"crossref","unstructured":"Koolagudi, S. G., Reddy, R., Yadav, J., & Rao, K. S. (2011). IITKGP-SEHSC: Hindi speech corpus for emotion analysis. In 2011 international conference on devices and communications (ICDECOM) (pp. 1\u20135).","DOI":"10.1109\/ICDECOM.2011.5738540"},{"key":"9610_CR33","doi-asserted-by":"crossref","unstructured":"Kostoulas, T., Mporas, I., Ganchev, T., & Fakotakis, N. (2008). The effect of emotional speech on a smart-home application. In International conference on industrial, engineering and other applications of applied intelligent systems (pp. 305\u2013310). Springer.","DOI":"10.1007\/978-3-540-69052-8_32"},{"key":"9610_CR34","doi-asserted-by":"publisher","unstructured":"Kumar, P., Kaushik, V., & Raman, B. (2021). Towards the explainability of multimodal speech emotion recognition. In Proceedings of interspeech 2021 (pp. 1748\u20131752). https:\/\/doi.org\/10.21437\/Interspeech.2021-1718","DOI":"10.21437\/Interspeech.2021-1718"},{"key":"9610_CR35","doi-asserted-by":"publisher","unstructured":"Kumar, T. M., Sanchez, E., Tzimiropoulos, G., Giesbrecht, T., Valstar, M. (2021). Stochastic process regression for cross-cultural speech emotion recognition. In Proceedings of interspeech 2021 (pp. 3390\u20133394). https:\/\/doi.org\/10.21437\/Interspeech.2021-610","DOI":"10.21437\/Interspeech.2021-610"},{"key":"9610_CR36","doi-asserted-by":"publisher","unstructured":"Kumawat, P., & Routray, A. (2021). Applying TDNN architectures for analyzing duration dependencies on speech emotion recognition. In Proceedings of interspeech 2021 (pp. 3410\u20133414). https:\/\/doi.org\/10.21437\/Interspeech.2021-2168","DOI":"10.21437\/Interspeech.2021-2168"},{"key":"9610_CR37","doi-asserted-by":"publisher","unstructured":"Latif, S., Qayyum, A., Usman, M., Qadir, J. (2018). 
Cross lingual speech emotion recognition: Urdu vs. western languages. In 2018 international conference on frontiers of information technology (FIT) (pp. 88\u201393). IEEE. https:\/\/doi.org\/10.1109\/FIT.2018.00023","DOI":"10.1109\/FIT.2018.00023"},{"key":"9610_CR38","doi-asserted-by":"publisher","unstructured":"Leem, S.-G., Fulford, D., Onnela, J.-P., Gard, D., & Busso, C. (2021). Separation of emotional and reconstruction embeddings on ladder network to improve speech emotion recognition robustness in noisy conditions. In Proceedings of interspeech 2021 (pp. 2871\u20132875). https:\/\/doi.org\/10.21437\/Interspeech.2021-1438","DOI":"10.21437\/Interspeech.2021-1438"},{"key":"9610_CR40","doi-asserted-by":"crossref","unstructured":"Li, A., Zheng, F., Byrne, W., Fung, P., Kamm, T., Liu, Y., & Chen, X. (2000). CASS: A phonetically transcribed corpus of mandarin spontaneous speech. In Sixth international conference on spoken language processing (pp.\u00a0485-488). International Speech Communication Association.","DOI":"10.21437\/ICSLP.2000-120"},{"key":"9610_CR41","doi-asserted-by":"crossref","unstructured":"Li, B., Dimitriadis, D., & Stolcke, A. (2019). Acoustic and lexical sentiment analysis for customer service calls. In ICASSP 2019\u20132019 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp.\u00a05876\u20135880).","DOI":"10.1109\/ICASSP.2019.8683679"},{"key":"9610_CR42","doi-asserted-by":"crossref","unstructured":"Li, J.-L., & Lee, C.-C. (2019). Attentive to individual: A multimodal emotion recognition network with personalized attention profile. In Interspeech (pp. 211\u2013215). International Speech Communication Association.","DOI":"10.21437\/Interspeech.2019-2044"},{"issue":"6","key":"9610_CR39","doi-asserted-by":"publisher","first-page":"913","DOI":"10.1007\/s12652-016-0406-z","volume":"8","author":"Y Li","year":"2017","unstructured":"Li, Y., Tao, J., Chao, L., Bao, W., & Liu, Y. (2017). CHEAVD: A chinese natural emotional audio-visual database. Journal of Ambient Intelligence and Humanized Computing., 8(6), 913\u2013924.","journal-title":"Journal of Ambient Intelligence and Humanized Computing."},{"key":"9610_CR43","doi-asserted-by":"publisher","unstructured":"Liu, J., & Wang, H. (2021). Graph isomorphism network for speech emotion recognition. In Proceedings of  interspeech 2021 (pp. 3405\u20133409). https:\/\/doi.org\/10.21437\/Interspeech.2021-1154","DOI":"10.21437\/Interspeech.2021-1154"},{"issue":"5","key":"9610_CR44","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0196391","volume":"13","author":"SR Livingstone","year":"2018","unstructured":"Livingstone, S. R., & Russo, F. A. (2018). The ryerson audio-visual database of emotional speech and song (ravdess): A dynamic, multimodal set of facial and vocal expressions in north american english. PLoS ONE., 13(5), e0196391.","journal-title":"PloS one."},{"key":"9610_CR45","doi-asserted-by":"crossref","unstructured":"Makarova, V., & Petrushin, V. A. (2002). Ruslana: A database of russian emotional utterances. In Seventh international conference on spoken language processing.","DOI":"10.21437\/ICSLP.2002-560"},{"key":"9610_CR46","doi-asserted-by":"crossref","unstructured":"McFee, B., Raffel, C., Liang, D., Ellis, D. P., McVicar, M., Battenberg, E., & Nieto, O. (2015). librosa: Audio and music signal analysis in python. In Proceedings of the 14th python in science conference (Vol.\u00a08, pp. 18\u201325). 
Academia.","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"9610_CR47","doi-asserted-by":"publisher","unstructured":"Meddeb, M., Karray, H., & Alimi, A.M. (2017). Building and analysing emotion corpus of the arabic speech. In 2017 1st international workshop on arabic script analysis and recognition (ASAR) (pp. 134\u2013139). IEEE. https:\/\/doi.org\/10.1109\/ASAR.2017.8067775","DOI":"10.1109\/ASAR.2017.8067775"},{"key":"9610_CR48","unstructured":"Montero, J. M., Guti\u00e9rrez-Arriola, J., Col\u00e1s, J., Enriquez, E., & Pardo, J. M. (1999). Analysis and modelling of emotional speech in spanish. In Proceedngs of of ICPHS (Vol.\u00a02, pp. 957\u2013960)."},{"key":"9610_CR49","unstructured":"Moriyama, T., Mori, S., & Ozawa, S. (2009). A synthesis method of emotional speech using subspace constraints in prosody. Journal of Information Processing Society of Japan., 50(3), 1181\u20131191."},{"key":"9610_CR50","unstructured":"Murphy, K. P. (2012). Machine learning: A probabilistic perspective. MIT press."},{"issue":"1","key":"9610_CR51","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10579-018-9427-x","volume":"53","author":"OM Nezami","year":"2019","unstructured":"Nezami, O. M., Lou, P. J., & Karami, M. (2019). Shemo: A large-scale validated database for persian speech emotion detection. Language Resources and Evaluation., 53(1), 1\u201316.","journal-title":"Language Resources and Evaluation."},{"issue":"1","key":"9610_CR52","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1186\/1687-4722-2013-26","volume":"2013","author":"C Oflazoglu","year":"2013","unstructured":"Oflazoglu, C., & Yildirim, S. (2013). Recognizing emotion from turkish speech using acoustic features. EURASIP Journal on Audio, Speech, and Music Processing, 2013(1), 26.","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing."},{"key":"9610_CR53","doi-asserted-by":"crossref","unstructured":"Parry, J., Palaz, D., Clarke, G., Lecomte, P., Mead, R., Berger, M., & Hofer, G. (2019). Analysis of deep learning architectures for cross-corpus speech emotion recognition. In Interspeech (pp. 1656\u20131660). International Speech Communication Association.","DOI":"10.21437\/Interspeech.2019-2753"},{"key":"9610_CR54","doi-asserted-by":"publisher","unstructured":"Qasim, M., Nawaz, S., Hussain, S., & Habib, T. (2016). Urdu speech recognition system for district names of pakistan: Development, challenges and solutions. In 2016 conference of the oriental chapter of international committee for coordination and standardization of speech databases and assessment techniques (O-COCOSDA) (pp.\u00a028\u201332). IEEE. https:\/\/doi.org\/10.1109\/ICSDA.2016.7918979","DOI":"10.1109\/ICSDA.2016.7918979"},{"issue":"3","key":"9610_CR55","doi-asserted-by":"publisher","first-page":"1467","DOI":"10.1007\/s11235-011-9624-z","volume":"52","author":"S Ramakrishnan","year":"2013","unstructured":"Ramakrishnan, S., & El Emary, I. M. (2013). Speech emotion recognition approaches in human computer interaction. Telecommunication Systems, 52(3), 1467\u20131478.","journal-title":"Telecommunication Systems."},{"key":"9610_CR56","doi-asserted-by":"publisher","unstructured":"Raza, A. A., Athar, A., Randhawa, S., Tariq, Z., Saleem, M. B., Zia, H. B., & Rosenfeld, R. (2018). Rapid collection of spontaneous speech corpora using telephonic community forums. In Proceedings of interspeech 2018 (pp. 1021\u20131025). 
https:\/\/doi.org\/10.21437\/Interspeech.2018-1139","DOI":"10.21437\/Interspeech.2018-1139"},{"key":"9610_CR57","doi-asserted-by":"crossref","unstructured":"Raza, A. A., Hussain, S., Sarfraz, H., Ullah, I., & Sarfraz, Z. (2009). Design and development of phonetically rich Urdu speech corpus. In 2009 oriental cocosda international conference on speech database and assessments (pp. 38\u201343).","DOI":"10.1109\/ICSDA.2009.5278380"},{"key":"9610_CR58","doi-asserted-by":"publisher","unstructured":"Ringeval, F., Sonderegger, A., Sauer, J., & Lalanne, D. (2013). Introducing the recola multimodal corpus of remote collaborative and affective interactions. In 2013 10th IEEE international conference and workshops on automatic face and gesture recognition (FG) (pp. 1\u20138). IEEE. https:\/\/doi.org\/10.1109\/FG.2013.6553805","DOI":"10.1109\/FG.2013.6553805"},{"issue":"3","key":"9610_CR59","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1016\/0092-6566(77)90037-X","volume":"11","author":"JA Russell","year":"1977","unstructured":"Russell, J. A., & Mehrabian, A. (1977). Evidence for a three-factor theory of emotions. Journal of Research in Personality, 11(3), 273\u2013294. https:\/\/doi.org\/10.1016\/0092-6566(77)90037-X","journal-title":"Journal of Research in Personality."},{"key":"9610_CR60","doi-asserted-by":"crossref","unstructured":"Sager, J., Shankar, R., Reinhold, J., & Venkataraman, A. (2019). Vesus: A crowd-annotated database to study emotion production and perception in spoken english. In Interspeech (pp. 316\u2013320). International Speech Communication Association.","DOI":"10.21437\/Interspeech.2019-1413"},{"issue":"4","key":"9610_CR61","doi-asserted-by":"publisher","first-page":"543","DOI":"10.1016\/j.specom.2011.11.004","volume":"54","author":"M Sahidullah","year":"2012","unstructured":"Sahidullah, M., & Saha, G. (2012). Design, analysis and experimental evaluation of block based transformation in MFCC computation for speaker recognition. Speech Communication, 54(4), 543\u2013565.","journal-title":"Speech communication."},{"key":"9610_CR62","doi-asserted-by":"publisher","unstructured":"Santoso, J., Yamada, T., Makino, S., Ishizuka, K., & Hiramura, T. (2021). Speech emotion recognition based on attention weight correction using word-level confidence measure. In Proceedings of  interspeech 2021 (pp. 1947\u20131951). https:\/\/doi.org\/10.21437\/Interspeech.2021-411","DOI":"10.21437\/Interspeech.2021-411"},{"key":"9610_CR63","unstructured":"Sarfraz, H., Hussain, S., Bokhari, R., Raza, A.A., Ullah, I., Sarfraz, Z., & Parveen, R. (2010). Speech corpus development for a speaker independent spontaneous Urdu speech recognition system. In Proceedings of the O-COCOSDA, Kathmandu, Nepal."},{"issue":"1\u20132","key":"9610_CR64","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1016\/S0167-6393(02)00084-5","volume":"40","author":"KR Scherer","year":"2003","unstructured":"Scherer, K. R. (2003). Vocal communication of emotion: A review of research paradigms. Speech Communication, 40(1\u20132), 227\u2013256.","journal-title":"Speech communication."},{"key":"9610_CR65","doi-asserted-by":"crossref","unstructured":"Schuller, B., Steidl, S., Batliner, A., Hirschberg, J., Burgoon, J. K., Baird, A., Elkins, A., Zhang, Y., Coutinho, E., & Evanini, K. (2016). The interspeech 2016 computational paralinguistics challenge: Deception, sincerity & native language. In 17th annual conference of the international speech communication association (Interspeech) (Vol. 1\u20135, pp. 2001\u20132005). 
International Speech Communication Association.","DOI":"10.21437\/Interspeech.2016-129"},{"key":"9610_CR66","doi-asserted-by":"publisher","unstructured":"Scott, K. M., Ashby, S., & Hanna, J. (2020). \u201cHuman, all too human\u201d: NOAA weather radio and the emotional impact of synthetic voices. In Proceedings of the 2020 CHI conference on human factors in computing systems (p.\u00a01-9). Association for Computing Machinery. https:\/\/doi.org\/10.1145\/3313831.3376338","DOI":"10.1145\/3313831.3376338"},{"key":"9610_CR67","doi-asserted-by":"crossref","unstructured":"Sebastian, J., & Pierucci, P. (2019). Fusion techniques for utterance-level emotion recognition combining speech and transcripts. In Interspeech (pp. 51\u201355). International Speech Communication Association.","DOI":"10.21437\/Interspeech.2019-3201"},{"key":"9610_CR68","unstructured":"Simonyan, K., & Zisserman, A. (2014). Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556."},{"issue":"3","key":"9610_CR69","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1121\/1.1915893","volume":"8","author":"SS Stevens","year":"1937","unstructured":"Stevens, S. S., Volkmann, J., & Newman, E. B. (1937). A scale for the measurement of the psychological magnitude pitch. The Journal of the Acoustical Society of America, 8(3), 185\u2013190.","journal-title":"The Journal of the Acoustical Society of America."},{"issue":"1","key":"9610_CR70","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1007\/s10772-018-9491-z","volume":"21","author":"M Swain","year":"2018","unstructured":"Swain, M., Routray, A., & Kabisatpathy, P. (2018). Databases, features and classifiers for speech emotion recognition: A review. International Journal of Speech Technology, 21(1), 93\u2013120.","journal-title":"International Journal of Speech Technology."},{"key":"9610_CR71","doi-asserted-by":"publisher","unstructured":"Vashistha, A., Garg, A., Anderson, R., & Raza, A. A. (2019). Threats, abuses, flirting, and blackmail: Gender inequity in social media voice forums. In Proceedings of the 2019 CHI conference on human factors in computing systems (pp.\u00a01\u201313). Association for Computing Machinery. https:\/\/doi.org\/10.1145\/3290605.3300302","DOI":"10.1145\/3290605.3300302"},{"key":"9610_CR72","unstructured":"Walker, K., Ma, X., Graff, D., Strassel, S., Sessa, S., & Jones, K. (2015). RATS speech activity detection. Abacus Data Network 11272.1\/AB2\/1UISJ7."},{"key":"9610_CR73","doi-asserted-by":"crossref","unstructured":"Xu, X., Deng, J., Cummins, N., Zhang, Z., Zhao, L., & Schuller, B. W. (2019). Autonomous emotion learning in speech: A view of zero-shot speech emotion recognition. In Interspeech (pp. 949\u2013953). International Speech Communication Association.","DOI":"10.21437\/Interspeech.2019-2406"},{"issue":"3","key":"9610_CR74","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1109\/TAFFC.2016.2553038","volume":"8","author":"S Zhalehpour","year":"2016","unstructured":"Zhalehpour, S., Onder, O., Akhtar, Z., & Erdem, C. E. (2016). Baum-1: A spontaneous audio-visual face database of affective and mental states. IEEE Transactions on Affective Computing, 8(3), 300\u2013313.","journal-title":"IEEE Transactions on Affective Computing."},{"key":"9610_CR75","unstructured":"Zhang, J. T. F. L. M., & Jia, H. (2008). Design of speech corpus for mandarin text to speech. In The blizzard challenge 2008 workshop (p.\u00a04). 
International Speech Communication Association."},{"key":"9610_CR76","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1016\/j.bspc.2018.08.035","volume":"47","author":"J Zhao","year":"2019","unstructured":"Zhao, J., Mao, X., & Chen, L. (2019). Speech emotion recognition using deep 1D & 2D CNN LSTM networks. Biomedical Signal Processing and Control., 47, 312\u2013323.","journal-title":"Biomedical Signal Processing and Control."},{"key":"9610_CR77","unstructured":"Zia, A. A. R. H. B., & Athar, A. (2018). Pronouncur: An Urdu pronunciation lexicon generator. In Proceedings of the eleventh international conference on language resources and evaluation (LREC 2018). European Language Resources Association (ELRA)."}],"container-title":["Language Resources and Evaluation"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-022-09610-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10579-022-09610-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-022-09610-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,20]],"date-time":"2023-05-20T15:09:16Z","timestamp":1684595356000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10579-022-09610-7"}},"subtitle":["Dataset and evaluation"],"short-title":[],"issued":{"date-parts":[[2022,8,13]]},"references-count":77,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,6]]}},"alternative-id":["9610"],"URL":"https:\/\/doi.org\/10.1007\/s10579-022-09610-7","relation":{},"ISSN":["1574-020X","1574-0218"],"issn-type":[{"value":"1574-020X","type":"print"},{"value":"1574-0218","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,8,13]]},"assertion":[{"value":"25 July 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 August 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}