{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T23:45:26Z","timestamp":1776728726192,"version":"3.51.2"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2017,2,9]],"date-time":"2017-02-09T00:00:00Z","timestamp":1486598400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2017,6]]},"DOI":"10.1007\/s10772-017-9396-2","type":"journal-article","created":{"date-parts":[[2017,2,9]],"date-time":"2017-02-09T10:35:21Z","timestamp":1486636521000},"page":"239-246","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":95,"title":["Vocal-based emotion recognition using random forests and decision tree"],"prefix":"10.1007","volume":"20","author":[{"given":"Fatemeh","family":"Noroozi","sequence":"first","affiliation":[]},{"given":"Tomasz","family":"Sapi\u0144ski","sequence":"additional","affiliation":[]},{"given":"Dorota","family":"Kami\u0144ska","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8460-5717","authenticated-orcid":false,"given":"Gholamreza","family":"Anbarjafari","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,2,9]]},"reference":[{"issue":"2","key":"9396_CR1","doi-asserted-by":"crossref","first-page":"155","DOI":"10.1007\/s10462-012-9368-5","volume":"43","author":"CN Anagnostopoulos","year":"2015","unstructured":"Anagnostopoulos, C. N., Iliou, T., & Giannoukos, I. (2015). Features and classifiers for emotion recognition from speech: A survey from 2000 to 2011. Artificial Intelligence Review, 43(2), 155\u2013177.","journal-title":"Artificial Intelligence Review"},{"key":"9396_CR2","first-page":"103","volume":"2014","author":"G Anbarjafari","year":"2014","unstructured":"Anbarjafari, G., & Aabloo, A. (2014). Expression recognition by using facial and vocal expressions. V&L Net, 2014, 103\u2013105.","journal-title":"V&L Net"},{"key":"9396_CR3","doi-asserted-by":"crossref","unstructured":"Atassi, H., Esposito, A., Smekal, Z. (2011). Analysis of high-level features for vocal emotion recognition. In 2011 34th international conference on telecommunications and signal processing (TSP) (pp. 361\u2013366). IEEE","DOI":"10.1109\/TSP.2011.6043708"},{"key":"9396_CR4","unstructured":"Bahreini, K., Nadolski, R., Westera, W. (2013). Filtwam and voice emotion recognition. In Games and learning alliance (vol. 8605, pp. 116\u2013129). Springer."},{"key":"9396_CR5","unstructured":"Bellantonio, M., Haque, M. A., Rodriguez, P., Nasrollahi, K., Telve, T., Escarela, S., Gonzalez, J., Moeslund, T. B., Rasti, P., Anbarjafari, G. (2016). Spatio-temporal pain recognition in cnn-based super-resolved facial images. In International conference on pattern recognition (ICPR). Springer."},{"key":"9396_CR6","volume-title":"Praat software","author":"P Boersma","year":"2013","unstructured":"Boersma, P., & Weenink, D. (2013). Praat software. Amsterdam: University of Amsterdam."},{"key":"9396_CR7","doi-asserted-by":"crossref","unstructured":"Borchert, M., Dusterhoft, A. (2005). Emotions in speech-experiments with prosody and quality features in speech for use in categorical and dimensional emotion recognition environments. In Proceedings of 2005 IEEE international conference on natural language processing and knowledge engineering, 2005. IEEE NLP-KE\u201905 (pp. 147\u2013151). IEEE.","DOI":"10.1109\/NLPKE.2005.1598724"},{"key":"9396_CR8","unstructured":"Bouckaert, R. R., Frank, E., Hall, M., Kirkby, R., Reutemann, P., Seewald, A., Scuse, D. (2013). Weka manual for version 3-7-8."},{"issue":"1","key":"9396_CR9","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman, L. (2001). Random forests. Machine learning, 45(1), 5\u201332.","journal-title":"Machine learning"},{"issue":"1","key":"9396_CR10","first-page":"39","volume":"20","author":"R Burget","year":"2011","unstructured":"Burget, R., Karasek, J., & Smekal, Z. (2011). Recognition of emotions in czech newspaper headlines. Radioengineering, 20(1), 39\u201347.","journal-title":"Radioengineering"},{"issue":"1","key":"9396_CR11","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1109\/79.911197","volume":"18","author":"R Cowie","year":"2001","unstructured":"Cowie, R., Douglas-Cowie, E., Tsapatsoulis, N., Votsis, G., Kollias, S., Fellenz, W., et al. (2001). Emotion recognition in human-computer interaction. IEEE Signal Processing Magazine, 18(1), 32\u201380.","journal-title":"IEEE Signal Processing Magazine"},{"issue":"1\u20132","key":"9396_CR12","doi-asserted-by":"crossref","first-page":"47","DOI":"10.1017\/S0025100300005417","volume":"27","author":"D Deterding","year":"1997","unstructured":"Deterding, D. (1997). The formants of monophthong vowels in standard southern british english pronunciation. Journal of the International Phonetic Association, 27(1\u20132), 47\u201355.","journal-title":"Journal of the International Phonetic Association"},{"key":"9396_CR13","doi-asserted-by":"crossref","unstructured":"Devillers, L., Vidrascu, L. (2006). Real-life emotions detection with lexical and paralinguistic cues on human-human call center dialogs. In Interspeech (pp. 801\u2013804).","DOI":"10.21437\/Interspeech.2006-275"},{"issue":"4","key":"9396_CR14","doi-asserted-by":"crossref","first-page":"407","DOI":"10.1016\/j.neunet.2005.03.007","volume":"18","author":"L Devillers","year":"2005","unstructured":"Devillers, L., Vidrascu, L., & Lamel, L. (2005). Challenges in real-life emotion annotation and machine learning based detection. Neural Networks, 18(4), 407\u2013422.","journal-title":"Neural Networks"},{"issue":"3","key":"9396_CR15","doi-asserted-by":"crossref","first-page":"572","DOI":"10.1016\/j.patcog.2010.09.020","volume":"44","author":"M Ayadi El","year":"2011","unstructured":"El Ayadi, M., Kamel, M. S., & Karray, F. (2011). Survey on speech emotion recognition: Features, classification schemes, and databases. Pattern Recognition, 44(3), 572\u2013587.","journal-title":"Pattern Recognition"},{"key":"9396_CR16","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1016\/j.patrec.2015.02.013","volume":"66","author":"A Esposito","year":"2015","unstructured":"Esposito, A., Esposito, A. M., & Vogel, C. (2015). Needs and challenges in human computer interaction for processing social emotional information. Pattern Recognition Letters, 66, 41\u201351.","journal-title":"Pattern Recognition Letters"},{"key":"9396_CR17","doi-asserted-by":"crossref","unstructured":"Fayek, H., Lech, M., Cavedon, L. (2015). Towards real-time speech emotion recognition using deep neural networks. In 2015 9th international conference on signal processing and communication systems (ICSPCS) (pp. 1\u20135). IEEE.","DOI":"10.1109\/ICSPCS.2015.7391796"},{"issue":"3","key":"9396_CR18","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1016\/j.jcomdis.2005.11.005","volume":"39","author":"MM Gorham-Rowan","year":"2006","unstructured":"Gorham-Rowan, M. M., & Laures-Gore, J. (2006). Acoustic-perceptual correlates of voice quality in elderly men and women. Journal of communication disorders, 39(3), 171\u2013184.","journal-title":"Journal of communication disorders"},{"key":"9396_CR19","unstructured":"Haq, S., Jackson, P. J., Edge, J. (2008). Audio-visual feature selection and reduction for emotion classification. In Proceedings of international conference on auditory-visual speech processing (AVSP), Tangalooma, Australia (2008)"},{"key":"9396_CR20","unstructured":"Hunter, G., Kebede, H. (2012). Formant frequencies of British English vowels produced by native speakers of farsi. In Acoustics 2012"},{"issue":"1","key":"9396_CR21","first-page":"235","volume":"2","author":"AB Ingale","year":"2012","unstructured":"Ingale, A. B., & Chaudhari, D. (2012). Speech emotion recognition. International Journal of Soft Computing and Engineering (IJSCE), 2(1), 235\u2013238.","journal-title":"International Journal of Soft Computing and Engineering (IJSCE)"},{"key":"9396_CR22","unstructured":"Jackson, P., Haq, S. (2014). Surrey audio-visual expressed emotion(savee) database."},{"issue":"2","key":"9396_CR23","doi-asserted-by":"crossref","first-page":"165","DOI":"10.2478\/v10177-012-0024-4","volume":"58","author":"D Kami\u0144ska","year":"2012","unstructured":"Kami\u0144ska, D., & Pelikant, A. (2012). Recognition of human emotion from a speech signal based on plutchik\u2019s model. International Journal of Electronics and Telecommunications, 58(2), 165\u2013170.","journal-title":"International Journal of Electronics and Telecommunications"},{"issue":"2","key":"9396_CR24","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1007\/s10772-011-9125-1","volume":"15","author":"SG Koolagudi","year":"2012","unstructured":"Koolagudi, S. G., & Rao, K. S. (2012). Emotion recognition from speech: A review. International Journal of Speech Technology, 15(2), 99\u2013117.","journal-title":"International Journal of Speech Technology"},{"issue":"3","key":"9396_CR25","first-page":"18","volume":"2","author":"A Liaw","year":"2002","unstructured":"Liaw, A., & Wiener, M. (2002). Classification and regression by randomforest. R News, 2(3), 18\u201322.","journal-title":"R News"},{"key":"9396_CR26","doi-asserted-by":"crossref","DOI":"10.1201\/9781584888796","volume-title":"Computational methods of feature selection","author":"H Liu","year":"2007","unstructured":"Liu, H., & Motoda, H. (2007). Computational methods of feature selection. Boca Raton: CRC Press."},{"key":"9396_CR27","doi-asserted-by":"crossref","unstructured":"L\u00fcsi, I., Escarela, S., Anbarjafari, G. (2016). Sase: Rgb-depth database for human head pose estimation. In Computer vision\u2013ECCV 2016 workshops (pp. 325\u2013336). Springer","DOI":"10.1007\/978-3-319-49409-8_26"},{"key":"9396_CR28","unstructured":"Millhouse, T., Clermont, F., Davis, P. (2002). Exploring the importance of formant bandwidths in the production of the singer\u2019s formant. In Proceedings of the 9th Australian SST (pp. 373\u2013378)."},{"key":"9396_CR29","doi-asserted-by":"crossref","unstructured":"Neiberg, D., Elenius, K., Laskowski, K. (2006). Emotion recognition in spontaneous speech using gmms. In Interspeech (pp. 809\u2013812)","DOI":"10.21437\/Interspeech.2006-277"},{"issue":"3","key":"9396_CR30","doi-asserted-by":"crossref","first-page":"470","DOI":"10.1111\/insr.12042_10","volume":"81","author":"K Nordhausen","year":"2013","unstructured":"Nordhausen, K. (2013). Ensemble methods: Foundations and algorithms by Zhi-Hua Zhou. International Statistical Review, 81(3), 470\u2013470.","journal-title":"International Statistical Review"},{"issue":"4","key":"9396_CR31","doi-asserted-by":"crossref","first-page":"603","DOI":"10.1016\/S0167-6393(03)00099-2","volume":"41","author":"TL Nwe","year":"2003","unstructured":"Nwe, T. L., Foo, S. W., & De Silva, L. C. (2003). Speech emotion recognition using hidden markov models. Speech Communication, 41(4), 603\u2013623.","journal-title":"Speech Communication"},{"key":"9396_CR32","doi-asserted-by":"crossref","unstructured":"Palm, G., Glodek, M. (2013). Towards emotion recognition in human computer interaction. In Neural nets and surroundings (vol. 19, pp. 323\u2013336). Springer.","DOI":"10.1007\/978-3-642-35467-0_32"},{"key":"9396_CR33","first-page":"222","volume":"3","author":"VA Petrushin","year":"2000","unstructured":"Petrushin, V. A. (2000). Emotion recognition in speech signal: experimental study, development, and application. Studies, 3, 222\u2013225.","journal-title":"Studies"},{"issue":"1","key":"9396_CR34","first-page":"52","volume":"22","author":"J Pribil","year":"2013","unstructured":"Pribil, J., & Pribilova, A. (2013). Determination of formant features in czech and slovak for gmm emotional speech classifier. Radioengineering, 22(1), 52\u201359.","journal-title":"Radioengineering"},{"issue":"5","key":"9396_CR35","doi-asserted-by":"crossref","first-page":"340","DOI":"10.1016\/j.evolhumbehav.2007.05.002","volume":"28","author":"DA Puts","year":"2007","unstructured":"Puts, D. A., Hodges, C. R., C\u00e1rdenas, R. A., & Gaulin, S. J. (2007). Men\u2019s voices as dominance signals: Vocal fundamental and formant frequencies influence dominance attributions among men. Evolution and Human Behavior, 28(5), 340\u2013344.","journal-title":"Evolution and Human Behavior"},{"key":"9396_CR36","doi-asserted-by":"crossref","unstructured":"Rabiei, M., Gasparetto, A. (2014). A system for feature classification of emotions based on speech analysis; applications to human-robot interaction. In 2014 second RSI\/ISM international conference on robotics and mechatronics (ICRoM) (pp. 795\u2013800). IEEE","DOI":"10.1109\/ICRoM.2014.6991001"},{"key":"9396_CR37","doi-asserted-by":"crossref","unstructured":"Refaeilzadeh, P., Tang, L., Liu, H. (2009). Cross-validation. In Encyclopedia of database systems (pp. 532\u2013538). Springer (2009)","DOI":"10.1007\/978-0-387-39940-9_565"},{"issue":"10","key":"9396_CR38","doi-asserted-by":"crossref","first-page":"1619","DOI":"10.1109\/TPAMI.2006.211","volume":"28","author":"JJ Rodriguez","year":"2006","unstructured":"Rodriguez, J. J., Kuncheva, L. I., & Alonso, C. J. (2006). Rotation forest: A new classifier ensemble method. IEEE Transactions on Pattern Analysis and Machine Intelligence, 28(10), 1619\u20131630.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"1","key":"9396_CR39","doi-asserted-by":"crossref","first-page":"40","DOI":"10.1016\/j.csl.2011.11.003","volume":"27","author":"KR Scherer","year":"2013","unstructured":"Scherer, K. R. (2013). Vocal markers of emotion: Comparing induction and acting elicitation. Computer Speech & Language, 27(1), 40\u201358.","journal-title":"Computer Speech & Language"},{"issue":"1","key":"9396_CR40","doi-asserted-by":"crossref","first-page":"218","DOI":"10.1016\/j.csl.2013.10.002","volume":"29","author":"KR Scherer","year":"2015","unstructured":"Scherer, K. R., Sundberg, J., Tamarit, L., & Salom\u00e3o, G. L. (2015). Comparing the acoustic expression of emotion in the speaking and the singing voice. Computer Speech & Language, 29(1), 218\u2013235.","journal-title":"Computer Speech & Language"},{"key":"9396_CR41","doi-asserted-by":"crossref","unstructured":"Schuller, B., Seppi, D., Batliner, A., Maier, A., Steidl, S. (2007). Towards more reality in the recognition of emotional speech. In IEEE international conference on Acoustics, speech and signal processing, 2007. ICASSP 2007 (vol. 4, pp. IV\u2013941). IEEE.","DOI":"10.1109\/ICASSP.2007.367226"},{"issue":"12","key":"9396_CR42","doi-asserted-by":"crossref","first-page":"1856","DOI":"10.1016\/j.imavis.2005.12.021","volume":"25","author":"N Sebe","year":"2007","unstructured":"Sebe, N., Lew, M. S., Sun, Y., Cohen, I., Gevers, T., & Huang, T. S. (2007). Authentic facial expression analysis. Image and Vision Computing, 25(12), 1856\u20131863.","journal-title":"Image and Vision Computing"},{"key":"9396_CR43","doi-asserted-by":"crossref","unstructured":"Stiefelhagen, R., F\u00fcgen, C., Gieselmann, P., Holzapfel, H., Nickel, K., Waibel, A. (2004). Natural human-robot interaction using speech, head pose and gestures. In 2004 IEEE\/RSJ international conference on intelligent robots and systems, 2004 (IROS 2004). Proceedings (vol. 3, pp. 2422\u20132427). IEEE.","DOI":"10.1109\/IROS.2004.1389771"},{"key":"9396_CR44","doi-asserted-by":"crossref","unstructured":"Sun, N., Zheng, W., Sun, C., Zou, C., Zhao, L. (2006). Facial expression recognition based on boostingtree. In Advances in neural networks-ISNN 2006 (pp 77\u201384). Springer.","DOI":"10.1007\/11760023_12"},{"issue":"1","key":"9396_CR45","doi-asserted-by":"crossref","first-page":"40","DOI":"10.3758\/BF03213026","volume":"9","author":"JT Townsend","year":"1971","unstructured":"Townsend, J. T. (1971). Theoretical analysis of an alphabetic confusion matrix. Perception & Psychophysics, 9(1), 40\u201350.","journal-title":"Perception & Psychophysics"},{"key":"9396_CR46","doi-asserted-by":"crossref","unstructured":"Vlasenko, B., Schuller, B., Wendemuth, A., Rigoll, G. (2007). Frame vs. turn-level: emotion recognition from speech considering static and dynamic processing. In Affective computing and intelligent interaction (pp. 139\u2013147). Springer.","DOI":"10.1007\/978-3-540-74889-2_13"},{"key":"9396_CR47","doi-asserted-by":"crossref","unstructured":"Vogt, T., Andr\u00e9, E., Wagner, J. (2008). Automatic recognition of emotions from speech: A review of the literature and recommendations for practical realisation. In Affect and emotion in human-computer interaction (vol. 4868, pp. 75\u201391). Springer.","DOI":"10.1007\/978-3-540-85099-1_7"},{"issue":"1","key":"9396_CR48","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1109\/T-AFFC.2010.16","volume":"2","author":"CH Wu","year":"2011","unstructured":"Wu, C. H., & Liang, W. B. (2011). Emotion recognition of affective speech based on multiple classifiers using acoustic-prosodic information and semantic labels. IEEE Transactions on Affective Computing, 2(1), 10\u201321.","journal-title":"IEEE Transactions on Affective Computing"},{"key":"9396_CR49","doi-asserted-by":"crossref","unstructured":"Yoon, W. J., Park, K. S. (2007). A study of emotion recognition and its applications. In: Modeling decisions for artificial intelligence (pp. 455\u2013462). Springer.","DOI":"10.1007\/978-3-540-73729-2_43"},{"key":"9396_CR50","doi-asserted-by":"crossref","unstructured":"Zeng, Z., Hu, Y., Roisman, G. I., Wen, Z., Fu, Y., Huang, T. S. (2007). Audio-visual spontaneous emotion recognition. In Artifical intelligence for human computing (pp. 72\u201390). Springer.","DOI":"10.1007\/978-3-540-72348-6_4"},{"key":"9396_CR51","doi-asserted-by":"publisher","unstructured":"Zhang, S., Zhao, X., Lei, B. (2013). Speech emotion recognition using an enhanced kernel isomap for human-robot interaction. International Journal of Advanced Robotic Systems. doi: 10.5772\/55403 .","DOI":"10.5772\/55403"},{"key":"9396_CR52","doi-asserted-by":"crossref","DOI":"10.1201\/b12207","volume-title":"Ensemble methods: Foundations and algorithms","author":"ZH Zhou","year":"2012","unstructured":"Zhou, Z. H. (2012). Ensemble methods: Foundations and algorithms. Boca Raton: CRC Press."}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-017-9396-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-017-9396-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-017-9396-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,23]],"date-time":"2022-07-23T22:02:16Z","timestamp":1658613736000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-017-9396-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,2,9]]},"references-count":52,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2017,6]]}},"alternative-id":["9396"],"URL":"https:\/\/doi.org\/10.1007\/s10772-017-9396-2","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,2,9]]}}}