{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T17:41:52Z","timestamp":1778694112455,"version":"3.51.4"},"reference-count":156,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2021,1,20]],"date-time":"2021-01-20T00:00:00Z","timestamp":1611100800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,1,20]],"date-time":"2021-01-20T00:00:00Z","timestamp":1611100800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1007\/s10772-021-09808-0","type":"journal-article","created":{"date-parts":[[2021,1,20]],"date-time":"2021-01-20T19:16:13Z","timestamp":1611170173000},"page":"367-388","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":71,"title":["A review on speech processing using machine learning paradigm"],"prefix":"10.1007","volume":"24","author":[{"given":"Kishor Barasu","family":"Bhangale","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3938-7495","authenticated-orcid":false,"given":"K.","family":"Mohanaprasad","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,1,20]]},"reference":[{"issue":"6","key":"9808_CR1","doi-asserted-by":"publisher","first-page":"165","DOI":"10.5958\/2278-4853.2020.00195.0","volume":"9","author":"AZ Abbosovna","year":"2020","unstructured":"Abbosovna, A. Z. (2020). Interactive games as a way to improve speech skills in foreign language lessons. Asian Journal of Multidimensional Research (AJMR), 9(6), 165\u2013171.","journal-title":"Asian Journal of Multidimensional Research (AJMR)"},{"key":"9808_CR2","doi-asserted-by":"publisher","first-page":"114","DOI":"10.1016\/j.csl.2019.07.001","volume":"59","author":"K Abdellah","year":"2020","unstructured":"Abdellah, K., Francis, G., Juan, R. O., & Jean, S. (2020). Principal component analysis of the spectrogram of the speech signal: Interpretation and application to dysarthric speech. Computer Speech & Language, 59, 114\u2013122.","journal-title":"Computer Speech & Language"},{"key":"9808_CR3","doi-asserted-by":"crossref","unstructured":"Afshan, A., Guo, J., Park, S. J., Ravi, V., Flint, J., & Alwan, A. (2018, September). Effectiveness of voice quality features in detecting depression. In Interspeech (pp. 1676\u20131680).","DOI":"10.21437\/Interspeech.2018-1399"},{"key":"9808_CR4","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1016\/j.specom.2019.12.001","volume":"116","author":"MB Ak\u00e7ay","year":"2020","unstructured":"Ak\u00e7ay, M. B., & O\u011fuz, K. (2020). Speech emotion recognition: Emotional models, databases, features, preprocessing methods, supporting modalities, and classifiers. Speech Communication, 116, 56\u201376.","journal-title":"Speech Communication"},{"key":"9808_CR5","doi-asserted-by":"crossref","unstructured":"Alhargan, A., Cooke, N., & Binjammaz, T. (2017). Multimodal affect recognition in an interactive gaming environment using eye tracking and speech signals. In: Proceedings of the 19th ACM international conference on multimodal interaction, pp. 479\u2013486.","DOI":"10.1145\/3136755.3137016"},{"key":"9808_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/JTEHM.2019.2940900","volume":"7","author":"L Ali","year":"2019","unstructured":"Ali, L., Zhu, C., Zhang, Z., & Liu, Y. (2019). Automated detection of Parkinson\u2019s disease based on multiple types of sustained phonations using linear discriminant analysis and genetically optimized neural network. IEEE Journal of Translational Engineering in Health and Medicine, 7, 1\u201310.","journal-title":"IEEE Journal of Translational Engineering in Health and Medicine"},{"key":"9808_CR7","doi-asserted-by":"crossref","unstructured":"Alves, S. F., Silva, I. N., Ranieri, C. M., & Ferasoli Filho, H. (2014). Assisted robot navigation based on speech recognition and synthesis. In\u00a05th ISSNIP-IEEE biosignals and biorobotics conference (2014): Biosignals and robotics for better and safer living (BRC), pp. 1\u20135.","DOI":"10.1109\/BRC.2014.6881003"},{"key":"9808_CR8","doi-asserted-by":"crossref","unstructured":"Amberkar, A., Awasarmol, P., Deshmukh, G., & Dave, P. (2018). Speech recognition using recurrent neural networks. In: International conference on current trends towards converging technologies (ICCTCT), Coimbatore, pp. 1\u20134.","DOI":"10.1109\/ICCTCT.2018.8551185"},{"key":"9808_CR9","doi-asserted-by":"crossref","unstructured":"Anjana, J. S., & Poorna, S. S. (2018, March). Language identification from speech features using SVM and LDA. In: 2018 international conference on wireless communications, signal processing and networking (WiSPNET), pp. 1\u20134.","DOI":"10.1109\/WiSPNET.2018.8538638"},{"issue":"3","key":"9808_CR10","first-page":"181","volume":"6","author":"MA Anusuya","year":"2009","unstructured":"Anusuya, M. A., & Katti, S. K. (2009). Speech recognition by machine: A review. International Journal of Computer Science and Information Security, 6(3), 181\u2013205.","journal-title":"International Journal of Computer Science and Information Security"},{"issue":"9\u201310","key":"9808_CR11","doi-asserted-by":"publisher","first-page":"661","DOI":"10.1080\/08839514.2018.1430469","volume":"31","author":"E Babaee","year":"2017","unstructured":"Babaee, E., Anuar, N. B., Abdul Wahab, A. W., Shamshirband, S., & Chronopoulos, A. T. (2017). An overview of audio event detection methods from feature extraction to classification. Applied Artificial Intelligence, 31(9\u201310), 661\u2013714.","journal-title":"Applied Artificial Intelligence"},{"key":"9808_CR12","first-page":"319","volume-title":"Support vector machine based voice activity detection","author":"M Baig","year":"2006","unstructured":"Baig, M., Masud, S., & Awais, M. (2006). Support vector machine based voice activity detection (pp. 319\u2013322). Tottori: International Symposium on Intelligent Signal Processing and Communications."},{"key":"9808_CR13","doi-asserted-by":"crossref","unstructured":"Bakshi, A., & Kopparapu, S. K. (2019). Spoken Indian language classification using GMM supervectors and artificial neural networks. IEEE Bombay Section Signature Conference (IBSSC), Mumbai, India, pp. 1\u20136.","DOI":"10.1109\/IBSSC47189.2019.8972979"},{"key":"9808_CR14","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1016\/B978-0-12-819445-4.00005-9","volume":"11","author":"S Barde","year":"2020","unstructured":"Barde, S., & Kaimal, V. (2020). Speech recognition technique for identification of raga. Cognitive Informatics, Computer Modelling, and Cognitive Science, 11, 101\u2013117.","journal-title":"Cognitive Informatics, Computer Modelling, and Cognitive Science"},{"key":"9808_CR15","doi-asserted-by":"crossref","unstructured":"Bariz\u00e3o, A. H., Fermino, M. A., Dajer, M. E., Liboni, L. H. B., & Spatti, D. H. (2018). Voice disorder classification using MLP and wavelet packet transform. International joint conference on neural networks (IJCNN), Rio de Janeiro, pp. 1\u20138","DOI":"10.1109\/IJCNN.2018.8489121"},{"key":"9808_CR16","doi-asserted-by":"crossref","unstructured":"Bavkar, S., & Sahare, S. (2013). PCA based single channel speech enhancement method for highly noisy environment. International conference on advances in computing, communications and informatics (ICACCI), pp. 1103\u20131107.","DOI":"10.1109\/ICACCI.2013.6637331"},{"key":"9808_CR17","doi-asserted-by":"crossref","unstructured":"Bhakre, S. K., & Bang, A. (2016). Emotion recognition on the basis of audio signal using Naive Bayes classifier. International conference on advances in computing, communications and informatics (ICACCI), Jaipur, pp. 2363\u20132367.","DOI":"10.1109\/ICACCI.2016.7732408"},{"issue":"6","key":"9808_CR18","first-page":"55","volume":"8","author":"KB Bhangale","year":"2018","unstructured":"Bhangale, K. B., et al. (2018). Synthetic speech spoofing detection using Mfcc And Svm. IOSR Journal of Engineering (IOSRJEN), 8(6), 55\u201361.","journal-title":"IOSR Journal of Engineering (IOSRJEN)"},{"issue":"5","key":"9808_CR19","doi-asserted-by":"publisher","first-page":"2266","DOI":"10.1007\/s00034-018-0962-x","volume":"38","author":"CC Bhanja","year":"2019","unstructured":"Bhanja, C. C., Laskar, M. A., & Laskar, R. H. (2019). A pre-classification-based language identification for Northeast Indian languages using prosody and spectral features. Circuits, Systems, and Signal Processing, 38(5), 2266\u20132296.","journal-title":"Circuits, Systems, and Signal Processing"},{"key":"9808_CR20","doi-asserted-by":"crossref","unstructured":"Bharali, S. S., & Kalita, S. K. (2017). Speaker identification using vector quantization and I-vector with reference to Assamese language. In: International conference on wireless communications, signal processing and networking (WiSPNET), Chennai, pp. 164\u2013168.","DOI":"10.1109\/WiSPNET.2017.8299740"},{"key":"9808_CR21","doi-asserted-by":"crossref","unstructured":"Bharath, K. P., & Kumar, R. M. (2019). Multitaper based MFCC feature extraction for robust speaker recognition system. In: Innovations in power and advanced computing technologies (i-PACT), Vellore, pp. 1\u20135.","DOI":"10.1109\/i-PACT44901.2019.8960206"},{"key":"9808_CR22","first-page":"1","volume":"1","author":"S Biswas","year":"2020","unstructured":"Biswas, S., & Solanki, S. S. (2020). Speaker recognition: An enhanced approach to identify singer voice using neural network. International Journal of Speech Technology, 1, 1\u201313.","journal-title":"International Journal of Speech Technology"},{"key":"9808_CR23","doi-asserted-by":"crossref","unstructured":"Burkhardt, F., Paeschke, A., Rolfes, M., Sendlmeier, W. F., & Weiss, B. (2005). A database of German emotional speech. In\u00a0Ninth European conference on speech communication and technology. Interspeech, (pp. 1517\u20131520).","DOI":"10.21437\/Interspeech.2005-446"},{"key":"9808_CR24","first-page":"4960","volume-title":"Listen, attend and spell: A neural network for large vocabulary conversational speech recognition","author":"William Chan","year":"2016","unstructured":"Chan, William, Jaitly, Navdeep, Le, Quoc, & Vinyals, Oriol. (2016). Listen, attend and spell: A neural network for large vocabulary conversational speech recognition (pp. 4960\u20134964). Speech and Signal Processing (ICASSP): IEEE Int. Conf. on Acoustics."},{"key":"9808_CR25","doi-asserted-by":"crossref","unstructured":"Chen, X., Li, H., Ma, L., Liu, X., & Chen, J. (2015). Teager Mel and PLP fusion feature based speech emotion recognition. In: Fifth international conference on instrumentation and measurement, computer, communication and control (IMCCC), Qinhuangdao, pp. 1109\u20131114.","DOI":"10.1109\/IMCCC.2015.239"},{"key":"9808_CR26","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1007\/978-981-13-9683-0_14","volume-title":"Smart computing paradigms: New progresses and challenges","author":"NB Chittaragi","year":"2020","unstructured":"Chittaragi, N. B., & Koolagudi, S. G. (2020). Sentence-based dialect identification system using extreme gradient boosting algorithm. Smart computing paradigms: New progresses and challenges (pp. 131\u2013138). Singapore: Springer."},{"key":"9808_CR27","doi-asserted-by":"crossref","unstructured":"Chougala, M., & Kuntoji, S. (2016). Novel text independent speaker recognition using LPC based formants. In: International conference on electrical, electronics, and optimization techniques (ICEEOT), Chennai, pp. 510\u2013513.","DOI":"10.1109\/ICEEOT.2016.7755666"},{"key":"9808_CR28","doi-asserted-by":"crossref","unstructured":"Cuiling, L. (2016). English Speech Recognition Method Based on Hidden Markov Model. International Conference on Smart Grid and Electrical Automation (ICSGEA), Zhangjiajie, 94-97.","DOI":"10.1109\/ICSGEA.2016.63"},{"issue":"11","key":"9808_CR29","doi-asserted-by":"publisher","first-page":"1590","DOI":"10.1109\/TASLP.2014.2341914","volume":"22","author":"S Cumani","year":"2014","unstructured":"Cumani, S., & Laface, P. (2014). Large-scale training of pairwise support vector machines for speaker recognition. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 22(11), 1590\u20131600.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9808_CR30","doi-asserted-by":"crossref","unstructured":"Dahmani, M., & Guerti, M. (2017). Vocal folds pathologies classification using Na\u00efve Bayes networks. 6th International Conference on Systems and Control (ICSC), Batna. pp. 426\u2013432.","DOI":"10.1109\/ICoSC.2017.7958686"},{"key":"9808_CR31","doi-asserted-by":"crossref","unstructured":"Dai, J., Vijayarajan, V., Peng, X., Tan, L. & Jiang, J. (2018). Speech recognition using sparse discrete wavelet decomposition feature extraction. In: IEEE international conference on electro\/information technology (EIT), Rochester, MI, pp. 812\u2013816.","DOI":"10.1109\/EIT.2018.8500254"},{"key":"9808_CR32","unstructured":"Deka, B. K., & Das, P. (2019). An analysis of an isolated assamese digit recognition using MFCC and DTW. 6th international conference on computing for sustainable global development (INDIACom), New Delhi, India, pp. 46\u201350."},{"key":"9808_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1155\/2019\/4368036","volume":"2019","author":"V Delic","year":"2019","unstructured":"Delic, V., et al. (2019). Speech technology progress based on new machine learning paradigm. Computational Intelligence and Neuroscience, 2019, 1\u201319.","journal-title":"Computational Intelligence and Neuroscience"},{"key":"9808_CR34","doi-asserted-by":"publisher","first-page":"355","DOI":"10.1109\/TASLP.2019.2955293","volume":"28","author":"M Diez","year":"2020","unstructured":"Diez, M., Burget, L., Landini, F., & \u010cernock\u00fd, J. (2020). Analysis of speaker diarization based on Bayesian HMM with eigenvoice priors. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 28, 355\u2013368.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9808_CR35","doi-asserted-by":"crossref","unstructured":"Djamal, E. C., Nurhamidah, N., & Ilyas, R. (2017). Spoken word recognition using MFCC and learning vector quantization. In: 4th international conference on electrical engineering, computer science and informatics (EECSI), Yogyakarta, pp. 1\u20136.","DOI":"10.11591\/eecsi.v4.1043"},{"issue":"1","key":"9808_CR36","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1109\/TASL.2009.2023165","volume":"11","author":"AJ Eronen","year":"2010","unstructured":"Eronen, A. J., & Klapuri, A. P. (2010). Music tempo estimation with k-NN regression. IEEE Transactions on Audio, Speech and Language Processing, 11(1), 50\u201357.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"key":"9808_CR37","volume-title":"Master handbook of acoustics","author":"FA Everest","year":"2009","unstructured":"Everest, F. A., & Pohlmann, K. C. (2009). Master handbook of acoustics (5th ed.). New York, NY: McGraw-Hill.","edition":"5"},{"key":"9808_CR38","doi-asserted-by":"crossref","unstructured":"Fan, L., Ke, D., Fu, X., Lu, S., & Xu, B. (2012). Power-normalized PLP (PNPLP) feature for robust speech recognition. In: 8th international symposium on Chinese spoken language processing, Kowloon, pp. 224\u2013228.","DOI":"10.1109\/ISCSLP.2012.6423529"},{"key":"9808_CR39","unstructured":"Garg, A., & Sharma, P. (2016). Survey on acoustic modeling and feature extraction for speech recognition. In: 3rd international conference on computing for sustainable global development (INDIACom, pp. 2291\u20132295)."},{"key":"9808_CR40","doi-asserted-by":"crossref","unstructured":"Gillespie, S., Logan, Y. Y., Moore, E., Laures-Gore, J., Russell, S., & Patel, R. (2017, August). Cross-database models for the classification of dysarthria presence. In\u00a0Interspeech\u00a0(pp. 3127\u20133131).","DOI":"10.21437\/Interspeech.2017-216"},{"key":"9808_CR41","doi-asserted-by":"crossref","unstructured":"Gon\u00e7alves, C., Rocha, T., Reis, A., & Barroso, J. (2017). AppVox: An application to assist people with speech impairments in their speech therapy sessions. In: World conference on information systems and technologies. Springer, pp. 581\u2013591.","DOI":"10.1007\/978-3-319-56538-5_59"},{"key":"9808_CR42","doi-asserted-by":"publisher","first-page":"277","DOI":"10.1007\/978-3-642-30567-2_23","volume":"294","author":"D Guerchi","year":"2012","unstructured":"Guerchi, D., & Mohamed, E. E. (2012). LPC-Based Narrowband Speech Steganography. In: Benlamri R. (eds) Networked Digital Technologies. NDT 2012. Communications in Computer and Information Science, 294, 277\u2013288.","journal-title":"Communications in Computer and Information Science"},{"key":"9808_CR43","doi-asserted-by":"crossref","unstructured":"Guiming, D., Xia, W., Guangyan, W., Yan, Z., & Dan, L. (2016). Speech recognition based on convolutional neural networks. In: IEEE International Conference On Signal And Image Processing (ICSIP), Beijing, pp. 708\u2013711.","DOI":"10.1109\/SIPROCESS.2016.7888355"},{"key":"9808_CR44","doi-asserted-by":"crossref","unstructured":"Gupta, H., & Gupta, D. (2016). LPC and LPCC method of feature extraction in Speech Recognition System. 6th international conference - cloud system and big data engineering (Confluence), Noida, pp. 498\u2013502.","DOI":"10.1109\/CONFLUENCE.2016.7508171"},{"key":"9808_CR45","doi-asserted-by":"crossref","unstructured":"Gupta, K., & Gupta, D. (2016). An analysis on LPC, RASTA and MFCC techniques in Automatic Speech recognition system. 2016 6th international conference - cloud system and big data engineering (confluence), Noida, pp. 493\u2013497.","DOI":"10.1109\/CONFLUENCE.2016.7508170"},{"issue":"3","key":"9808_CR46","doi-asserted-by":"publisher","first-page":"185","DOI":"10.5573\/IEIESPC.2020.9.3.185","volume":"9","author":"E Han","year":"2020","unstructured":"Han, E., & Cha, H. (2020). Adaptive feature generation for speech emotion recognition. IEIE Transactions on Smart Processing & Computing, 9(3), 185\u2013192.","journal-title":"IEIE Transactions on Smart Processing & Computing"},{"key":"9808_CR47","doi-asserted-by":"publisher","first-page":"2369","DOI":"10.3233\/IFS-151554","volume":"28","author":"A Hazrat","year":"2015","unstructured":"Hazrat, A., Ahmad, N., & Zhou, X. (2015). Automatic speech recognition of Urdu words using linear discriminant analysis. Journal of Intelligent and Fuzzy Systems, 28, 2369\u20132375.","journal-title":"Journal of Intelligent and Fuzzy Systems"},{"key":"9808_CR48","doi-asserted-by":"crossref","unstructured":"Heck, P., & Chou, K. C. (1994). Gaussian mixture model classifiers for machine monitoring. Proceedings of ICASSP \u201894. IEEE international conference on acoustics, speech and signal processing, Adelaide, SA, Vol. 6, pp. 133\u2013136.","DOI":"10.1109\/ICASSP.1994.389922"},{"key":"9808_CR49","doi-asserted-by":"crossref","unstructured":"Hidayat, R., Bejo, A., Sumaryono, S., & Winursito, A. (2018). Denoising speech for MFCC feature extraction using wavelet transformation in speech recognition system. 10th international conference on information technology and electrical engineering (ICITEE), Kuta, pp. 280\u2013284.","DOI":"10.1109\/ICITEED.2018.8534807"},{"key":"9808_CR50","doi-asserted-by":"crossref","unstructured":"Hsieh, H., Chien, J., Shinoda, K., & Furui, S. (2009). Independent component analysis for noisy speech recognition. IEEE international conference on acoustics, speech and signal processing, pp. 4369\u20134372.","DOI":"10.1109\/ICASSP.2009.4960597"},{"key":"9808_CR51","volume-title":"Spoken language processing: A guide to theory, algorithm, and system development","author":"X Huang","year":"2001","unstructured":"Huang, X., Acero, A., Hon, H. W., & Reddy, R. (2001). Spoken language processing: A guide to theory, algorithm, and system development. Upper Saddle River, NJ: Prentice Hall PTR."},{"key":"9808_CR52","first-page":"127","volume-title":"Fast and effective copy-move detection of digital audio based on auto segment","author":"X Huang","year":"2020","unstructured":"Huang, X., Liu, Z., Lu, W., Liu, H., & Xiang, S. (2020). Fast and effective copy-move detection of digital audio based on auto segment (pp. 127\u2013142). In Digital Forensics and Forensic Investigations: Breakthroughs in Research and Practice."},{"key":"9808_CR53","doi-asserted-by":"publisher","first-page":"142009","DOI":"10.1109\/ACCESS.2019.2944386","volume":"7","author":"Y Huang","year":"2019","unstructured":"Huang, Y., Xiao, J., Tian, K., Wu, A., & Zhang, G. (2019). Research on robustness of emotion recognition under environmental noise conditions. IEEE Access, 7, 142009\u2013142021.","journal-title":"IEEE Access"},{"key":"9808_CR54","doi-asserted-by":"crossref","unstructured":"Ing-Jr, D., & Ming, Y. H. (2014). An HMM-like dynamic time warping scheme for automatic speech recognition. Mathematical Problems in Engineering, pp. 1\u20138.","DOI":"10.1155\/2014\/898729"},{"key":"9808_CR55","doi-asserted-by":"crossref","unstructured":"Ishimoto, Y., Teraoka, T., & Enomoto, M. (2017). End-of-utterance prediction by prosodic features and phrase-dependency structure in spontaneous Japanese speech. Interspeech, pp. 1681\u20131685.","DOI":"10.21437\/Interspeech.2017-837"},{"key":"9808_CR56","doi-asserted-by":"crossref","unstructured":"Jacob, A. (2016, April). Speech emotion recognition based on minimal voice quality features. In 2016 International conference on communication and signal processing (ICCSP) (pp. 0886\u20130890). IEEE.","DOI":"10.1109\/ICCSP.2016.7754275"},{"issue":"16","key":"9808_CR57","first-page":"667","volume":"118","author":"B Jena","year":"2018","unstructured":"Jena, B., & Singh, S. S. (2018). Analysis of stressed speech on Teager energy operator (TEO). International Journal of Pure and Applied Mathematics, 118(16), 667\u2013680.","journal-title":"International Journal of Pure and Applied Mathematics"},{"issue":"2","key":"9808_CR58","doi-asserted-by":"publisher","first-page":"754","DOI":"10.1109\/TVLSI.2015.2413454","volume":"24","author":"J Jo","year":"2016","unstructured":"Jo, J., Yoo, H., & Park, I. (2016). Energy-efficient floating-point MFCC extraction architecture for speech recognition systems. IEEE Transactions on Very Large Scale Integration (VLSI) Systems, 24(2), 754\u2013758.","journal-title":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems"},{"issue":"2","key":"9808_CR59","first-page":"253","volume":"22","author":"J Jung","year":"2017","unstructured":"Jung, J., & Kim, G. (2017). Machine learning based speech disorder detection system. Journal of Broadcast Engineering, 22(2), 253\u2013256.","journal-title":"Journal of Broadcast Engineering"},{"key":"9808_CR60","doi-asserted-by":"crossref","unstructured":"Kandpal, N., & Madhusudan B. R. (2010). Implementation of PCA & ICA for voice ecognition and separation of speech. IEEE International Conference on Advanced Management Science(ICAMS 2010), pp. 536\u2013538.","DOI":"10.1109\/ICAMS.2010.5553181"},{"issue":"11","key":"9808_CR61","doi-asserted-by":"publisher","first-page":"5049","DOI":"10.1007\/s00034-018-0805-9","volume":"37","author":"A Kanhe","year":"2018","unstructured":"Kanhe, A., & Aghila, G. (2018). A DCT\u2013SVD-based speech steganography in voiced frames. Circuits, Systems, and Signal Processing, 37(11), 5049\u20135068.","journal-title":"Circuits, Systems, and Signal Processing"},{"key":"9808_CR62","doi-asserted-by":"crossref","unstructured":"Kathania, H. K., Shahnawazuddin, S., Adiga, N., & Ahmad, W. (2018, April). Role of prosodic features on children\u2019s speech recognition. In\u00a02018 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp. 5519\u20135523.","DOI":"10.1109\/ICASSP.2018.8461668"},{"issue":"3","key":"9808_CR63","doi-asserted-by":"publisher","first-page":"198","DOI":"10.18178\/ijmlc.2018.8.3.687","volume":"8","author":"X Ke","year":"2018","unstructured":"Ke, X., Zhu, Y., Wen, L., & Zhang, W. (2018). Speech emotion recognition based on SVM and ANN. International Journal of Machine Learning and Computing, 8(3), 198\u2013202.","journal-title":"International Journal of Machine Learning and Computing"},{"key":"9808_CR64","doi-asserted-by":"crossref","unstructured":"Khan, A. & Roy, U. K. (2017). Emotion recognition using prosodie and spectral features of speech and Na\u00efve Bayes Classifier. In: International conference on wireless communications, signal processing and networking (WiSPNET), Chennai, pp. 1017\u20131021.","DOI":"10.1109\/WiSPNET.2017.8299916"},{"issue":"12","key":"9808_CR65","doi-asserted-by":"publisher","first-page":"634","DOI":"10.1049\/joe.2017.0210","volume":"2017","author":"P Khunarsa","year":"2017","unstructured":"Khunarsa, P. (2017). Single-signal entity approach for sung word recognition with artificial neural network and time\u2013frequency audio features. The Journal of Engineering, 2017(12), 634\u2013645.","journal-title":"The Journal of Engineering"},{"issue":"9","key":"9808_CR66","doi-asserted-by":"publisher","first-page":"1581","DOI":"10.1109\/TNSRE.2017.2681691","volume":"25","author":"M Kim","year":"2017","unstructured":"Kim, M., Kim, Y., Yoo, J., Wang, J., & Kim, H. (2017). Regularized speaker adaptation of KL-HMM for dysarthric speech recognition. IEEE Transactions on Neural Systems and Rehabilitation Engineering, 25(9), 1581\u20131591.","journal-title":"IEEE Transactions on Neural Systems and Rehabilitation Engineering"},{"issue":"7","key":"9808_CR67","doi-asserted-by":"publisher","first-page":"1315","DOI":"10.1109\/TASLP.2016.2545928","volume":"24","author":"C Kim","year":"2016","unstructured":"Kim, C., & Stern, R. M. (2016). Power-normalized cepstral coefficients (PNCC) for robust speech recognition. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 24(7), 1315\u20131329.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9808_CR68","doi-asserted-by":"crossref","unstructured":"Koehler, J., Morgan, N., Hermansky, H., Hirsch, H. G., & Tong, G. (1994). Integrating RASTA-PLP into speech recognition. In: Proceedings of ICASSP \u201894. IEEE international conference on acoustics, speech and signal processing, Adelaide, SA, 1, pp. 421\u2013424.","DOI":"10.1109\/ICASSP.1994.389266"},{"issue":"5","key":"9808_CR69","doi-asserted-by":"publisher","first-page":"549","DOI":"10.1016\/j.jvoice.2015.06.010","volume":"30","author":"M Kohler","year":"2016","unstructured":"Kohler, M., Vellasco, M. M., & Cataldo, E. (2016). Analysis and classification of voice pathologies using glottal signal parameters. Journal of Voice, 30(5), 549\u2013556.","journal-title":"Journal of Voice"},{"key":"9808_CR70","doi-asserted-by":"crossref","unstructured":"Kohlschein, C., Schmitt, M., Sch\u00fcller, B., Jeschke, S., & Werner, C. J. (2017). A machine learning based system for the automatic evaluation of aphasia speech. In: IEEE 19th international conference on e-health networking, applications and services, pp. 1\u20136.","DOI":"10.1109\/HealthCom.2017.8210766"},{"key":"9808_CR71","doi-asserted-by":"crossref","unstructured":"Laleye, F. A. A., Ezin, E. C., & Motamed, C. (2014). Weighted combination of Naive Bayes and LVQ classifier for Fongbe phoneme classification. Tenth international conference on signal-image technology and internet-based systems, Marrakech, pp. 7\u201313.","DOI":"10.1109\/SITIS.2014.84"},{"issue":"1","key":"9808_CR72","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1109\/TASL.2012.2215599","volume":"21","author":"H Le","year":"2013","unstructured":"Le, H., Oparin, I., Allauzen, A., Gauvain, J., & Yvon, F. (2013). Structured output layer neural network language models for speech recognition. IEEE Transactions on Audio, Speech and Language Processing, 21(1), 197\u2013206.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"key":"9808_CR73","doi-asserted-by":"crossref","unstructured":"Lee, S. (2015). Hybrid Na\u00efve Bayes K-nearest neighbor method implementation on speech emotion recognition. In: IEEE advanced information technology, electronic and automation control conference (IAEAC), Chongqing, pp. 349\u2013353.","DOI":"10.1109\/IAEAC.2015.7428573"},{"issue":"11","key":"9808_CR74","first-page":"5541","volume":"12","author":"K Lee","year":"2018","unstructured":"Lee, K., Moon, C., & Nam, Y. (2018). Diagnosing vocal disorders using cobweb clustering of the jitter, shimmer, and harmonics-to-noise ratio. KSII Transactions on Internet & Information Systems, 12(11), 5541\u20135554.","journal-title":"KSII Transactions on Internet & Information Systems"},{"key":"9808_CR75","doi-asserted-by":"crossref","unstructured":"Lee, D., Park, H., Lim, M., & Kim, J. (2019). Dynamic time warping-based Korean spoken word detection system using euclidean distance in intelligent personal assistants. IEEE 8th Global Conference on Consumer Electronics (GCCE), Osaka, Japan, pp. 519\u2013520.","DOI":"10.1109\/GCCE46687.2019.9015383"},{"key":"9808_CR76","doi-asserted-by":"crossref","unstructured":"Li, X., Tao, J., Johnson, M. T., Soltis, J., Savage, A., Leong, K. M., & Newman, J. D. (2007, April). Stress and emotion classification using jitter and shimmer features. In\u00a02007 IEEE International Conference on Acoustics, Speech and Signal Processing-ICASSP\u201907, 4, IV-1081.","DOI":"10.1109\/ICASSP.2007.367261"},{"key":"9808_CR77","doi-asserted-by":"crossref","unstructured":"Lin, J., & Zhang, B. (2018). A music retrieval method based on hidden markov model. In\u00a02018 International conference on intelligent transportation, big data & smart city (ICITBS), (pp. 732\u2013735).","DOI":"10.1109\/ICITBS.2018.00189"},{"key":"9808_CR78","doi-asserted-by":"publisher","first-page":"2837","DOI":"10.1007\/s11042-016-3257-x","volume":"76","author":"P Liu","year":"2017","unstructured":"Liu, P., Li, S., & Wang, H. (2017). Steganography integrated into linear predictive coding for low bit-rate speech codec. Multimed Tools Appl, 76, 2837\u20132859.","journal-title":"Multimed Tools Appl"},{"key":"9808_CR79","doi-asserted-by":"publisher","first-page":"141170","DOI":"10.1109\/ACCESS.2020.3013066","volume":"8","author":"L Liu","year":"2020","unstructured":"Liu, L., & Yang, J. (2020). Study on feature complementarity of statistics, energy, and principal information for spoofing detection. IEEE Access, 8, 141170\u2013141181.","journal-title":"IEEE Access"},{"key":"9808_CR80","doi-asserted-by":"publisher","first-page":"102455","DOI":"10.1016\/j.amjoto.2020.102455","volume":"289","author":"A Lovato","year":"2020","unstructured":"Lovato, A., Bonora, C., Genovese, E., Amato, C., Maiolino, L., & de Filippis, C. (2020). A panel of jitter\/shimmer may identify functional dysphonia at risk of failure after speech therapy. American Journal of Otolaryngology, 289, 102455.","journal-title":"American Journal of Otolaryngology"},{"key":"9808_CR81","doi-asserted-by":"publisher","first-page":"765.e1","DOI":"10.1016\/j.jvoice.2015.10.012","volume":"30","author":"A Lovato","year":"2016","unstructured":"Lovato, A., Colle, W. D., Giacomelli, L., Piacente, A., Righetto, L., Marioni, G., et al. (2016). Multi-dimensional voice program (MDVP) vs praat for assessing euphonic subjects: A preliminary study on the gender-discriminating power of acoustic analysis software. Journal of Voice, 30, 765.e1\u2013765.e5.","journal-title":"Journal of Voice"},{"key":"9808_CR82","doi-asserted-by":"publisher","first-page":"702","DOI":"10.1109\/LSP.2014.2313410","volume":"21","author":"Liang Lu","year":"2014","unstructured":"Lu, Liang, & Steve, R. (2014a). Probabilistic linear discriminant analysis for acoustic modeling. IEEE Signal Processing Letters, 21, 702\u2013706.","journal-title":"IEEE Signal Processing Letters"},{"key":"9808_CR83","unstructured":"Lu, L., & Steve, R. (2014b). Tied probabilistic linear discriminant analysis for speech recognition. ArXiv\u00a0abs\/1411.0895, pp. 1\u20135."},{"issue":"11","key":"9808_CR84","doi-asserted-by":"publisher","first-page":"1815","DOI":"10.1109\/TASLP.2019.2928143","volume":"27","author":"N Maghsoodi","year":"2019","unstructured":"Maghsoodi, N., Sameti, H., Zeinali, H., & Stafylakis, T. (2019). Speaker recognition with random digit strings using uncertainty normalized HMM-based i-vectors. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 27(11), 1815\u20131825.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9808_CR85","first-page":"1","volume":"463","author":"M Malik","year":"2020","unstructured":"Malik, M., Malik, M. K., Mehmood, K., & Makhdoom, I. (2020). Automatic speech recognition: A survey. Multimedia Tools and Applications, 463, 1\u201347.","journal-title":"Multimedia Tools and Applications"},{"key":"9808_CR86","doi-asserted-by":"crossref","unstructured":"Manurung, D. B., Dirgantoro, B., & Setianingsih, C. (2018). Speaker recognition for digital forensic audio analysis using learning vector quantization method. In: IEEE international conference on internet of things and intelligence system (IOTAIS), Bali, pp. 221\u2013226.","DOI":"10.1109\/IOTAIS.2018.8600852"},{"key":"9808_CR87","doi-asserted-by":"crossref","unstructured":"Mao, J., He, Y., & Liu, Z. (2018). Speech emotion recognition based on linear discriminant analysis and support vector machine decision tree. 37th Chinese Control Conference (CCC), Wuhan, pp. 5529\u20135533.","DOI":"10.23919\/ChiCC.2018.8482931"},{"key":"9808_CR88","doi-asserted-by":"crossref","unstructured":"Mary, L. (2019). Extraction and representation of prosody for speaker, language, emotion, and speech recognition. In\u00a0Extraction of prosody for automatic speaker, language, emotion and speech recognition\u00a0(pp. 23\u201343). Cham: Springer.","DOI":"10.1007\/978-3-319-91171-7_2"},{"issue":"8","key":"9808_CR89","doi-asserted-by":"publisher","first-page":"860","DOI":"10.1049\/iet-spr.2013.0270","volume":"8","author":"A Matza","year":"2014","unstructured":"Matza, A., & Bistritz, Y. (2014). Skew Gaussian mixture models for speaker recognition. IET Signal Processing, 8(8), 860\u2013867.","journal-title":"IET Signal Processing"},{"key":"9808_CR90","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1155\/2017\/8783751","volume":"78","author":"TA Mesallam","year":"2017","unstructured":"Mesallam, T. A., Farahat, M., Malki, K. H., Alsulaiman, M., Ali, Z., Al-Nasheri, A., et al. (2017). Development of the arabic voice pathology database and its evaluation by using speech features and machine learning algorithms. Journal of Healthcare Engineering, 78, 1\u201314.","journal-title":"Journal of Healthcare Engineering"},{"issue":"10","key":"9808_CR91","first-page":"1675","volume":"9","author":"K Mohanaprasad","year":"2014","unstructured":"Mohanaprasad, K., & Arulmozhivarman, P. (2014). Wavelet based adaptive filtering algorithms for acoustic noise cancellation. International Review on Computers and Software, 9(10), 1675\u20131681.","journal-title":"International Review on Computers and Software"},{"key":"9808_CR92","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/j.apacoust.2015.04.004","volume":"97","author":"K Mohanaprasad","year":"2015","unstructured":"Mohanaprasad, K., & Arulmozhivarman, P. (2015a). Wavelet based ICA using maximisation of non-Gaussianity for acoustic echo cancellation during double talk situation. Applied Acoustics, 97, 37\u201345.","journal-title":"Applied Acoustics"},{"issue":"12","key":"9808_CR93","doi-asserted-by":"publisher","first-page":"3915","DOI":"10.1007\/s00034-015-0038-0","volume":"34","author":"K Mohanaprasad","year":"2015","unstructured":"Mohanaprasad, K., & Arulmozhivarman, P. (2015b). Wavelet-based ica using maximum likelihood estimation and information-theoretic measure for acoustic echo cancellation during double talk situation. Circuits, Systems, and Signal Processing, 34(12), 3915\u20133931.","journal-title":"Circuits, Systems, and Signal Processing"},{"issue":"55","key":"9808_CR94","first-page":"1004","volume":"10","author":"K Mohanaprasad","year":"2015","unstructured":"Mohanaprasad, K., & Sankarganesh, S. (2015). Speech separation using wavelet based independent component analysis. International Journal of Applied Engineering Research, 10(55), 1004\u20131008.","journal-title":"International Journal of Applied Engineering Research"},{"key":"9808_CR95","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1007\/s10772-019-09595-9","volume":"22","author":"K Mohanaprasad","year":"2019","unstructured":"Mohanaprasad, K., Singh, A., Sinha, K., et al. (2019). Noise reduction in speech signals using adaptive independent component analysis (ICA) for hands free communication devices. International Journal of Speech Technology, 22, 169\u2013177.","journal-title":"International Journal of Speech Technology"},{"key":"9808_CR96","doi-asserted-by":"publisher","first-page":"978","DOI":"10.1121\/1.428272","volume":"107","author":"PJ Murphy","year":"2020","unstructured":"Murphy, P. J. (2020). Spectral characterization of jitter, shimmer, and additive noise in synthetically generated voice signals. The Journal of the Acoustical Society of America, 107, 978\u2013988.","journal-title":"The Journal of the Acoustical Society of America"},{"key":"9808_CR97","unstructured":"Narendra, N. P., & Alku, P. (2018). Dysarthric speech classification using glottal features computed from non-words, words and sentences.\u00a0Interspeech, pp. 3403\u20133407."},{"key":"9808_CR98","doi-asserted-by":"crossref","unstructured":"Nath, M. K. (2009). Independent component analysis of real data. In: Seventh international conference on advances in pattern recognition, pp. 149\u2013152.","DOI":"10.1109\/ICAPR.2009.110"},{"key":"9808_CR99","doi-asserted-by":"crossref","unstructured":"Nayana, P. K., Mathew, D., & Thomas, A. (2017). Performance comparison of speaker recognition systems using GMM and i-Vector methods with PNCC and RASTA PLP features. In: International conference on intelligent computing, instrumentation and control technologies (ICICICT), Kannur, pp. 438\u2013443.","DOI":"10.1109\/ICICICT1.2017.8342603"},{"key":"9808_CR100","first-page":"1","volume":"7","author":"NS Nehe","year":"2012","unstructured":"Nehe, N. S., & Holambe, R. S. (2012). DWT and LPC based feature extraction methods for isolated word recognition. Journal of audio speech music proc., 7, 1\u20137.","journal-title":"Journal of audio speech music proc."},{"key":"9808_CR101","doi-asserted-by":"crossref","unstructured":"Shabani S., & Norouzi, Y. (2016). Speech recognition using principal components analysis and neural networks. IEEE 8th international conference on intelligent systems (IS), Sofia, pp. 90\u201395.","DOI":"10.1109\/IS.2016.7737405"},{"key":"9808_CR102","doi-asserted-by":"crossref","unstructured":"Nyodu, K., & Sambyo, K. (2018). Automatic identification of arunachal language using K-nearest neighbor algorithm. In: International conference on advances in computing, communication control and networking (ICACCCN), Greater Noida (UP), India, pp. 213\u2013216.","DOI":"10.1109\/ICACCCN.2018.8748270"},{"key":"9808_CR103","doi-asserted-by":"crossref","unstructured":"Perotin, L., Serizel, R., Vincent, E., & Gu\u00e9rin, A. (2018). Multichannel speech separation with recurrent neural networks from high-order ambisonics recordings. In\u00a02018 IEEE international conference on acoustics, speech and signal processing (ICASSP, pp. 36\u201340.","DOI":"10.1109\/ICASSP.2018.8461370"},{"key":"9808_CR104","doi-asserted-by":"crossref","unstructured":"Qian, G. (2019). A music retrieval approach based on hidden markov model. 11th international conference on measuring technology and mechatronics automation (ICMTMA), Qiqihar, China, pp. 721\u2013725.","DOI":"10.1109\/ICMTMA.2019.00165"},{"issue":"12","key":"9808_CR105","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/2000000001","volume":"1","author":"LR Rabiner","year":"2007","unstructured":"Rabiner, L. R., & Schafer, R. W. (2007). Introduction to digital speech processing. Foundations and Trends in Signal Processing, 1(12), 1\u2013194.","journal-title":"Foundations and Trends in Signal Processing"},{"key":"9808_CR106","unstructured":"Ram, S., & Preeti, R. (2007). Spectral Subtraction Speech Enhancement with RASTA Filtering. Proc. of National Conference on Communications (NCC), pp. 1\u20135."},{"key":"9808_CR107","doi-asserted-by":"crossref","unstructured":"Ramaiah, V. S., & Rao, R. R. (2016). Multi-speaker activity detection using zero crossing rate. International conference on communication and signal processing (ICCSP), Melmaruvathur, pp. 23\u201326.","DOI":"10.1109\/ICCSP.2016.7754232"},{"key":"9808_CR108","doi-asserted-by":"crossref","unstructured":"Ranny. (2016). Voice recognition using k nearest neighbor and double distance method. In: International conference on industrial engineering, management science and application (ICIMSA), Jeju, pp. 1\u20135.","DOI":"10.1109\/ICIMSA.2016.7504045"},{"key":"9808_CR109","doi-asserted-by":"publisher","first-page":"15273","DOI":"10.1109\/ACCESS.2020.2967224","volume":"8","author":"MK Reddy","year":"2020","unstructured":"Reddy, M. K., Alku, P., & Rao, K. S. (2020). Detection of specific language impairment in children using glottal source features. IEEE Access, 8, 15273\u201315279.","journal-title":"IEEE Access"},{"key":"9808_CR110","doi-asserted-by":"crossref","unstructured":"Ren, Y., Liu, J., Tan, X., Zhang, C., Tao, Q. I. N., Zhao, Z., & Liu, T. Y. (2020). SimulSpeech: End-to-end simultaneous speech to text translation. In: Proceedings of the 58th annual meeting of the association for computational linguistic, pp. 3787\u20133796.","DOI":"10.18653\/v1\/2020.acl-main.350"},{"issue":"1","key":"9808_CR111","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1109\/89.365379","volume":"3","author":"DA Reynolds","year":"1995","unstructured":"Reynolds, D. A., & Rose, R. C. (1995). Robust text-independent speaker identification using Gaussian mixture speaker models. IEEE Transactions on Speech and Audio Processing, 3(1), 72\u201383.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"9808_CR112","doi-asserted-by":"crossref","unstructured":"Rizwan, M., & Anderson, D. V. (2014) Using k-nearest neighbor and speaker ranking for phoneme prediction. In: 13th international conference on machine learning and applications, Detroit, MI, pp. 383\u2013387.","DOI":"10.1109\/ICMLA.2014.68"},{"key":"9808_CR113","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-30425-0","volume-title":"Springer handbook of acoustics","author":"TD Rossing","year":"2007","unstructured":"Rossing, T. D. (2007). Springer handbook of acoustics. New York, NY: Springer Nature."},{"key":"9808_CR114","doi-asserted-by":"crossref","unstructured":"Rudresh, M. D., Latha, A. S., Suganya, J., & Nayana, C. G. (2017). Performance analysis of speech digit recognition using cepstrum and vector quantization. In: International conference on electrical, electronics, communication, computer, and optimization techniques (ICEECCOT), Mysuru, pp. 1\u20136.","DOI":"10.1109\/ICEECCOT.2017.8284580"},{"key":"9808_CR115","doi-asserted-by":"crossref","unstructured":"Ruzanski, E., Hansen, J. H., Finan, D., Meyerhoff, J., Norris, W., & Wollert, T. (2005). Improved\u201d TEO\u201d feature-based automatic stress detection using physiological and acoustic speech sensors. In\u00a0Ninth European conference on speech communication and technology. pp. 2653\u20132656.","DOI":"10.21437\/Interspeech.2005-252"},{"issue":"2","key":"9808_CR116","first-page":"64","volume":"1","author":"F Sadaoki","year":"2005","unstructured":"Sadaoki, F. (2005). 50 years of progress in speech and speaker recognition research. ECTI Transactions on Computer and Information Technology, 1(2), 64\u201374.","journal-title":"ECTI Transactions on Computer and Information Technology"},{"issue":"2","key":"9808_CR117","first-page":"565","volume":"20","author":"A Sanchis","year":"2012","unstructured":"Sanchis, A., Juan, A., & Vidal, E. (2012). A word-based Na\u00efve Bayes classifier for confidence estimation in speech recognition. IEEE Transactions on Audio, Speech and Language Processing, 20(2), 565\u2013574.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"key":"9808_CR118","doi-asserted-by":"crossref","unstructured":"Sangeetha, R., & Nalini, N. J. (2020). Singer identification using MFCC and CRP features with support vector machines. In\u00a0Computational intelligence in pattern recognition\u00a0(pp. 295\u2013306). Springer, Singapore.","DOI":"10.1007\/978-981-13-9042-5_25"},{"issue":"4","key":"9808_CR119","doi-asserted-by":"publisher","first-page":"839","DOI":"10.1109\/TASLP.2017.2667880","volume":"25","author":"SS Sarfjoo","year":"2017","unstructured":"Sarfjoo, S. S., Demiro\u011flu, S., & King, S. (2017). Using eigenvoices and nearest-neighbors in HMM-based cross-lingual speaker adaptation with limited data. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 25(4), 839\u2013851.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"12","key":"9808_CR120","doi-asserted-by":"publisher","first-page":"16097","DOI":"10.1007\/s11042-018-6946-9","volume":"78","author":"WS Sayed","year":"2019","unstructured":"Sayed, W. S., Tolba, M. F., Radwan, A. G., & Abd-El-Hafiz, S. K. (2019). FPGA realization of a speech encryption system based on a generalized modified chaotic transition map and bit permutation. Multimedia Tools and Applications, 78(12), 16097\u201316127.","journal-title":"Multimedia Tools and Applications"},{"key":"9808_CR121","unstructured":"Selva, S. N., & Shantha, R. S. K. (2014). Text independent voice based students attendance system under noisy environment using RASTA-MFCC feature. International conference on communication and network technologies, Sivakasi, pp. 182\u2013187."},{"issue":"1","key":"9808_CR122","doi-asserted-by":"publisher","first-page":"102","DOI":"10.1016\/j.aei.2014.01.001","volume":"28","author":"SR Shahamiri","year":"2014","unstructured":"Shahamiri, S. R., & Salim, S. S. B. (2014a). Artificial neural networks as speech recognisers for dysarthric speech: Identifying the best-performing set of MFCC parameters and studying a speaker-independent approach. Advanced Engineering Informatics, 28(1), 102\u2013110.","journal-title":"Advanced Engineering Informatics"},{"issue":"5","key":"9808_CR123","doi-asserted-by":"publisher","first-page":"1053","DOI":"10.1109\/TNSRE.2014.2309336","volume":"22","author":"SR Shahamiri","year":"2014","unstructured":"Shahamiri, S. R., & Salim, S. S. B. (2014b). A multi-views multi-learners approach towards dysarthric speech recognition using multi-nets artificial neural networks. IEEE Transactions on Neural Systems and Rehabilitation Engineering, 22(5), 1053\u20131063.","journal-title":"IEEE Transactions on Neural Systems and Rehabilitation Engineering"},{"key":"9808_CR124","first-page":"1","volume":"2014","author":"M Shahbakhi","year":"2014","unstructured":"Shahbakhi, M., Far, D. T., & Tahami, E. (2014). Speech analysis for diagnosis of Parkinson\u2019s disease using genetic algorithm and support vector machine. Journal of Biomedical Science and Engineering, 2014, 1\u201313.","journal-title":"Journal of Biomedical Science and Engineering"},{"issue":"4","key":"9808_CR125","doi-asserted-by":"publisher","first-page":"1347","DOI":"10.1109\/TASL.2011.2178597","volume":"20","author":"RU Solera","year":"2012","unstructured":"Solera, R. U., Garcia-Moral, A. I., Pelaez-Moreno, C., Martinez-Ramon, M., & Diaz-de-Maria, F. (2012). Real-time robust automatic speech recognition using compact support vector machines. IEEE Transactions on Audio, Speech and Language Processing, 20(4), 1347\u20131361.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"key":"9808_CR126","doi-asserted-by":"crossref","unstructured":"Sonawane, A., Inamdar, M. U. & Bhangale, K. B. (2017). Sound based human emotion recognition using MFCC & multiple SVM. International conference on information, communication, instrumentation and control (ICICIC), Indore, pp. 1\u20134.","DOI":"10.1109\/ICOMICON.2017.8279046"},{"key":"9808_CR127","doi-asserted-by":"crossref","unstructured":"Song, P., Zheng, W., Liu, J., Li, J., & Xinran, Z. (2015). A novel speech emotion recognition method via transfer PCA and sparse coding. Chinese conference on biometric recognition, pp. 393\u2013400.","DOI":"10.1007\/978-3-319-25417-3_46"},{"key":"9808_CR128","doi-asserted-by":"crossref","unstructured":"Sreehari, V. R., & Mary, L. (2018). Automatic speaker recognition using stationary wavelet coefficients of LP residual. IEEE Region 10 Conference, Jeju, Korea (South), pp. 1595\u20131600.","DOI":"10.1109\/TENCON.2018.8650279"},{"key":"9808_CR129","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1162\/tacl_a_00096","volume":"4","author":"K Stratos","year":"2016","unstructured":"Stratos, K., Collins, M., & Hsu, D. (2016). Unsupervised part-of-speech tagging with anchor hidden markov models. Transactions of the Association for Computational Linguistics, 4, 245\u2013257.","journal-title":"Transactions of the Association for Computational Linguistics"},{"issue":"1","key":"9808_CR130","first-page":"102","volume":"23","author":"R Su","year":"2015","unstructured":"Su, R., Liu, X., & Wang, L. (2015). Automatic complexity control of generalized variable parameter HMMs for noise robust speech recognition. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 23(1), 102\u2013114.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"1","key":"9808_CR131","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1186\/s13636-018-0145-5","volume":"2019","author":"L Sun","year":"2019","unstructured":"Sun, L., Fu, S., & Wang, F. (2019). Decision tree SVM model with Fisher feature selection for speech emotion recognition. EURASIP Journal on Audio, Speech, and Music Processing, 2019(1), 2.","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"issue":"8","key":"9808_CR132","first-page":"1275","volume":"3","author":"D Sunita","year":"2014","unstructured":"Sunita, D., & Yusuf, M. (2014). Speech processing: A review. International Journal of Advanced Research in Computer Engineering & Technology (IJARCET), 3(8), 1275\u20131278.","journal-title":"International Journal of Advanced Research in Computer Engineering & Technology (IJARCET)"},{"key":"9808_CR133","doi-asserted-by":"crossref","unstructured":"Tadeusiewicz, R. (2010). Speech in human system interaction. In: 3rd international conference on human system interaction, Rzeszow, pp. 2\u201313.","DOI":"10.1109\/HSI.2010.5514597"},{"key":"9808_CR134","doi-asserted-by":"publisher","first-page":"1228","DOI":"10.1016\/j.protcy.2014.10.138","volume":"16","author":"JP Teixeira","year":"2014","unstructured":"Teixeira, J. P., & Fernandes, P. O. (2014). Jitter, shimmer and HNR classification within gender, tones and vowels in healthy voices. Procedia Technology, 16, 1228\u20131237.","journal-title":"Procedia Technology"},{"key":"9808_CR135","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1016\/j.procs.2017.11.004","volume":"121","author":"JP Teixeira","year":"2017","unstructured":"Teixeira, J. P., Fernandes, P. O., & Alves, N. (2017). Vocal acoustic analysis\u2013classification of dysphonic voices with artificial neural networks. Procedia Computer Science, 121, 19\u201326.","journal-title":"Procedia Computer Science"},{"key":"9808_CR136","doi-asserted-by":"publisher","first-page":"1112","DOI":"10.1016\/j.protcy.2013.12.124","volume":"9","author":"JP Teixeira","year":"2013","unstructured":"Teixeira, J. P., Oliveira, C., & Lopes, C. (2013). Vocal acoustic analysis \u2013 jitter, shimmer and hnr parameters. Procedia Technology, 9, 1112\u20131122.","journal-title":"Procedia Technology"},{"key":"9808_CR137","doi-asserted-by":"crossref","unstructured":"Vacher, M., Lecouteux, B., Romero, J. S., Ajili, M., Portet, F., & Rossato, S. (2015). Speech and speaker recognition for home automation: Preliminary results. IEEE international conference on speech technology and human-computer dialogue (SpeD), pp. 1\u201310.","DOI":"10.1109\/SPED.2015.7343100"},{"key":"9808_CR138","doi-asserted-by":"crossref","unstructured":"Vachhani, B. B., & Patil, H. A. (2013). Use of PLP cepstral features for phonetic segmentation. In: International conference on Asian language processing, Urumqi, pp. 143\u2013146.","DOI":"10.1109\/IALP.2013.47"},{"key":"9808_CR139","doi-asserted-by":"crossref","unstructured":"Varghese, D., & Mathew, D. (2016). Phoneme classification using Reservoirs with MFCC and Rasta-PLP features. International conference on computer communication and informatics (ICCCI), Coimbatore, pp. 1\u20136.","DOI":"10.1109\/ICCCI.2016.7480007"},{"key":"9808_CR140","first-page":"1","volume":"216","author":"M Velankar","year":"2018","unstructured":"Velankar, M., Deshpande, A., & Kulkarni, P. (2018). Melodic pattern recognition in Indian classical music for raga identification. International Journal of Information Technology, 216, 1\u20138.","journal-title":"International Journal of Information Technology"},{"key":"9808_CR141","doi-asserted-by":"crossref","unstructured":"Wang, C. (2018). Interpreting neural network hate speech classifiers. In: Proceedings of the 2nd workshop on abusive language online (ALW2), pp. 86\u201392.","DOI":"10.18653\/v1\/W18-5111"},{"issue":"3","key":"9808_CR142","doi-asserted-by":"publisher","first-page":"2263","DOI":"10.1121\/1.4979052","volume":"141","author":"Z Wu","year":"2017","unstructured":"Wu, Z., & Ortega-Llebaria, M. (2017). Pitch shape modulates the time course of tone vs pitch-accent identification in Mandarin Chinese. The Journal of the Acoustical Society of America, 141(3), 2263\u20132276.","journal-title":"The Journal of the Acoustical Society of America"},{"issue":"8","key":"9808_CR143","doi-asserted-by":"publisher","first-page":"466","DOI":"10.1109\/LSP.2011.2159374","volume":"18","author":"J Wu","year":"2011","unstructured":"Wu, J., & Zhang, X. (2011). Efficient multiple kernel support vector machine based voice activity detection. IEEE Signal Processing Letters, 18(8), 466\u2013469.","journal-title":"IEEE Signal Processing Letters"},{"key":"9808_CR144","doi-asserted-by":"crossref","unstructured":"Xiao-chun, L., Jun-xun, Y., & Wei-ping, H. (2012). A text-independent speaker recognition system based on Probabilistic Principle Component Analysis. 3rd international conference on system science, engineering design and manufacturing informatization, pp. 255\u2013260.","DOI":"10.1109\/ICSSEM.2012.6340721"},{"key":"9808_CR145","first-page":"1","volume-title":"Dynamic time warping for speech recognition with training part to reduce the computation","author":"S Xihao","year":"2013","unstructured":"Xihao, S., & Miyanaga, Y. (2013). Dynamic time warping for speech recognition with training part to reduce the computation (pp. 1\u20134). Circuits and Systems: International Symposium on Signals."},{"key":"9808_CR146","doi-asserted-by":"publisher","first-page":"153724","DOI":"10.1109\/ACCESS.2019.2948946","volume":"7","author":"Y Xue","year":"2019","unstructured":"Xue, Y., Mu, K., Wang, Y., Chen, Y., Zhong, P., & Wen, J. (2019). Robust speech steganography using differential SVD. IEEE Access, 7, 153724\u2013153733.","journal-title":"IEEE Access"},{"issue":"9","key":"9808_CR147","doi-asserted-by":"publisher","first-page":"901","DOI":"10.1109\/LSP.2013.2273127","volume":"20","author":"S Yaman","year":"2013","unstructured":"Yaman, S., & Pelecanos, J. (2013). Using polynomial kernel support vector machines for speaker verification. IEEE Signal Processing Letters, 20(9), 901\u2013904.","journal-title":"IEEE Signal Processing Letters"},{"key":"9808_CR148","doi-asserted-by":"crossref","unstructured":"Yao, X., Xu, N., Gao, M., Jiang, A., & Liu, X. (2016, December). Comparison analysis of classifiers for speech under stress. In\u00a02016 IEEE International Conference on Internet of Things (iThings) and IEEE Green Computing and Communications (GreenCom) and IEEE Cyber, Physical and Social Computing (CPSCom) and IEEE Smart Data (SmartData), pp. 429\u2013432.","DOI":"10.1109\/iThings-GreenCom-CPSCom-SmartData.2016.101"},{"key":"9808_CR149","first-page":"1","volume":"1366","author":"P Yurika","year":"2019","unstructured":"Yurika, P., Erwin, H., & Erwin, P. A. (2019). Speech recognition using Dynamic Time Warping (DTW). Journal of Physics: Conference Series, 1366, 1\u20136.","journal-title":"Journal of Physics: Conference Series"},{"key":"9808_CR150","doi-asserted-by":"crossref","unstructured":"Zaw, T. H., & War, N. (2017). The combination of spectral entropy, zero crossing rate, short time energy and linear prediction error for voice activity detection. 20th International conference of computer and information technology (ICCIT), Dhaka, pp. 1\u20135.","DOI":"10.1109\/ICCITECHN.2017.8281794"},{"key":"9808_CR151","doi-asserted-by":"crossref","unstructured":"Zhang, Y., & Abdulla W. H. (2007b). Eigenanalysis applied to speaker identification using gammatone auditory filterbank and independent component analysis. 9th international symposium on signal processing and its applications, pp. 1\u20134.","DOI":"10.21437\/Interspeech.2006-190"},{"key":"9808_CR152","doi-asserted-by":"crossref","unstructured":"Zhang, Y., & Abdulla, W. H. (2007a). Robust speaker identification in noisy environment using cross diagonal GTF-ICA feature. In: 6th International conference on information, communications & signal processing, pp. 1\u20134.","DOI":"10.1109\/ICICS.2007.4449735"},{"issue":"9","key":"9808_CR153","doi-asserted-by":"publisher","first-page":"e18689","DOI":"10.2196\/18689","volume":"8","author":"L Zhang","year":"2020","unstructured":"Zhang, L., Qu, Y., Jin, B., Jing, L., Gao, Z., & Liang, Z. (2020). An intelligent mobile-enabled system for diagnosing Parkinson disease: Development and validation of a speech impairment detection system. JMIR Medical Informatics, 8(9), e18689.","journal-title":"JMIR Medical Informatics"},{"key":"9808_CR154","doi-asserted-by":"crossref","unstructured":"Zhang, L., Zhao, Y., Zhang, P., Yan, K., & Zhang, W. (2015). Chinese accent detection research based on RASTA - PLP algorithm. Proceedings of 2015 international conference on intelligent computing and internet of things, Harbin, pp. 31\u201334.","DOI":"10.1109\/ICAIOT.2015.7111531"},{"key":"9808_CR155","doi-asserted-by":"publisher","first-page":"27874","DOI":"10.1109\/ACCESS.2019.2901812","volume":"7","author":"X Zhang","year":"2019","unstructured":"Zhang, X., Zou, X., Sun, M., Zheng, T. F., Jia, C., & Wang, Y. (2019). Noise robust speaker recognition based on adaptive frame weighting in GMM for i-vector extraction. IEEE Access, 7, 27874\u201327882.","journal-title":"IEEE Access"},{"key":"9808_CR156","doi-asserted-by":"crossref","unstructured":"Zhu, J., Zhang, J., Chen, Q., & Tu, P. (2017). Speaker recognition based on the improved double-threshold endpoint algorithm and multistage vector quantization. IEEE 9th international conference on communication software and networks (ICCSN), Guangzhou, pp. 1056\u20131061.","DOI":"10.1109\/ICCSN.2017.8230272"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-021-09808-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-021-09808-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-021-09808-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,28]],"date-time":"2023-01-28T14:06:46Z","timestamp":1674914806000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-021-09808-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,20]]},"references-count":156,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2021,6]]}},"alternative-id":["9808"],"URL":"https:\/\/doi.org\/10.1007\/s10772-021-09808-0","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,1,20]]},"assertion":[{"value":"7 May 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 January 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 January 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}