{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T18:02:44Z","timestamp":1775584964671,"version":"3.50.1"},"reference-count":112,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2019,4,22]],"date-time":"2019-04-22T00:00:00Z","timestamp":1555891200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Wireless Pers Commun"],"published-print":{"date-parts":[[2019,8]]},"DOI":"10.1007\/s11277-019-06373-3","type":"journal-article","created":{"date-parts":[[2019,4,22]],"date-time":"2019-04-22T18:03:25Z","timestamp":1555956205000},"page":"2071-2103","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":40,"title":["Feature Extraction Methods in Language Identification: A Survey"],"prefix":"10.1007","volume":"107","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6893-9856","authenticated-orcid":false,"given":"Deepti","family":"Deshwal","sequence":"first","affiliation":[]},{"given":"Pardeep","family":"Sangwan","sequence":"additional","affiliation":[]},{"given":"Divya","family":"Kumar","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,4,22]]},"reference":[{"key":"6373_CR1","unstructured":"Aggarwal, G., & Singh, L. (2018). Classification of intellectual disability using LPC, LPCC, and WLPCC parameterization techniques. International Journal of Computers and Applications,\u00a01\u201310."},{"key":"6373_CR2","doi-asserted-by":"crossref","unstructured":"Agrawal, P., & Ganapathy, S. (2017). Speech representation learning using unsupervised data-driven modulation filtering for robust ASR. In Proceedings of the Interspeech (pp. 2446\u20132450).","DOI":"10.21437\/Interspeech.2017-901"},{"key":"6373_CR3","doi-asserted-by":"publisher","first-page":"15400","DOI":"10.1109\/ACCESS.2017.2728801","volume":"5","author":"AKH Al-Ali","year":"2017","unstructured":"Al-Ali, A. K. H., Dean, D., Senadji, B., Chandran, V., & Naik, G. R. (2017). Enhanced forensic speaker verification using a combination of DWT and MFCC feature warping in the presence of noise and reverberation conditions.\u00a0IEEE Access,\u00a05, 15400\u201315413.","journal-title":"IEEE Access"},{"key":"6373_CR4","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1016\/j.eswa.2016.12.019","volume":"73","author":"JB Alonso","year":"2017","unstructured":"Alonso, J. B., et al. (2017). Automatic anuran identification using noise removal and audio activity detection. Expert Systems with Applications, 73, 83\u201392.","journal-title":"Expert Systems with Applications"},{"issue":"2","key":"6373_CR5","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MCAS.2011.941081","volume":"11","author":"E Ambikairajah","year":"2011","unstructured":"Ambikairajah, E., et al. (2011). Language identification: A tutorial. IEEE Circuits and Systems Magazine, 11(2), 82\u2013108.","journal-title":"IEEE Circuits and Systems Magazine"},{"issue":"4","key":"6373_CR6","doi-asserted-by":"publisher","first-page":"460","DOI":"10.1109\/PROC.1976.10155","volume":"64","author":"BS Atal","year":"1976","unstructured":"Atal, B. S. (1976). Automatic recognition of speakers from their voices. Proceedings of the IEEE, 64(4), 460\u2013475.","journal-title":"Proceedings of the IEEE"},{"key":"6373_CR7","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.specom.2013.07.008","volume":"56","author":"L Besacier","year":"2014","unstructured":"Besacier, L., Barnard, E., Karpov, A., & Schultz, T. (2014). Automatic speech recognition for under resourced languages: A survey. Speech Communication, 56, 85\u2013100.","journal-title":"Speech Communication"},{"issue":"4","key":"6373_CR8","doi-asserted-by":"publisher","first-page":"673","DOI":"10.1007\/s10772-015-9311-7","volume":"18","author":"SS Bharali","year":"2015","unstructured":"Bharali, S. S., & Kalita, S. K. (2015). A comparative study of different features for isolated spoken word recognition using HMM with reference to Assamese language. International Journal of Speech Technology, 18(4), 673\u2013684.","journal-title":"International Journal of Speech Technology"},{"key":"6373_CR9","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10772-018-9501-1","volume":"21","author":"SS Bharali","year":"2018","unstructured":"Bharali, S. S., & Kalita, S. K. (2018). Speech recognition with reference to Assamese language using novel fusion technique. International Journal of Speech Technology, 21, 1\u201313.","journal-title":"International Journal of Speech Technology"},{"key":"6373_CR10","doi-asserted-by":"crossref","unstructured":"Bharti, S. S., Gupta, M., & Agarwal, S. (2018). Background noise identification system based on random forest for speech. In International conference on intelligent computing and applications (pp. 323\u2013332). Springer, Singapore.","DOI":"10.1007\/978-981-10-5520-1_30"},{"key":"6373_CR11","unstructured":"Bielefeld, B. (1994). Language identification using shifted delta cepstrum. In Fourteenth annual speech research symposium."},{"issue":"2","key":"6373_CR12","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1109\/TASSP.1979.1163209","volume":"27","author":"S Boll","year":"1979","unstructured":"Boll, S. (1979). Suppression of acoustic noise in speech using spectral subtraction. IEEE Transactions on Acoustics, Speech, and Signal Processing, 27(2), 113\u2013120.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"issue":"2","key":"6373_CR13","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1007\/s10772-014-9257-1","volume":"18","author":"P Borde","year":"2015","unstructured":"Borde, P., Varpe, A., Manza, R., & Yannawar, P. (2015). Recognition of isolated words using Zernike and MFCC features for audio visual speech recognition. International Journal of Speech Technology, 18(2), 167\u2013175.","journal-title":"International Journal of Speech Technology"},{"key":"6373_CR14","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1016\/j.csl.2012.01.004","volume":"26","author":"GR Botha","year":"2012","unstructured":"Botha, G. R., & Etienne, B. (2012). Factors that affect the accuracy of text based language identification. Computer Speech and Language, 26, 302\u2013320.","journal-title":"Computer Speech and Language"},{"issue":"9","key":"6373_CR15","doi-asserted-by":"publisher","first-page":"1029","DOI":"10.1016\/j.specom.2012.04.005","volume":"54","author":"IA Clemente","year":"2012","unstructured":"Clemente, I. A., Heckmann, M., & Wrede, B. (2012). Incremental word learning: Efficient hmm initialization and large margin discriminative adaptation. Speech Communication, 54(9), 1029\u20131048.","journal-title":"Speech Communication"},{"issue":"6","key":"6373_CR16","doi-asserted-by":"publisher","first-page":"599","DOI":"10.1080\/02564602.2017.1357507","volume":"35","author":"RK Das","year":"2017","unstructured":"Das, R. K., & Prasanna, S. M. (2018). Speaker verification from short utterance perspective: a review.\u00a0IETE Technical Review,\u00a035(6), 599\u2013617.","journal-title":"IETE Technical Review"},{"issue":"4","key":"6373_CR17","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1109\/TASL.2010.2064307","volume":"19","author":"N Dehak","year":"2011","unstructured":"Dehak, N., et al. (2011). Front end factor analysis for speaker verification. IEEE Transactions Audio, Speech, Language Processing, 19(4), 788\u2013798.","journal-title":"IEEE Transactions Audio, Speech, Language Processing"},{"key":"6373_CR18","doi-asserted-by":"crossref","unstructured":"Dehak, N., Torres-Carrasquillo, P. A., Reynolds, D., & Dehak, R. (2011). Language recognition via i-vectors and dimensionality reduction. In\u00a0Twelfth annual conference of the international speech communication association.","DOI":"10.21437\/Interspeech.2011-328"},{"key":"6373_CR19","doi-asserted-by":"crossref","unstructured":"Dey, S., Rajan, R., Padmanabhan, R., & Murthy, H. A. (2011). Feature diversity for emotion, language and speaker verification. In National conference on communications (NCC), IEEE (pp. 1\u20135).","DOI":"10.1109\/NCC.2011.5734774"},{"issue":"34","key":"6373_CR20","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1080\/02564602.2016.1185976","volume":"3","author":"G Di\u015fken","year":"2017","unstructured":"Di\u015fken, G., et al. (2017). A review on feature extraction for speaker recognition under degraded conditions. IETE Technical Review, 3(34), 321\u2013332.","journal-title":"IETE Technical Review"},{"key":"6373_CR21","unstructured":"Dustor, A., & Szwarc, P. (2010). Spoken language identification based on GMM models. In International conference in signals and electronic systems (ICSES), IEEE (pp. 105\u2013108)."},{"issue":"2","key":"6373_CR22","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1504\/IJAPR.2018.092522","volume":"5","author":"SK Dutta","year":"2018","unstructured":"Dutta, S. K., & Singh, L. J. (2018). A comparison of three spectral features for phone recognition in sub-optimal environments. International Journal of Applied Pattern Recognition, 5(2), 137\u2013148.","journal-title":"International Journal of Applied Pattern Recognition"},{"issue":"1","key":"6373_CR23","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1016\/j.eswa.2014.07.035","volume":"42","author":"J Echeverry-Correa","year":"2015","unstructured":"Echeverry-Correa, J., et al. (2015). Topic identification techniques applied to dynamic language model adaptation for automatic speech recognition. Expert System with Applications, 42(1), 101\u2013112.","journal-title":"Expert System with Applications"},{"issue":"1","key":"6373_CR24","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1007\/s10772-013-9205-5","volume":"17","author":"MAA El-Fattah","year":"2014","unstructured":"El-Fattah, M. A. A., et al. (2014). Speech enhancement with an adaptive Wiener filter. International Journal of Speech Technology, 17(1), 53\u201364.","journal-title":"International Journal of Speech Technology"},{"issue":"6","key":"6373_CR25","doi-asserted-by":"publisher","first-page":"1109","DOI":"10.1109\/TASSP.1984.1164453","volume":"32","author":"Y Ephraim","year":"1984","unstructured":"Ephraim, Y., & Malah, D. (1984). Speech enhancement using a minimum-mean square error short-time spectral amplitude estimator. IEEE Transactions on Acoustics, Speech, and Signal Processing, 32(6), 1109\u20131121.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"6373_CR26","doi-asserted-by":"crossref","unstructured":"Fernando, S., Sethu, V., Ambikairajah, E., & Epps, J. (2017). Bidirectional modelling for short duration language identification. In Interspeech (pp. 2809\u20132813).","DOI":"10.21437\/Interspeech.2017-286"},{"issue":"1","key":"6373_CR27","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1109\/TASLP.2015.2496226","volume":"24","author":"L Ferrer","year":"2016","unstructured":"Ferrer, L., Lei, Y., McLaren, M., & Scheffer, N. (2016). Study of senone-based deep neural network approaches for spoken language recognition. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 24(1), 105\u2013116.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"6373_CR28","doi-asserted-by":"publisher","first-page":"252","DOI":"10.1016\/j.csl.2017.06.008","volume":"46","author":"R Fer","year":"2017","unstructured":"Fer, R., et al. (2017). Multilingually trained bottleneck features in spoken language recognition. Computer Speech and Language, 46, 252\u2013267.","journal-title":"Computer Speech and Language"},{"key":"6373_CR29","doi-asserted-by":"crossref","unstructured":"Gehring, J., Miao, Y., Metze, F., & Waibel, A. (2013). Extracting deep bottleneck features using stacked auto-encoders. In IEEE international conference in acoustics, speech and signal processing (ICASSP), IEEE (pp. 3377\u20133381).","DOI":"10.1109\/ICASSP.2013.6638284"},{"key":"6373_CR30","doi-asserted-by":"crossref","unstructured":"Gelly, G., Gauvain, J. L., Le, V. B., & Messaoudi, A. (2016). A divide-and-conquer approach for language identification based on recurrent neural networks. In INTERSPEECH (pp. 3231\u20133235).","DOI":"10.21437\/Interspeech.2016-180"},{"key":"6373_CR31","doi-asserted-by":"crossref","unstructured":"Giwa, O., & Davel, M. H. (2014). Language identification of individual words with joint sequence models. In Fifteenth annual conference of the international speech communication association (pp. 14\u201318).","DOI":"10.21437\/Interspeech.2014-344"},{"key":"6373_CR32","doi-asserted-by":"crossref","unstructured":"Giwa, O., & Davel, M. H. (2015). Text-based language identification of multilingual names. In Pattern recognition association of South Africa and robotics and mechatronics international conference (PRASA-RobMech) (pp. 166\u2013171). IEEE.","DOI":"10.1109\/RoboMech.2015.7359517"},{"key":"6373_CR33","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1016\/j.neunet.2014.08.006","volume":"64","author":"J Gonzalez-Dominguez","year":"2015","unstructured":"Gonzalez-Dominguez, J., Lopez-Moreno, I., Moreno, P. J., & Gonzalez-Rodriguez, J. (2015). Frame-by-frame language identification in short utterances using deep neural networks. Neural Networks, 64, 49\u201358.","journal-title":"Neural Networks"},{"key":"6373_CR34","unstructured":"Gonzalez, D. R., & de Lara, J. R. C. (2009). Speaker verification with shifted delta cepstral features: Its pseudo-prosodic behaviour. In Proceedings of the I Iberian SLTech."},{"key":"6373_CR35","doi-asserted-by":"crossref","unstructured":"Goodman, F. J., Martin, A. F., & Wohlford, R. E. (1989). Improved automatic language identification in noisy speech. In International conference on acoustics, speech, and signal processing ICASSP-89 (pp. 528\u2013531). IEEE.","DOI":"10.1109\/ICASSP.1989.266480"},{"key":"6373_CR36","doi-asserted-by":"crossref","unstructured":"Gupta, K., & Gupta, D. (2016). An analysis on LPC, RASTA and MFCC techniques in automatic speech recognition system. In 6th international conference in cloud system and big data engineering (confluence) (pp. 493\u2013497). IEEE.","DOI":"10.1109\/CONFLUENCE.2016.7508170"},{"key":"6373_CR37","doi-asserted-by":"crossref","unstructured":"Heigold, G. et al. (2013). Multilingual acoustic models using distributed deep neural networks. In IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 8619\u20138623). IEEE.","DOI":"10.1109\/ICASSP.2013.6639348"},{"issue":"4","key":"6373_CR38","doi-asserted-by":"publisher","first-page":"1738","DOI":"10.1121\/1.399423","volume":"87","author":"H Hermansky","year":"1990","unstructured":"Hermansky, H. (1990). Perceptual linear predictive (PLP) analysis of speech. The Journal of the Acoustical Society of America, 87(4), 1738\u20131752.","journal-title":"The Journal of the Acoustical Society of America"},{"issue":"6","key":"6373_CR39","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","volume":"29","author":"G Hinton","year":"2012","unstructured":"Hinton, G., et al. (2012). Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups. IEEE Signal Processing Magazine, 29(6), 82\u201397.","journal-title":"IEEE Signal Processing Magazine"},{"issue":"7\u20138","key":"6373_CR40","doi-asserted-by":"publisher","first-page":"588","DOI":"10.1016\/j.specom.2006.12.006","volume":"49","author":"Y Hu","year":"2007","unstructured":"Hu, Y., & Loizou, P. C. (2007). Subjective comparison and evaluation of speech enhancement algorithms. Speech Communication, 49(7\u20138), 588\u2013601.","journal-title":"Speech Communication"},{"key":"6373_CR41","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.csl.2015.11.003","volume":"38","author":"I Hwang","year":"2016","unstructured":"Hwang, I., Park, H. M., & Chang, J. H. (2016). Ensemble of deep neural networks using acoustic environment classification for statistical model-based voice activity detection. Computer Speech and Language, 38, 1\u201312.","journal-title":"Computer Speech and Language"},{"issue":"4","key":"6373_CR42","doi-asserted-by":"publisher","first-page":"578","DOI":"10.1109\/89.326616","volume":"2","author":"H Hynek","year":"1994","unstructured":"Hynek, H., & Nelson, M. (1994). Rasta processing of speech. IEEE Transactions on Speech Audio Processing, 2(4), 578\u2013589.","journal-title":"IEEE Transactions on Speech Audio Processing"},{"issue":"2","key":"6373_CR43","first-page":"161","volume":"17","author":"M Itrat","year":"2017","unstructured":"Itrat, M., et al. (2017). Automatic language identification for languages of Pakistan. International Journal of Computer Science and Network Security, 17(2), 161\u2013169.","journal-title":"International Journal of Computer Science and Network Security"},{"issue":"31","key":"6373_CR44","doi-asserted-by":"publisher","first-page":"383","DOI":"10.1080\/02564602.2014.961576","volume":"5","author":"S Joudaki","year":"2014","unstructured":"Joudaki, S., et al. (2014). Vision-based sign language classification: A directional review. IETE Technical Review, 5(31), 383\u2013391.","journal-title":"IETE Technical Review"},{"issue":"1","key":"6373_CR45","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1109\/TASLP.2017.2766023","volume":"26","author":"M Jin","year":"2018","unstructured":"Jin, M., et al. (2018). LID-senones and their statistics for language identification. IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP), 26(1), 171\u2013183.","journal-title":"IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP)"},{"key":"6373_CR46","doi-asserted-by":"crossref","unstructured":"Komatsu, M. (2007). Reviewing human language identification. In\u00a0Speaker classification II (pp. 206\u2013228). Springer, Berlin.","DOI":"10.1007\/978-3-540-74122-0_17"},{"issue":"2","key":"6373_CR47","doi-asserted-by":"publisher","first-page":"265","DOI":"10.1007\/s10772-012-9139-3","volume":"15","author":"SG Koolagudi","year":"2012","unstructured":"Koolagudi, S. G., & Rao, K. S. (2012). Emotion recognition from speech using source, system, and prosodic features. International Journal of Speech Technology, 15(2), 265\u2013289.","journal-title":"International Journal of Speech Technology"},{"key":"6373_CR48","unstructured":"Lehner, B., Sonnleitner, R., & Widmer, G. (2013). Towards light-weight, real-time-capable singing voice detection. In ISMIR (pp. 53\u201358)."},{"issue":"1","key":"6373_CR49","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1109\/TASL.2006.876860","volume":"15","author":"H Li","year":"2007","unstructured":"Li, H., Ma, B., & Lee, C. H. (2007). A vector space modeling approach to spoken language identification. IEEE Transactions on Audio, Speech and Language Processing, 15(1), 271\u2013284.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"issue":"5","key":"6373_CR50","doi-asserted-by":"publisher","first-page":"1136","DOI":"10.1109\/JPROC.2012.2237151","volume":"101","author":"H Li","year":"2013","unstructured":"Li, H., Ma, B., & Lee, K. A. (2013). Spoken language recognition: From fundamentals to practice. Proceedings of IEEE, 101(5), 1136\u20131159.","journal-title":"Proceedings of IEEE"},{"key":"6373_CR51","unstructured":"Li, K. P. (1997). Automatic language identification\/verification system. U.S. Patent 5,689,616 (Google Patents)."},{"issue":"4","key":"6373_CR52","doi-asserted-by":"publisher","first-page":"940","DOI":"10.1016\/j.csl.2014.02.004","volume":"28","author":"M Li","year":"2014","unstructured":"Li, M., & Narayanan, S. (2014). Simplified supervised i-vector modeling with application to robust and efficient language identification and speaker verification. Computer Speech and Language, 28(4), 940\u2013958.","journal-title":"Computer Speech and Language"},{"key":"6373_CR53","doi-asserted-by":"publisher","DOI":"10.1201\/9781420015836","volume-title":"Speech enhancement: Theory and practice","author":"PC Loizou","year":"2007","unstructured":"Loizou, P. C. (2007). Speech enhancement: Theory and practice. Boca Raton: CRC press."},{"key":"6373_CR54","doi-asserted-by":"crossref","unstructured":"Lopez-Moreno, I., Gonzalez-Dominguez, J., Plchot, O., Martinez, D., Gonzalez-Rodriguez, J., & Moreno, P. (2014). Automatic language identification using deep neural networks. In 2014 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 5337\u20135341). IEEE.","DOI":"10.1109\/ICASSP.2014.6854622"},{"key":"6373_CR55","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1016\/j.csl.2016.03.001","volume":"40","author":"I Lopez-Moreno","year":"2016","unstructured":"Lopez-Moreno, I., et al. (2016). On the use of deep feed forward neural networks for automatic language identification. Computer Speech and Language, 40, 46\u201359.","journal-title":"Computer Speech and Language"},{"key":"6373_CR56","doi-asserted-by":"crossref","unstructured":"Lyu, D. C., Chng, E. S., & Li, H. (2013). Language diarization for conversational code-switch speech with pronunciation dictionary adaptation. In IEEE China summit and international conference on signal and information processing (pp. 147\u2013150). IEEE.","DOI":"10.1109\/ChinaSIP.2013.6625316"},{"key":"6373_CR57","doi-asserted-by":"crossref","unstructured":"Malmasi, S., & Cahill, A. (2015). Measuring feature diversity in native language identification. In Proceedings of the tenth workshop on innovative use of NLP for building educational applications (pp. 49\u201355).","DOI":"10.3115\/v1\/W15-0606"},{"key":"6373_CR58","unstructured":"Malmasi, S., Refaee, E., & Dras, M. (2015). Arabic dialect identification using a parallel multidialectal corpus. In International conference of the pacific association for computational linguistics (pp. 35\u201353). Springer, Singapore."},{"key":"6373_CR59","doi-asserted-by":"crossref","unstructured":"Martinez, D., Burget, L., Ferrer, L., & Scheffer, N. (2012). iVector-based prosodic system for language identification. In IEEE international conference in acoustics, speech and signal processing (ICASSP) (pp. 4861\u201348640.","DOI":"10.1109\/ICASSP.2012.6289008"},{"key":"6373_CR60","unstructured":"Matejka, P., Burget, L., Schwarz, P., & Cernocky, J. (2006). Speaker and language recognition workshop in Brno university of technology system for nist 2005 language recognition evaluation. In Odyssey 2006 (pp. 1\u20137). IEEE."},{"key":"6373_CR61","doi-asserted-by":"crossref","unstructured":"Matejka, P., et al. (2014). Neural network bottleneck features for language identification. In Proceedings of Odyssey 2014 speaker and language recognition workshop (pp. 299\u2013304).","DOI":"10.21437\/Odyssey.2014-45"},{"issue":"3","key":"6373_CR62","doi-asserted-by":"publisher","first-page":"277","DOI":"10.1007\/s10772-014-9268-y","volume":"18","author":"M Mehrabani","year":"2015","unstructured":"Mehrabani, M., & Hansen, J. H. (2015). Automatic analysis of dialect\/language sets. International Journal of Speech Technology, 18(3), 277\u2013286.","journal-title":"International Journal of Speech Technology"},{"issue":"1","key":"6373_CR63","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1109\/TASL.2011.2109382","volume":"20","author":"AR Mohamed","year":"2012","unstructured":"Mohamed, A. R., Dahl, G. E., & Hinton, G. (2012). Acoustic modeling using deep belief networks. IEEE Transactions on Audio, Speech and Language Processing, 20(1), 14\u201322.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"key":"6373_CR64","doi-asserted-by":"publisher","first-page":"558","DOI":"10.1016\/j.csl.2016.11.004","volume":"46","author":"N Moritz","year":"2017","unstructured":"Moritz, N., Adilo\u011flu, K., Anem\u00fcller, J., Goetze, S., & Kollmeier, B. (2017). Multi-channel speech enhancement and amplitude modulation analysis for noise robust automatic speech recognition.\u00a0Computer Speech & Language,\u00a046, 558\u2013573.","journal-title":"Computer Speech & Language"},{"key":"6373_CR65","doi-asserted-by":"crossref","unstructured":"Mun, S., Shon, S., Kim, W., & Ko, H. (2016). Deep neural network bottleneck features for acoustic event recognition. In INTERSPEECH (pp. 2954\u20132957).","DOI":"10.21437\/Interspeech.2016-1112"},{"key":"6373_CR66","doi-asserted-by":"crossref","unstructured":"Nayana, P., Mathew, D., & Thomas, A. (2017). Performance comparison of speaker recognition systems using GMM and i-vector methods with PNCC and RASTA PLP features. In International conference on intelligent computing, instrumentation and control technologies(ICICICT) (pp. 438\u2013443).","DOI":"10.1109\/ICICICT1.2017.8342603"},{"key":"6373_CR67","doi-asserted-by":"crossref","unstructured":"Nercessian, S., Torres-Carrasquillo, P., & Martinez-Montes, G. (2016). Approaches for language identification in mismatched environments. Spoken language technology workshop (SLT) (pp. 335\u2013340). IEEE.","DOI":"10.1109\/SLT.2016.7846286"},{"key":"6373_CR68","doi-asserted-by":"crossref","unstructured":"Ng, T., et al. (2012). Developing a speech activity detection system for the DARPA RATS program. In Thirteenth annual conference of the international speech communication association (pp. 1969\u20131972).","DOI":"10.21437\/Interspeech.2012-527"},{"issue":"5","key":"6373_CR69","first-page":"304","volume":"6","author":"MM Olvera","year":"2016","unstructured":"Olvera, M. M., S\u00e1nchez, A., & Escobar, L. H. (2016). Web-based automatic language identification system. International Journal of Information and Electronics Engineering, 6(5), 304\u2013307.","journal-title":"International Journal of Information and Electronics Engineering"},{"issue":"4","key":"6373_CR70","doi-asserted-by":"publisher","first-page":"240","DOI":"10.1080\/02564602.2015.1010611","volume":"32","author":"J Padmanabhan","year":"2015","unstructured":"Padmanabhan, J., & Johnson Premkumar, M. J. (2015). Machine learning in automatic speech recognition: A survey. IETE Technical Review, 32(4), 240\u2013251.","journal-title":"IETE Technical Review"},{"issue":"4","key":"6373_CR71","doi-asserted-by":"publisher","first-page":"426","DOI":"10.1504\/IJCVR.2017.084987","volume":"7","author":"HK Palo","year":"2017","unstructured":"Palo, H. K., Chandra, M., & Mohanty, M. N. (2017). Emotion recognition using MLP and GMM for Oriya language. International Journal of Computational Vision and Robotics, 7(4), 426\u2013442.","journal-title":"International Journal of Computational Vision and Robotics"},{"key":"6373_CR72","doi-asserted-by":"crossref","unstructured":"Phadikar, S., et al. (2017). Bengali phonetics identification using wavelet based signal feature. In International conference on computational intelligence, communications, and business analytics (pp. 253\u2013265). Springer, Singapore.","DOI":"10.1007\/978-981-10-6427-2_21"},{"issue":"11","key":"6373_CR73","first-page":"5671","volume":"2","author":"C Poonkuzhali","year":"2013","unstructured":"Poonkuzhali, C., Karthiprakash, R., Valarmathy, S., & Kalamani, M. (2013). An approach to feature selection algorithm based on ant colony optimization for automatic speech recognition. Journal of Advanced Research in Electrical, Electronics and Instrumentation Engineering, 2(11), 5671\u20135678.","journal-title":"Journal of Advanced Research in Electrical, Electronics and Instrumentation Engineering"},{"key":"6373_CR74","doi-asserted-by":"publisher","first-page":"76","DOI":"10.1016\/j.eswa.2017.05.037","volume":"85","author":"Z Qawaqneh","year":"2017","unstructured":"Qawaqneh, Z., Mallouh, A. A., & Barkana, B. D. (2017). Age and gender classification from speech and face images by jointly fine-tuned deep neural networks. Expert Systems with Applications, 85, 76\u201386.","journal-title":"Expert Systems with Applications"},{"key":"6373_CR75","doi-asserted-by":"crossref","unstructured":"Qi, J., Wang, D., Xu, J., & Tejedor Noguerales, J. (2013). Bottleneck features based on gammatone frequency cepstral coefficients. In Interspeech.","DOI":"10.21437\/Interspeech.2013-435"},{"key":"6373_CR76","doi-asserted-by":"crossref","unstructured":"Rajput, N., & Verma, S. K. (2014). Back propagation feed forward neural network approach for speech recognition. In 2014 3rd international conference on reliability, Infocom technologies and optimization (ICRITO) (trends and future directions) (pp. 1\u20136). IEEE.","DOI":"10.1109\/ICRITO.2014.7014712"},{"issue":"4","key":"6373_CR77","doi-asserted-by":"publisher","first-page":"413","DOI":"10.1007\/s10772-013-9193-5","volume":"16","author":"KS Rao","year":"2013","unstructured":"Rao, K. S., Maity, S., & Reddy, V. R. (2013). Pitch synchronous and glottal closure based speech analysis for language recognition. International Journal of Speech Technology, 16(4), 413\u2013430.","journal-title":"International Journal of Speech Technology"},{"key":"6373_CR78","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-17163-0","volume-title":"Language identification using spectral and prosodic features","author":"KS Rao","year":"2015","unstructured":"Rao, K. S., Reddy, V. R., & Maity, S. (2015). Language identification using spectral and prosodic features. Berlin: Springer."},{"key":"6373_CR79","first-page":"291","volume-title":"Perceptual features based rapid and robust language identification system for various indian classical languages. Computational vision and bio inspired computing","author":"A Revathi","year":"2018","unstructured":"Revathi, A., Jeyalakshmi, C., & Muruganantham, T. (2018). Perceptual features based rapid and robust language identification system for various indian classical languages. Computational vision and bio inspired computing (pp. 291\u2013305). Cham: Springer."},{"key":"6373_CR80","unstructured":"Richardson, F., Reynolds, D., & Dehak, N. (2015). A unified deep neural network for speaker and language recognition. arXiv preprint arXiv:1504.00923 ."},{"issue":"10","key":"6373_CR81","doi-asserted-by":"publisher","first-page":"1671","DOI":"10.1109\/LSP.2015.2420092","volume":"22","author":"F Richardson","year":"2015","unstructured":"Richardson, F., Reynolds, D., & Dehak, N. (2015). Deep neural network approaches to speaker and language recognition. IEEE Signal Processing Letters, 22(10), 1671\u20131675.","journal-title":"IEEE Signal Processing Letters"},{"key":"6373_CR82","doi-asserted-by":"publisher","first-page":"138","DOI":"10.1016\/j.specom.2015.04.005","volume":"72","author":"SO Sadjadi","year":"2015","unstructured":"Sadjadi, S. O., & Hansen, J. H. (2015). Mean Hilbert envelope coefficients (MHEC) for Robust speaker and language identification. Speech Communication, 72, 138\u2013148.","journal-title":"Speech Communication"},{"issue":"4","key":"6373_CR83","doi-asserted-by":"publisher","first-page":"543","DOI":"10.1016\/j.specom.2011.11.004","volume":"54","author":"M Sahidullah","year":"2012","unstructured":"Sahidullah, M., & Saha, G. (2012). Design, analysis and experimental evaluation of block based transformation in MFCC computation for speaker recognition. Speech Communication, 54(4), 543\u2013565.","journal-title":"Speech Communication"},{"issue":"4","key":"6373_CR84","first-page":"1532","volume":"5","author":"E Chandra","year":"2014","unstructured":"Sangwan, P. (2017). Feature Extraction for Speaker Recognition: A Systematic Study.\u00a0Global Journal of Enterprise Information System,\u00a09(4), 19\u201326.","journal-title":"Journal of Computer Technology and Applications"},{"issue":"3","key":"6373_CR85","doi-asserted-by":"publisher","first-page":"53","DOI":"10.18311\/gjeis\/2017\/16123","volume":"9","author":"P Sangwan","year":"2017","unstructured":"Sangwan, P., & Bhardwaj, S. (2017). A structured approach towards robust database collection for speaker recognition. Global Journal of Enterprise Information System, 9(3), 53\u201358.","journal-title":"Global Journal of Enterprise Information System"},{"issue":"3","key":"6373_CR86","doi-asserted-by":"publisher","first-page":"305","DOI":"10.1080\/02564602.2017.1293570","volume":"35","author":"BD Sarma","year":"2018","unstructured":"Sarma, B. D., & Prasanna, S. M. (2018). Acoustic\u2013phonetic analysis for speech recognition: A review. IETE Technical Review, 35(3), 305\u2013327.","journal-title":"IETE Technical Review"},{"key":"6373_CR87","doi-asserted-by":"publisher","first-page":"1118","DOI":"10.1109\/TASLP.2015.2419978","volume":"7","author":"V Segbroeck","year":"2015","unstructured":"Segbroeck, V., Travadi, M. R., & Narayanan, S. S. (2015). Rapid language identification. IEEE Transactions on Audio, Speech and Language Processing, 7, 1118\u20131129.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"issue":"4","key":"6373_CR88","doi-asserted-by":"publisher","first-page":"220","DOI":"10.1504\/IJSISE.2014.066600","volume":"7","author":"DP Sharma","year":"2014","unstructured":"Sharma, D. P., & Atkins, J. (2014). Automatic speech recognition systems: Challenges and recent implementation trends. International Journal of Signal and Imaging Systems Engineering, 7(4), 220\u2013234.","journal-title":"International Journal of Signal and Imaging Systems Engineering"},{"key":"6373_CR89","doi-asserted-by":"publisher","first-page":"1765","DOI":"10.1007\/s00521-011-0620-8","volume":"21","author":"M Sheikhan","year":"2012","unstructured":"Sheikhan, M., Gharavian, D., & Ashoftedel, F. (2012). Using DTW neural\u2013based MFCC warping to improve emotional speech recognition. Neural Computer and Application, 21, 1765\u20131773.","journal-title":"Neural Computer and Application"},{"issue":"5","key":"6373_CR90","doi-asserted-by":"publisher","first-page":"1029","DOI":"10.1109\/TASL.2008.924150","volume":"16","author":"KC Sim","year":"2008","unstructured":"Sim, K. C., & Li, H. (2008). On acoustic diversification front-end for spoken language identification. IEEE Transactions on Audio, Speech and Language Processing, 16(5), 1029\u20131037.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"key":"6373_CR91","unstructured":"Singer, E., et al. (2003). Acoustic phonetic and discriminative approaches to automatic language recognition. In Proceedings of the Eurospeech (pp. 1345\u20131348)."},{"key":"6373_CR92","doi-asserted-by":"crossref","unstructured":"Sivaraman, G., et al. (2016). Vocal tract length normalization for speaker independent acoustic-to-articulatory speech inversion. In INTERSPEECH (pp. 455\u2013459).","DOI":"10.21437\/Interspeech.2016-1399"},{"key":"6373_CR93","doi-asserted-by":"crossref","unstructured":"Song, Y., et al. (2015). Improved language identification using deep bottleneck network. In IEEE international conference in acoustics, speech and signal processing (ICASSP) (pp. 4200\u20134204). IEEE.","DOI":"10.1109\/ICASSP.2015.7178762"},{"key":"6373_CR94","doi-asserted-by":"crossref","unstructured":"Sundermeyer, M., Schl\u00fcter, R., & Ney, H. (2012). LSTM neural networks for language modeling. In Thirteenth annual conference of the international speech communication association, USA (pp. 194\u2013197).","DOI":"10.21437\/Interspeech.2012-65"},{"key":"6373_CR95","doi-asserted-by":"publisher","first-page":"80","DOI":"10.1016\/j.bspc.2014.10.008","volume":"18","author":"Y Sun","year":"2015","unstructured":"Sun, Y., Wen, G., & Wang, J. (2015). Weighted spectral features based on local Hu moments for speech emotion recognition. Journal of Biomedical Signal Processing and Control, 18, 80\u201390.","journal-title":"Journal of Biomedical Signal Processing and Control"},{"issue":"16","key":"6373_CR96","doi-asserted-by":"publisher","first-page":"2077","DOI":"10.1016\/j.patrec.2012.06.012","volume":"33","author":"Hidayet Tak\u00e7\u0131","year":"2012","unstructured":"Tak\u00e7\u0131, Hidayet, & G\u00fcng\u00f6r, T. (2012). A high performance centroid-based classification approach for language identification. Pattern Recognition Letters, 33(16), 2077\u20132084.","journal-title":"Pattern Recognition Letters"},{"issue":"1","key":"6373_CR97","doi-asserted-by":"publisher","first-page":"134","DOI":"10.1109\/TASLP.2017.2764271","volume":"26","author":"Z Tang","year":"2018","unstructured":"Tang, Z., et al. (2018). Phonetic temporal neural model for language identification. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 26(1), 134\u2013144.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"7","key":"6373_CR98","doi-asserted-by":"publisher","first-page":"2837","DOI":"10.1007\/s00034-017-0689-0","volume":"37","author":"R Thirumuru","year":"2018","unstructured":"Thirumuru, R., & Vuppala, A. K. (2018). Automatic detection of retroflex approximants in a continuous tamil speech. Circuits, Systems, and Signal Processing, 37(7), 2837\u20132851.","journal-title":"Circuits, Systems, and Signal Processing"},{"key":"6373_CR99","doi-asserted-by":"crossref","unstructured":"Torres-Carrasquillo, P. A., et al. (2002). Approaches to language identification using Gaussian mixture models and shifted delta cepstral features. In Seventh international conference on spoken language processing.","DOI":"10.21437\/ICSLP.2002-74"},{"key":"6373_CR100","doi-asserted-by":"publisher","first-page":"574","DOI":"10.1016\/j.procs.2015.06.066","volume":"54","author":"N Upadhyay","year":"2015","unstructured":"Upadhyay, N., & Karmakar, A. (2015). Speech enhancement using spectral subtraction-type algorithms: A comparison and simulation study. Procedia Computer Science, 54, 574\u2013584.","journal-title":"Procedia Computer Science"},{"issue":"4","key":"6373_CR101","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1007\/s10772-015-9295-3","volume":"18","author":"P Verma","year":"2015","unstructured":"Verma, P., & Das, P. K. (2015). i-Vectors in speech processing applications: A survey. International Journal of Speech Technology, 18(4), 529\u2013546.","journal-title":"International Journal of Speech Technology"},{"key":"6373_CR102","doi-asserted-by":"crossref","unstructured":"Viana, H. O., & Mello, C. A. (2014). Speech description through MINERS: Model invariant to noise and environment robust for speech. In 2014 IEEE international conference on systems, man and cybernatics (SMC) (pp. 489\u2013494). IEEE.","DOI":"10.1109\/SMC.2014.6973955"},{"key":"6373_CR103","doi-asserted-by":"crossref","unstructured":"Vu, N., Imseng, D., Povey, D., & Molicek, P., Schultz, T., & Bourlard, H. (2014). Multilingual deep neural network based acoustic modelling for rapid language adaptation. In 2014 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 7639\u20137643). IEEE.","DOI":"10.1109\/ICASSP.2014.6855086"},{"issue":"1","key":"6373_CR104","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1109\/LSP.2012.2227312","volume":"20","author":"H Wang","year":"2013","unstructured":"Wang, H., et al. (2013). Shifted-delta mlp features for spoken language recognition. IEEE Signal Processing Letters, 20(1), 15\u201318.","journal-title":"IEEE Signal Processing Letters"},{"key":"6373_CR105","unstructured":"Wang, H., Xu, Y., & Li, M. (2011). Study on the MFCC similarity-based voice activity detection algorithm. In 2nd international conference in artificial intelligence, management science and electronic commerce (AIMSEC) (pp. 4391\u20134394). IEEE."},{"issue":"4","key":"6373_CR106","doi-asserted-by":"publisher","first-page":"888","DOI":"10.1016\/j.csl.2014.01.001","volume":"28","author":"F Weninger","year":"2014","unstructured":"Weninger, F., et al. (2014). Feature enhancement by deep LSTM networks for ASR in reverberant multisource environments. Computer Speech and Language (Elsevier), 28(4), 888\u2013902.","journal-title":"Computer Speech and Language (Elsevier)"},{"key":"6373_CR107","doi-asserted-by":"crossref","unstructured":"Williamson, D. S., & Wang, D. (2017). Speech dereverberation and denoising using complex ratio masks. In 2017 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 5590\u20135594). IEEE.","DOI":"10.1109\/ICASSP.2017.7953226"},{"key":"6373_CR108","unstructured":"Wong, K. Y. E. (2004). Automatic spoken language identification utilizing acoustic and phonetic speech information. Doctoral dissertation, Queensland University of Technology."},{"key":"6373_CR109","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1016\/j.specom.2007.07.006","volume":"50","author":"U Yapanel","year":"2008","unstructured":"Yapanel, U., & Hansen, J. (2008). A new perceptually motivated MVDR-based acoustic front-end (PMVDR) for robust automatic speech recognition. Journal of Speech Communication, 50, 142\u2013152.","journal-title":"Journal of Speech Communication"},{"key":"6373_CR110","doi-asserted-by":"crossref","unstructured":"Yilmaz, E., McLaren, M., van den Heuvel, H., & van Leeuwen, D. A. (2017). Language diarization for semi-supervised bilingual acoustic model training. Automatic speech recognition and understanding workshop (ASRU). 2017 IEEE (pp. 91\u201396). IEEE.","DOI":"10.1109\/ASRU.2017.8268921"},{"issue":"1","key":"6373_CR111","doi-asserted-by":"publisher","first-page":"e0146917","DOI":"10.1371\/journal.pone.0146917","volume":"11","author":"R Zazo","year":"2016","unstructured":"Zazo, R., et al. (2016). Language identification in short utterances using long short-term memory (LSTM) recurrent neural networks. PloS One, 11(1), e0146917.","journal-title":"PloS One"},{"issue":"4","key":"6373_CR112","doi-asserted-by":"publisher","first-page":"697","DOI":"10.1109\/TASL.2012.2229986","volume":"21","author":"XL Zhang","year":"2013","unstructured":"Zhang, X. L., & Wu, J. (2013). Deep belief networks based voice activity detection. IEEE Transactions on Audio, Speech and Language Processing, 21(4), 697\u2013710.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"}],"container-title":["Wireless Personal Communications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11277-019-06373-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11277-019-06373-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11277-019-06373-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,16]],"date-time":"2022-09-16T14:34:33Z","timestamp":1663338873000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11277-019-06373-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,4,22]]},"references-count":112,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2019,8]]}},"alternative-id":["6373"],"URL":"https:\/\/doi.org\/10.1007\/s11277-019-06373-3","relation":{},"ISSN":["0929-6212","1572-834X"],"issn-type":[{"value":"0929-6212","type":"print"},{"value":"1572-834X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,4,22]]},"assertion":[{"value":"22 April 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}