{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,12]],"date-time":"2026-06-12T16:52:37Z","timestamp":1781283157422,"version":"3.54.1"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2022,1,3]],"date-time":"2022-01-03T00:00:00Z","timestamp":1641168000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,1,3]],"date-time":"2022-01-03T00:00:00Z","timestamp":1641168000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2023,3]]},"DOI":"10.1007\/s11042-021-11439-1","type":"journal-article","created":{"date-parts":[[2022,1,3]],"date-time":"2022-01-03T11:03:37Z","timestamp":1641207817000},"page":"9565-9595","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":40,"title":["Automatic spoken language identification using MFCC based time series features"],"prefix":"10.1007","volume":"82","author":[{"given":"Mainak","family":"Biswas","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Saif","family":"Rahaman","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ali","family":"Ahmadian","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kamalularifin","family":"Subari","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9598-7981","authenticated-orcid":false,"given":"Pawan Kumar","family":"Singh","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2022,1,3]]},"reference":[{"key":"11439_CR1","unstructured":"Abadi M, Agarwal A, Barham P, Brevdo E, Chen Z, Citro C, Corrado G, Davis A, Dean J, Devin M, Ghemawat S, Goodfellow I, Harp A, Irving G, Isard M, Jia Y, J\u00f3zefowicz R, Kaiser L, Kudlur M, Levenberg J, Man\u00e9 D, Monga R, Moore S, Murray D, Olah C, Schuster M, Shlens J, Steiner B, Sutskever I, Talwar K, Tucker P, Vanhoucke V, Vasudevan V, Vi\u00e9gas F, Vinyals O, Warden P, Wattenberg M, Wicke M, Yu Y, Zheng X (2016) TensorFlow: Large-Scale Machine Learning on Heterogeneous Distributed Systems. ArXiv,\u00a0https:\/\/arxiv.org\/abs\/1603.04467."},{"key":"11439_CR2","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0194770","author":"MAA Albadr","year":"2018","unstructured":"Albadr MAA, Tiun S, AL-Dhief FT, Sammour MAM (2018) Spoken language identification based on the enhanced self-adjusting extreme learning machine approach. PLoS ONE. https:\/\/doi.org\/10.1371\/journal.pone.0194770","journal-title":"PLoS ONE"},{"key":"11439_CR3","doi-asserted-by":"publisher","DOI":"10.5772\/intechopen.80419","volume-title":"From natural to artificial intelligence-algorithms and applications","author":"SA Alim","year":"2018","unstructured":"Alim SA, Rashid NKA (2018) Some commonly used speech feature extraction algorithms. In: L\u00f3pez-Ruiz R (ed) From natural to artificial intelligence-algorithms and applications. IntechOpen, London. https:\/\/doi.org\/10.5772\/intechopen.80419"},{"key":"11439_CR4","doi-asserted-by":"publisher","unstructured":"Anjana JS, Poorna SS (2018) Language identification from speech features using SVM and LDA. 2018 International Conference on Wireless Communications, Signal Processing and Networking (WiSPNET), 1\u20134. https:\/\/doi.org\/10.1109\/WiSPNET.2018.8538638","DOI":"10.1109\/WiSPNET.2018.8538638"},{"key":"11439_CR5","unstructured":"Approximate Entropy. (n.d.). https:\/\/en.wikipedia.org\/wiki\/Approximate_entropy"},{"key":"11439_CR6","unstructured":"Baby A, Thomas A, Consortium TTS (2016). Resources for Indian languages. https:\/\/www.iitm.ac.in\/donlab\/tts\/database.php"},{"key":"11439_CR7","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1007\/978-981-13-3702-4_8","volume-title":"Advanced computing and systems for security","author":"B Barai","year":"2019","unstructured":"Barai B, Das D, Das N, Basu S, Nasipuri M (2019) VQ\/GMM-Based Speaker Identification with Emphasis on Language Dependency. Advanced computing and systems for security. Springer, Berlin, pp 125\u2013145"},{"issue":"4","key":"11439_CR8","doi-asserted-by":"publisher","first-page":"1165","DOI":"10.1214\/aos\/1013699998","volume":"29","author":"Y Benjamini","year":"2001","unstructured":"Benjamini Y, Yekutieli D (2001) The control of the false discovery rate in multiple testing under dependency. Ann Stat 29(4):1165\u20131188. https:\/\/doi.org\/10.1214\/aos\/1013699998","journal-title":"Ann Stat"},{"key":"11439_CR9","first-page":"311","volume-title":"Machine learning for intelligent multimedia analytics: techniques and applications","author":"M Biswas","year":"2021","unstructured":"Biswas M, Rahaman S, Kundu S, Singh PK, Sarkar R (2021) Spoken language identification of Indian languages using MFCC features. In: Kumar P, Singh AK (eds) Machine learning for intelligent multimedia analytics: techniques and applications. Springer, Singapore, pp 311\u2013323"},{"key":"11439_CR10","unstructured":"Christ M, Kempa-Liehr A, Feindt M (2016) Distributed and parallel time series feature extraction for industrial big data applications. ArXiv,\u00a0https:\/\/arxiv.org\/abs\/1610.07717."},{"issue":"4","key":"11439_CR11","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S Davis","year":"1980","unstructured":"Davis S, Mermelstein P (1980) Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Trans Acoust Speech Signal Process 28(4):357\u2013366. https:\/\/doi.org\/10.1109\/TASSP.1980.1163420","journal-title":"IEEE Trans Acoust Speech Signal Process"},{"key":"11439_CR12","doi-asserted-by":"publisher","unstructured":"Draghici A, Abe\u00dfer J, Lukashevich H (2020) A Study on Spoken Language Identification using Deep Neural Networks. Proceedings of the 15th International Conference on Audio Mostly. https:\/\/doi.org\/10.1145\/3411109.3411123","DOI":"10.1145\/3411109.3411123"},{"key":"11439_CR13","doi-asserted-by":"publisher","first-page":"541","DOI":"10.3390\/e21060541","volume":"21","author":"A Delgado-Bonal","year":"2019","unstructured":"Delgado-Bonal A, Marshak A (2019) Approximate entropy and sample entropy: a comprehensive tutorial. Entropy 21:541. https:\/\/doi.org\/10.3390\/e21060541","journal-title":"Entropy"},{"key":"11439_CR14","doi-asserted-by":"publisher","first-page":"114416","DOI":"10.1016\/j.eswa.2020.114416","volume":"168","author":"A Garain","year":"2021","unstructured":"Garain A, Singh PK, Sarkar R (2021) FuzzyGCP: a deep learning architecture for automatic spoken language identification from speech signals. Expert Systems with Applications 168:114416. https:\/\/doi.org\/10.1016\/j.eswa.2020.114416","journal-title":"Expert Systems with Applications"},{"key":"11439_CR15","doi-asserted-by":"publisher","first-page":"11","DOI":"10.5815\/ijitcs.2018.08.02","volume":"10","author":"V Gazeau","year":"2018","unstructured":"Gazeau V, Varol C (2018) Automatic spoken language recognition with neural networks. Int J Inf. Technol Comput Sci 10:11\u201317. https:\/\/doi.org\/10.5815\/ijitcs.2018.08.02","journal-title":"Int J Inf. Technol Comput Sci"},{"key":"11439_CR16","unstructured":"Ghosh A (2020). Ranked: the 100 most spoken languages worldwide. https:\/\/www.visualcapitalist.com\/100-most-spoken-languages\/"},{"key":"11439_CR17","first-page":"249","volume":"9","author":"X Glorot","year":"2010","unstructured":"Glorot X, Bengio Y (2010) Understanding the difficulty of training deep feedforward neural networks. J Mach Learn Res 9:249\u2013256","journal-title":"J Mach Learn Res"},{"key":"11439_CR18","doi-asserted-by":"crossref","unstructured":"Gupta M, Bharti SS, Agarwal S (2017) Implicit language identification system based on random forest and support vector machine for speech. 2017 4th International Conference on Power, Control & Embedded Systems (ICPCES), 1\u20136.","DOI":"10.1109\/ICPCES.2017.8117624"},{"key":"11439_CR19","doi-asserted-by":"publisher","unstructured":"Heracleous P, Takai K, Yasuda K, Mohammad Y, Yoneyama A (2018) Comparative study on spoken language identification based on deep learning. 2018 26th European Signal Processing Conference (EUSIPCO), 2265\u20132269. https:\/\/doi.org\/10.23919\/EUSIPCO.2018.8553347","DOI":"10.23919\/EUSIPCO.2018.8553347"},{"issue":"4","key":"11439_CR20","doi-asserted-by":"publisher","first-page":"1738","DOI":"10.1121\/1.399423","volume":"87","author":"H Hermansky","year":"1990","unstructured":"Hermansky H (1990) Perceptual linear predictive (PLP) analysis of speech. J Acoust Soc Am 87(4):1738\u20131752. https:\/\/doi.org\/10.1121\/1.399423","journal-title":"J Acoust Soc Am"},{"issue":"4","key":"11439_CR21","doi-asserted-by":"publisher","first-page":"578","DOI":"10.1109\/89.326616","volume":"2","author":"H Hermansky","year":"1994","unstructured":"Hermansky H, Morgan N (1994) RASTA processing of speech. IEEE Trans Speech Audio Process 2(4):578\u2013589. https:\/\/doi.org\/10.1109\/89.326616","journal-title":"IEEE Trans Speech Audio Process"},{"key":"11439_CR22","unstructured":"How many languages. (n.d.). https:\/\/www.ethnologue.com\/guides\/how-many-languages"},{"key":"11439_CR23","doi-asserted-by":"publisher","unstructured":"Jog AH, Jugade OA, Kadegaonkar AS, Birajdar GK (2018) Indian language identification using cochleagram based texture descriptors and ANN Classifier. 2018 15th IEEE India Council International Conference (INDICON), 1\u20136. https:\/\/doi.org\/10.1109\/INDICON45594.2018.8987167","DOI":"10.1109\/INDICON45594.2018.8987167"},{"key":"11439_CR24","unstructured":"Kingma DP, Ba JL (2015) Adam: a method for stochastic optimization. 3rd International Conference on Learning Representations, ICLR 2015 - Conference Track Proceedings, 1\u201315."},{"key":"11439_CR25","unstructured":"Krishna DN, Patil A, Raj M, SaiPrasad H S, Garapati PA (2020) Identification of Indian languages using ghost-VLAD pooling. https:\/\/www.researchgate.net\/publication\/339065645_Identification_of_Indian_Languages_using_Ghost-VLAD_pooling"},{"key":"11439_CR26","doi-asserted-by":"crossref","unstructured":"Korkut C, Haznedaroglu A, Arslan L (2020)\u00a0Comparison of Deep Learning Methods for Spoken Language Identification BT - Speech and Computer (A. Karpov & R. Potapova (Eds.); pp. 223\u2013231). Springer International Publishing.","DOI":"10.1007\/978-3-030-60276-5_23"},{"key":"11439_CR27","unstructured":"Kumar SK (2017) On weight initialization in deep neural networks. ArXiv,\u00a0https:\/\/arxiv.org\/abs\/1704.08863. 1\u20139."},{"key":"11439_CR28","unstructured":"Languages of India. (n.d.). https:\/\/en.wikipedia.org\/wiki\/Languages_of_India Accessed 21 Feb 2021"},{"key":"11439_CR29","doi-asserted-by":"crossref","unstructured":"Lopez-moreno I, Gonzalez-dominguez J, Plchot O, Martinez D, Gonzalez-rodriguez J, Moreno P (2014) Automatic language identification using deep neural networks. 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 5337\u20135341.","DOI":"10.1109\/ICASSP.2014.6854622"},{"issue":"4","key":"11439_CR30","doi-asserted-by":"publisher","first-page":"561","DOI":"10.1109\/PROC.1975.9792","volume":"63","author":"J Makhoul","year":"1975","unstructured":"Makhoul J (1975) Linear prediction: a tutorial review. Proc IEEE 63(4):561\u2013580. https:\/\/doi.org\/10.1109\/PROC.1975.9792","journal-title":"Proc IEEE"},{"issue":"2","key":"11439_CR31","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/s10772-013-9209-1","volume":"17","author":"S Manchala","year":"2014","unstructured":"Manchala S, Prasad VK, Janaki V (2014) GMM based language identification system using robust features. Int J Speech Technol 17(2):99\u2013105. https:\/\/doi.org\/10.1007\/s10772-013-9209-1","journal-title":"Int J Speech Technol"},{"key":"11439_CR32","doi-asserted-by":"crossref","unstructured":"Martin A, Greenberg C (2010). The 2009 NIST language recognition evaluation. Odyssey 2010: Speaker and Language Recognition Workshop, 165\u2013171.","DOI":"10.21437\/Interspeech.2010-722"},{"key":"11439_CR33","doi-asserted-by":"publisher","unstructured":"McFee B, Lostanlen V, McVicar M, Metsai A, Balke S, Thom\u00e9 C, Raffel C, Malek A, Lee D, Zalkow F, Lee K, Nieto O, Mason J, Ellis D, Yamamoto R, Seyfarth S, Battenberg E, Mopo\u0437o\u0432 B, Bittner R et al (2020). librosa\/librosa: 0.7.2. https:\/\/doi.org\/10.5281\/ZENODO.3606573","DOI":"10.5281\/ZENODO.3606573"},{"key":"11439_CR34","first-page":"311","volume-title":"Handw\u00f6rterbuch pattern recognition and artificial intelligence","author":"P Mermelstein","year":"1976","unstructured":"Mermelstein P (1976) Distance measures for speech recognition, psychological and instrumental. Handw\u00f6rterbuch pattern recognition and artificial intelligence. Academic Press, Cambridge, pp 311\u2013323"},{"issue":"12","key":"11439_CR35","doi-asserted-by":"publisher","first-page":"8483","DOI":"10.1007\/s00521-019-04468-3","volume":"31","author":"H Mukherjee","year":"2019","unstructured":"Mukherjee H, Ghosh S, Sen S, SkMd O, Santosh KC, Phadikar S, Roy K (2019) Deep learning for spoken language identification: can we visualize speech signal patterns? Neural Comput Appl 31(12):8483\u20138501. https:\/\/doi.org\/10.1007\/s00521-019-04468-3","journal-title":"Neural Comput Appl"},{"key":"11439_CR36","doi-asserted-by":"publisher","DOI":"10.1007\/s13042-019-00928-3","author":"H Mukherjee","year":"2020","unstructured":"Mukherjee H, Obaidullah SM, Santosh KC, Phadikar S, Roy K (2020) A lazy learning-based language identification from speech using MFCC-2 features. Int J Mach Learn Cybern. https:\/\/doi.org\/10.1007\/s13042-019-00928-3","journal-title":"Int J Mach Learn Cybern"},{"key":"11439_CR37","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-019-08553-6","author":"H Mukherjee","year":"2020","unstructured":"Mukherjee H, Dhar A, Obaidullah SM, Phadikar S, Roy K (2020) Image-based features for speech signal classification. Multimedia Tools and Applications. https:\/\/doi.org\/10.1007\/s11042-019-08553-6","journal-title":"Multimedia Tools and Applications"},{"key":"11439_CR38","doi-asserted-by":"publisher","first-page":"1223","DOI":"10.1109\/TASLP.2020.2983580","volume":"28","author":"B Padi","year":"2020","unstructured":"Padi B, Mohan A, Ganapathy S (2020) Towards relevance and sequence modeling in language recognition. IEEE\/ACM Transact Audio, Speech Lang Process 28:1223\u20131232. https:\/\/doi.org\/10.1109\/TASLP.2020.2983580","journal-title":"IEEE\/ACM Transact Audio, Speech Lang Process"},{"key":"11439_CR39","doi-asserted-by":"crossref","unstructured":"Prahallad K, Kumar E, Keri V, Suyambu R, Black A (2012) The IIIT-H Indic Speech Databases, INTERSPEECH. http:\/\/festvox.org\/databases\/iiit_voices\/","DOI":"10.21437\/Interspeech.2012-659"},{"key":"11439_CR40","unstructured":"Revay S, Teschke M, Novetta (2019) Multiclass language identification using deep learning on spectral images of audio signals. ArXiv,\u00a0https:\/\/arxiv.org\/abs\/1905.04348. 1\u20137."},{"key":"11439_CR41","doi-asserted-by":"publisher","unstructured":"Sarthak, Shukla S, Mittal G (2019) Spoken language identification using convNets. Lecture Notes in Computer Science (Including Subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), 11912 LNCS, 252\u2013265. https:\/\/doi.org\/10.1007\/978-3-030-34255-5_17","DOI":"10.1007\/978-3-030-34255-5_17"},{"key":"11439_CR42","volume-title":"Spectral analysis of signals","author":"P Stoica","year":"2005","unstructured":"Stoica P, Moses RL (2005) Spectral analysis of signals. Prentice Hall, Hoboken"},{"key":"11439_CR43","unstructured":"Strang G (2005) Linear algebra and its application. In Linear Algebra 4th Edition, chapter 3.5, pp. 211-221. http:\/\/facultymember.iaukhsh.ac.ir\/images\/Uploaded_files\/[Strang_G.]_Linear_algebra_and_its_applications(4)[5881001].PDF"},{"key":"11439_CR44","doi-asserted-by":"crossref","unstructured":"Titus A, Silovsky J, Chen N, Hsiao R, Young M, Ghoshal A. (2020). Improving Language Identification for Multilingual Speakers. https:\/\/arxiv.org\/pdf\/2001.11019.pdf","DOI":"10.1109\/ICASSP40776.2020.9053057"},{"key":"11439_CR45","unstructured":"van der Merwe R. (2020) Triplet entropy loss: improving the generalisation of short speech language identification systems. ArXiv, abs\/2012.03775"},{"key":"11439_CR46","unstructured":"VoxForge. (n.d.). http:\/\/www.voxforge.org\/ Accessed 25 Jan 2021"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-021-11439-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-021-11439-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-021-11439-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,2]],"date-time":"2023-03-02T16:16:13Z","timestamp":1677773773000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-021-11439-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,3]]},"references-count":46,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2023,3]]}},"alternative-id":["11439"],"URL":"https:\/\/doi.org\/10.1007\/s11042-021-11439-1","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,1,3]]},"assertion":[{"value":"5 February 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 August 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 August 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 January 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}