{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T06:19:40Z","timestamp":1763705980118,"version":"3.37.3"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2021,2,3]],"date-time":"2021-02-03T00:00:00Z","timestamp":1612310400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,2,3]],"date-time":"2021-02-03T00:00:00Z","timestamp":1612310400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100006144","name":"Kerala State Council for Science, Technology and Environment","doi-asserted-by":"publisher","award":["13-39\/BLP\/WSD\/KSCSTE\/2016-17"],"award-info":[{"award-number":["13-39\/BLP\/WSD\/KSCSTE\/2016-17"]}],"id":[{"id":"10.13039\/501100006144","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1007\/s10772-021-09807-1","type":"journal-article","created":{"date-parts":[[2021,2,3]],"date-time":"2021-02-03T16:04:06Z","timestamp":1612368246000},"page":"483-495","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["An acoustic model and linguistic analysis for Malayalam disyllabic words: a low resource language"],"prefix":"10.1007","volume":"24","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4643-5803","authenticated-orcid":false,"given":"K. R.","family":"Lekshmi","sequence":"first","affiliation":[]},{"given":"Elizabeth","family":"Sherly","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,2,3]]},"reference":[{"issue":"10","key":"9807_CR1","doi-asserted-by":"publisher","first-page":"1533","DOI":"10.1109\/TASLP.2014.2339736","volume":"22","author":"O Abdel-Hamid","year":"2014","unstructured":"Abdel-Hamid, O., Ar, Mohamed, Jiang, H., Deng, L., Penn, G., & Yu, D. (2014). Convolutional neural networks for speech recognition. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 22(10), 1533\u20131545.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"doi-asserted-by":"crossref","unstructured":"Al-Qatab, B. A., & Ainon, R. N. (2010). Arabic speech recognition using hidden markov model toolkit (HTK). In 2010 international symposium on information technology (Vol.\u00a02, pp. 557\u2013562). IEEE.","key":"9807_CR2","DOI":"10.1109\/ITSIM.2010.5561391"},{"doi-asserted-by":"crossref","unstructured":"Badshah, A. M., Ahmad, J., Rahim, N., & Baik, S. W. (2017). Speech emotion recognition from spectrograms with deep convolutional neural network. In 2017 international conference on platform technology and service (PlatCon) (pp. 1\u20135). IEEE.","key":"9807_CR3","DOI":"10.1109\/PlatCon.2017.7883728"},{"unstructured":"Bae, S. H., Choi, I., & Kim, N. S. (2016). Acoustic scene classification using parallel combination of LSTM and CNN. In Proceedings of the detection and classification of acoustic scenes and events 2016 workshop (DCASE2016) (pp. 11\u201315).","key":"9807_CR4"},{"issue":"12","key":"9807_CR5","first-page":"307","volume":"2","author":"PV Bhaskar","year":"2012","unstructured":"Bhaskar, P. V., Rao, S. R. M., & Gopi, A. (2012). HTK based Telugu speech recognition. International Journal of Advanced Research in Computer Science and Software Engineering, 2(12), 307\u2013314.","journal-title":"International Journal of Advanced Research in Computer Science and Software Engineering"},{"issue":"5","key":"9807_CR6","doi-asserted-by":"publisher","first-page":"587","DOI":"10.1007\/s12046-011-0039-z","volume":"36","author":"P Bhaskararao","year":"2011","unstructured":"Bhaskararao, P. (2011). Salient phonetic features of Indian languages in speech technology. Sadhana, 36(5), 587\u2013599.","journal-title":"Sadhana"},{"doi-asserted-by":"crossref","unstructured":"Cummins, N., Amiriparian, S., Hagerer, G., Batliner, A., Steidl, S., & Schuller, B. W. (2017). An image-based deep spectrum feature representation for the recognition of emotional speech. In Proceedings of the 25th ACM international conference on multimedia (pp. 478\u2013484).","key":"9807_CR7","DOI":"10.1145\/3123266.3123371"},{"issue":"4","key":"9807_CR8","first-page":"359","volume":"9","author":"M Dua","year":"2012","unstructured":"Dua, M., Aggarwal, R., Kadyan, V., & Dua, S. (2012). Punjabi automatic speech recognition using HTK. International Journal of Computer Science Issues (IJCSI), 9(4), 359.","journal-title":"International Journal of Computer Science Issues (IJCSI)"},{"key":"9807_CR9","first-page":"47","volume":"15","author":"J George","year":"2015","unstructured":"George, J., Abraham, A., Arya, G., & Kumaraswami, S. (2015). Acoustic characteristics of stop consonants during fast and normal speaking rate in typically developing Malayalam speaking children. Language in India, 15, 47.","journal-title":"Language in India"},{"unstructured":"Gouws, E., Wolvaardt, K., Kleynhans, N., & Barnard, E. (2004). Appropriate baseline values for hmm-based speech recognition. In Proceedings of PRASA (pp. 169\u2013172).","key":"9807_CR10"},{"unstructured":"Gunawan, A., et\u00a0al. (2010). English digits speech recognition system based on hidden Markov models. In Proceedings of international conference computer and communication engineering (ICCCE) (pp. 1\u20135).","key":"9807_CR11"},{"doi-asserted-by":"crossref","unstructured":"Huang, Z., Dong, M., Mao, Q., & Zhan, Y. (2014). Speech emotion recognition using CNN. In Proceedings of the 22nd ACM international conference on multimedia (pp. 801\u2013804).","key":"9807_CR12","DOI":"10.1145\/2647868.2654984"},{"issue":"6","key":"9807_CR13","doi-asserted-by":"publisher","first-page":"4522","DOI":"10.1121\/1.4984595","volume":"141","author":"Q Hussain","year":"2017","unstructured":"Hussain, Q., Proctor, M., Harvey, M., & Demuth, K. (2017). Acoustic characteristics of Punjabi retroflex and dental stops. The Journal of the Acoustical Society of America, 141(6), 4522\u20134542.","journal-title":"The Journal of the Acoustical Society of America"},{"doi-asserted-by":"crossref","unstructured":"Keselj, V. (2009). Speech and language processing Daniel Jurafsky and James H. Martin (Stanford University and University of Colorado at Boulder) Pearson Prentice Hall, 2009, xxxi+ 988 pp; hardbound, ISBN 978-0-13-187321-6, 115.00","key":"9807_CR14","DOI":"10.1162\/coli.B09-001"},{"issue":"6","key":"9807_CR15","doi-asserted-by":"publisher","first-page":"3221","DOI":"10.1121\/1.5081686","volume":"144","author":"A Kochetov","year":"2018","unstructured":"Kochetov, A., Tabain, M., Sreedevi, N., & Beare, R. (2018). Manner and place differences in Kannada coronal consonants: Articulatory and acoustic results. The Journal of the Acoustical Society of America, 144(6), 3221\u20133235.","journal-title":"The Journal of the Acoustical Society of America"},{"issue":"1","key":"9807_CR16","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1504\/IJCSYSE.2012.044740","volume":"1","author":"K Kumar","year":"2012","unstructured":"Kumar, K., Aggarwal, R., & Jain, A. (2012). A Hindi speech recognition system for connected words using HTK. International Journal of Computational Systems Engineering, 1(1), 25\u201332.","journal-title":"International Journal of Computational Systems Engineering"},{"doi-asserted-by":"crossref","unstructured":"Kurian, C., & Balakrishnan, K. (2009). Speech recognition of Malayalam numbers. In 2009 world congress on nature & biologically inspired computing (NaBIC) (pp. 1475\u20131479). IEEE.","key":"9807_CR17","DOI":"10.1109\/NABIC.2009.5393692"},{"key":"9807_CR18","doi-asserted-by":"publisher","first-page":"1081","DOI":"10.1016\/j.proeng.2012.01.966","volume":"30","author":"C Kurian","year":"2012","unstructured":"Kurian, C., & Balakrishnan, K. (2012). Development & evaluation of different acoustic models for Malayalam continuous speech recognition. Procedia Engineering, 30, 1081\u20131088.","journal-title":"Procedia Engineering"},{"issue":"6","key":"9807_CR19","doi-asserted-by":"publisher","first-page":"1339","DOI":"10.1007\/s12046-013-0160-2","volume":"38","author":"C Kurian","year":"2013","unstructured":"Kurian, C., & Balakrishnan, K. (2013). Connected digit speech recognition system for Malayalam language. Sadhana, 38(6), 1339\u20131346.","journal-title":"Sadhana"},{"issue":"8","key":"9807_CR20","doi-asserted-by":"publisher","first-page":"2203","DOI":"10.1109\/TMM.2014.2360798","volume":"16","author":"Q Mao","year":"2014","unstructured":"Mao, Q., Dong, M., Huang, Z., & Zhan, Y. (2014). Learning salient features for speech emotion recognition using convolutional neural networks. IEEE Transactions on Multimedia, 16(8), 2203\u20132213.","journal-title":"IEEE Transactions on Multimedia"},{"unstructured":"MATLAB. (2019). Matlab. Natick, MA: The MathWorks.","key":"9807_CR21"},{"unstructured":"Maxwell, O., Baker, B., Bundgaard-Nielsen, R., & Fletcher, J. (2015). A comparison of the acoustics of nonsense and real word stimuli: Coronal stops in Bengali. International Phonetics Society.","key":"9807_CR22"},{"unstructured":"Nazer, S., & Suresh, S. (2017). Acoustic analysis of nasal consonants during fast and normal speaking rate in Malayalam speaking adults. International Journal of Advance Research, Ideas and Innovations In Technology.","key":"9807_CR23"},{"key":"9807_CR24","first-page":"265","volume":"31","author":"M Ohala","year":"2001","unstructured":"Ohala, M., & Ohala, J. (2001). Acoustic VC transitions correlate with degree of perceptual confusion of place contrast in Hindi. Travaux du cercle Linguistique de Copenhague, 31, 265\u2013284.","journal-title":"Travaux du cercle Linguistique de Copenhague"},{"key":"9807_CR25","volume-title":"Speech communications: Human and machine","author":"D O\u2019shaughnessy","year":"1987","unstructured":"O\u2019shaughnessy, D. (1987). Speech communications: Human and machine. Piscataway: IEEE, Universities Press."},{"unstructured":"Palaz, D., Collobert, R., & Doss, M. M. (2013). Estimating phoneme class conditional probabilities from raw speech signal using convolutional neural networks. arXiv preprint arXiv:13041018.","key":"9807_CR26"},{"doi-asserted-by":"crossref","unstructured":"Palaz, D., Collobert, R., et al. (2015a). Analysis of CNN-based speech recognition system using raw speech as input. Technical report, Idiap.","key":"9807_CR27","DOI":"10.21437\/Interspeech.2015-3"},{"doi-asserted-by":"crossref","unstructured":"Palaz, D., Doss, M. M., & Collobert R. (2015b). Convolutional neural networks-based continuous speech recognition using raw speech signal. In 2015 IEEE international conference on acoustics (pp. 4295\u20134299). IEEE: Speech and Signal Processing (ICASSP).","key":"9807_CR28","DOI":"10.1109\/ICASSP.2015.7178781"},{"doi-asserted-by":"crossref","unstructured":"Passricha, V., & Aggarwal, R. K. (2018). Convolutional neural networks for raw speech recognition. In From natural to artificial intelligence-algorithms and applications. IntechOpen","key":"9807_CR29","DOI":"10.5772\/intechopen.80026"},{"issue":"8","key":"9807_CR30","doi-asserted-by":"publisher","first-page":"1816","DOI":"10.1109\/TIFS.2017.2689724","volume":"12","author":"H Qin","year":"2017","unstructured":"Qin, H., & El-Yacoubi, M. A. (2017). Deep representation-based feature extraction and recovering for finger-vein verification. IEEE Transactions on Information Forensics and Security, 12(8), 1816\u20131829.","journal-title":"IEEE Transactions on Information Forensics and Security"},{"unstructured":"Rabiner, L. (1993). Fundamentals of speech recognition. Fundamentals of speech recognition.","key":"9807_CR31"},{"doi-asserted-by":"crossref","unstructured":"Ramachandran, L. K., & Elizabeth, S. (2018). Generation of GMM weights by dirichlet distribution and model selection using information criterion for Malayalam speech recognition. In International conference on intelligent human computer interaction (pp 111\u2013122). Springer.","key":"9807_CR32","DOI":"10.1007\/978-3-030-04021-5_11"},{"key":"9807_CR33","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1016\/j.neunet.2014.08.005","volume":"64","author":"TN Sainath","year":"2015","unstructured":"Sainath, T. N., Kingsbury, B., Saon, G., Soltau, H., Ar, Mohamed, Dahl, G., et al. (2015). Deep convolutional neural networks for large-scale speech tasks. Neural Networks, 64, 39\u201348.","journal-title":"Neural Networks"},{"issue":"6","key":"9807_CR34","first-page":"2223","volume":"4","author":"P Saini","year":"2013","unstructured":"Saini, P., Kaur, P., & Dua, M. (2013). Hindi automatic speech recognition using HTK. International Journal of Engineering Trends and Technology (IJETT), 4(6), 2223\u20132229.","journal-title":"International Journal of Engineering Trends and Technology (IJETT)"},{"doi-asserted-by":"crossref","unstructured":"Schl\u00fcter, J., & B\u00f6ck, S. (2014). Improved musical onset detection with convolutional neural networks. In 2014 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 6979\u20136983). IEEE.","key":"9807_CR35","DOI":"10.1109\/ICASSP.2014.6854953"},{"issue":"11","key":"9807_CR36","doi-asserted-by":"publisher","first-page":"4615","DOI":"10.1007\/s12652-018-1146-z","volume":"10","author":"S Singhal","year":"2019","unstructured":"Singhal, S., Passricha, V., Sharma, P., & Aggarwal, R. K. (2019). Multi-level region-of-interest CNNS for end to end speech recognition. Journal of Ambient Intelligence and Humanized Computing, 10(11), 4615\u20134624.","journal-title":"Journal of Ambient Intelligence and Humanized Computing"},{"issue":"9","key":"9807_CR37","doi-asserted-by":"publisher","first-page":"1120","DOI":"10.1109\/LSP.2014.2325781","volume":"21","author":"P Swietojanski","year":"2014","unstructured":"Swietojanski, P., Ghoshal, A., & Renals, S. (2014). Convolutional neural networks for distant speech recognition. IEEE Signal Processing Letters, 21(9), 1120\u20131124.","journal-title":"IEEE Signal Processing Letters"},{"issue":"2","key":"9807_CR38","doi-asserted-by":"publisher","first-page":"890","DOI":"10.1121\/1.4941659","volume":"139","author":"M Tabain","year":"2016","unstructured":"Tabain, M., Butcher, A., Breen, G., & Beare, R. (2016). An acoustic study of nasal consonants in three central Australian languages. The Journal of the Acoustical Society of America, 139(2), 890\u2013903.","journal-title":"The Journal of the Acoustical Society of America"},{"unstructured":"Wikipedia Contributors. (2020a). Malayalam\u2014Wikipedia, the free encyclopedia. Retrieved March 4, 2020, from https:\/\/en.wikipedia.org\/w\/index.php?title=Malayalam&oldid=943360760.","key":"9807_CR39"},{"unstructured":"Wikipedia Contributors. (2020b). Spectrogram\u2014Wikipedia, the free encyclopedia. Retrieved March 4, 2020, from https:\/\/en.wikipedia.org\/w\/index.php?title=Spectrogram&oldid=941764840.","key":"9807_CR40"},{"key":"9807_CR41","volume-title":"Automatic speech recognition","author":"D Yu","year":"2016","unstructured":"Yu, D., & Deng, L. (2016). Automatic speech recognition. Berlin: Springer."},{"doi-asserted-by":"crossref","unstructured":"Zheng, W., Yu, J., Zou, Y. (2015). An experimental study of speech emotion recognition based on deep convolutional neural networks. In 2015 international conference on affective computing and intelligent interaction (ACII) (pp. 827\u2013831). IEEE.","key":"9807_CR42","DOI":"10.1109\/ACII.2015.7344669"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-021-09807-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-021-09807-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-021-09807-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,14]],"date-time":"2022-12-14T15:50:12Z","timestamp":1671033012000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-021-09807-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,2,3]]},"references-count":42,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2021,6]]}},"alternative-id":["9807"],"URL":"https:\/\/doi.org\/10.1007\/s10772-021-09807-1","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"type":"print","value":"1381-2416"},{"type":"electronic","value":"1572-8110"}],"subject":[],"published":{"date-parts":[[2021,2,3]]},"assertion":[{"value":"20 April 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 January 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 February 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}