{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,23]],"date-time":"2025-12-23T15:43:27Z","timestamp":1766504607099},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2018,2,16]],"date-time":"2018-02-16T00:00:00Z","timestamp":1518739200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2018,6]]},"DOI":"10.1007\/s10772-018-9497-6","type":"journal-article","created":{"date-parts":[[2018,2,16]],"date-time":"2018-02-16T11:23:41Z","timestamp":1518780221000},"page":"211-216","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":37,"title":["Continuous Punjabi speech recognition model based on Kaldi ASR toolkit"],"prefix":"10.1007","volume":"21","author":[{"given":"Jyoti","family":"Guglani","sequence":"first","affiliation":[]},{"given":"A. N.","family":"Mishra","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,2,16]]},"reference":[{"key":"9497_CR2","doi-asserted-by":"crossref","unstructured":"Allauzen, C., Riley, M., Schalkwyk, J., Skut, W., & Mohri, M. (2007). OpenFst: A general and efficient weighted finitestate transducer library. In Proc. CIAA.","DOI":"10.1007\/978-3-540-76336-9_3"},{"issue":"4","key":"9497_CR3","doi-asserted-by":"publisher","first-page":"567","DOI":"10.1109\/89.326615","volume":"2","author":"JB Allen","year":"1994","unstructured":"Allen, J. B. (1994). How do humans process and recognize speech. IEEE Transactions on Speech and Audio Processing, 2(4), 567\u2013576.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"9497_CR1","doi-asserted-by":"crossref","unstructured":"Becerra, A., de la Rosa, J. I., & Gonz\u00e1lez, E. (2016). A case study of speech recognition in Spanish: From conventional to deep approach. In IEEE ANDESCON.","DOI":"10.1109\/ANDESCON.2016.7836212"},{"key":"9497_CR13","doi-asserted-by":"crossref","unstructured":"Bezoui, M., Elmoutaouakkil, A., & Beni-hssane, A. (2016). Feature extraction of some Quranic recitation using mel-frequency cepstral coeficients (MFCC). In 5th International Conference on Multimedia Computing and Systems ICMCS.","DOI":"10.1109\/ICMCS.2016.7905619"},{"issue":"16","key":"9497_CR26","first-page":"582","volume":"16","author":"W Chen","year":"2009","unstructured":"Chen, W., Zhenjiang, M., & Xiao, M. (2009). Comparison of different implementations of MFCC. Journal of Computer Science and Technology, 16(16), 582\u2013589.","journal-title":"Journal of Computer Science and Technology"},{"key":"9497_CR4","unstructured":"Chourasia, V., Samudravijaya, K., Ingle, M., & Chandwani, M. (2007). Hindi speech recognition under noisy conditions. In International Journal of Acoustic Society India (pp.\u00a041\u201346)."},{"key":"9497_CR28","doi-asserted-by":"crossref","unstructured":"Chow, Y.-L. (1990). Maximum mutual information estimation of HMM parameters for continuous speech recognition using the N-best algorithm. In IEEE 1990 International Conference on Acoustics, Speech, and Signal Processing, 1990 (ICASSP-90) (pp.\u00a0701\u2013704). IEEE.","DOI":"10.1109\/ICASSP.1990.115863"},{"key":"9497_CR14","doi-asserted-by":"crossref","unstructured":"Cosi, P. (2015). A KALDI-DNN-based ASR system for Italian. In International Joint Conference on Neural Networks IJCNN.","DOI":"10.1109\/IJCNN.2015.7280336"},{"key":"9497_CR21","doi-asserted-by":"crossref","unstructured":"Gopinath, R. A. (1998). Maximum likelihood modeling with Gaussian distributions for classification. In Proceedings of the 1998 IEEE International Conference on Acoustics, Speech and Signal Processing, 1998 (Vol.\u00a02, pp.\u00a0661\u2013664). IEEE.","DOI":"10.1109\/ICASSP.1998.675351"},{"key":"9497_CR8","doi-asserted-by":"publisher","first-page":"1738","DOI":"10.1121\/1.399423","volume":"87","author":"H Hermansky","year":"1990","unstructured":"Hermansky, H. (1990). Perceptual linear prediction (PLP) analysis of speech. Journal of Acoustic Society America, 87, 1738\u20131752.","journal-title":"Journal of Acoustic Society America"},{"key":"9497_CR9","doi-asserted-by":"crossref","unstructured":"Kipyatkova, I., & Karpov, A. (2016). DNN-based acoustic modeling for Russian speech recognition using Kaldi. In International Conference on Speech and Computer SPECOM (pp.\u00a0246\u2013253).","DOI":"10.1007\/978-3-319-43958-7_29"},{"key":"9497_CR7","doi-asserted-by":"crossref","unstructured":"Kou, H., & Shang, W. (2014). Parallelized feature extraction and acoustic model training. In Digital Signal Processing. Proceedings ICDSP. IEEE.","DOI":"10.1109\/ICDSP.2014.6900717"},{"key":"9497_CR10","doi-asserted-by":"crossref","unstructured":"Lee, A., Kawahara, T., & Shikano, K. (2001). Julius\u2014An open source realtime, large vocabulary recognition engine. In EUROSPEECH (pp.\u00a01691\u20131694).","DOI":"10.21437\/Eurospeech.2001-396"},{"key":"9497_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/S0167-6393(97)00021-6","volume":"22","author":"RP Lippman","year":"1997","unstructured":"Lippman, R. P. (1997). Speech recognition by machines and humans. Speech Communication, 22, 1\u201315.","journal-title":"Speech Communication"},{"issue":"1","key":"9497_CR12","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1006\/csla.2001.0184","volume":"20","author":"M Mohri","year":"2002","unstructured":"Mohri, M., Pereira, F., & Riley, M. (2002). Weighted finite-state transducers in speech recognition. Computer Speech and Language, 20(1), 69\u201388.","journal-title":"Computer Speech and Language"},{"key":"9497_CR15","unstructured":"Povey, D. (2003). Discriminative training for large vocabulary speech recognition, PhD thesis, Cambridge University Engineering Department."},{"key":"9497_CR6","doi-asserted-by":"crossref","unstructured":"Povey, D., Gales, M. J. F., Kim, D. Y., & Woodland, P. C. (2003). MMI-MAP and MPE-MAP for acoustic model adaptation. In INTERSPEECH.","DOI":"10.21437\/Eurospeech.2003-572"},{"key":"9497_CR16","unstructured":"Povey, D., Ghoshal, A., Boulianne, G., Burget, L., Glembek, O., Goel, N., Hannemann, M., Motlicek, P., Qian, Y., Schwarz, P., & Silovsky, J. (2011). The Kaldi speech recognition toolkit. In IEEE 2011 Workshop on Automatic Speech Recognition and Understanding (No. EPFL-CONF192584). IEEE Signal Processing Society."},{"key":"9497_CR17","doi-asserted-by":"crossref","unstructured":"Povey, D., Hannemann, M., Boulianne, G., Burget, L., Ghoshal, A., Janda, M., Karafit, M., Kombrink, S., Motlek, P., Qian, Y., & Riedhammer, K. (2012). Generating exact lattices in the WFST framework. In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (pp.\u00a04213\u20134216).","DOI":"10.1109\/ICASSP.2012.6288848"},{"key":"9497_CR5","doi-asserted-by":"crossref","unstructured":"Povey, D., Kanevsky, D., Kingsbury, B., Ramabhadran, B., Saon, G., & Visweswariah, K. (2008a). Boosted MMI for model and feature-space discriminative training, In IEEE International Conference on Acoustics, Speech and Signal Processing, 2008 (ICASSP 2008) (pp.\u00a04057\u20134060). IEEE.","DOI":"10.1109\/ICASSP.2008.4518545"},{"key":"9497_CR18","doi-asserted-by":"crossref","unstructured":"Povey, D., Kanevsky, D., Kingsbury, B., Ramabhadran, B., Saon, G., & Visweswariah, K. (2008b). Boosted MMI for model and feature-space discriminative training, In ICASSP.","DOI":"10.1109\/ICASSP.2008.4518545"},{"key":"9497_CR19","volume-title":"Minimum phone error and ismoothing for improved discriminative training","author":"D Povey","year":"2002","unstructured":"Povey, D., & Woodland, P. C. (2002). Minimum phone error and ismoothing for improved discriminative training. Cambridge: Cambridge University Engineering Department."},{"key":"9497_CR20","volume-title":"Fundamental of speech recognition","author":"LR Rabiner","year":"2003","unstructured":"Rabiner, L. R., & Juang, B. H. (2003). Fundamental of speech recognition (1st\u00a0edn.). Delhi: Pearson Education.","edition":"1"},{"key":"9497_CR22","doi-asserted-by":"crossref","unstructured":"Rybach, D., Gollan, C., Heigold, G., Hoffmeister, B., Loof, J., Schluter, R., & Ney, H. (2009) The RWTH Aachen University open source speech recognition system. In INTERSPEECH (pp.\u00a02111\u20132114).","DOI":"10.21437\/Interspeech.2009-604"},{"issue":"4","key":"9497_CR24","doi-asserted-by":"publisher","first-page":"609","DOI":"10.1515\/aoa-2015-0061","volume":"40","author":"P Upadhyaya","year":"2015","unstructured":"Upadhyaya, P., Farooq, O., Abidi, M. R., & Varshney, P. (2015). Comparative study of visual feature for bimodal Hindi speech recognition. Archives of Acoustics, 40(4), 609\u2013619.","journal-title":"Archives of Acoustics"},{"key":"9497_CR25","unstructured":"Walker, W., Lamere, P., Kwok, P., Raj, B., Singh, R., Gouvea, E., Wolf, P., & Woelfel, J. (2004) Sphinx-4: A flexible open source framework for speech recognition. Sun Microsystems Inc., Technical Report SML1 TR20040811."},{"key":"9497_CR23","unstructured":"Yadava, G. T., & Jayanna, H. S. (2016). Development and comparison of ASR models using Kaldi for noisy and enhanced kannada speech. In International Conference on Advances in Computing, Communications and Informatics ICACCI (pp.\u00a0635\u2013644)."},{"key":"9497_CR27","volume-title":"The HTK book (for version 3.4)","author":"G Young","year":"2009","unstructured":"Young, G., Evermann, G., Gales, M., Hain, T., Kershaw, D., Liu, X., Moore, G., Odell, J., Ollason, D., Povey, D., Valtchev, V., & Woodland, P. (2009). The HTK book (for version 3.4). Cambridge: Cambridge University Engineering Department."}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-018-9497-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-018-9497-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-018-9497-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,14]],"date-time":"2022-08-14T12:40:51Z","timestamp":1660480851000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-018-9497-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,2,16]]},"references-count":28,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2018,6]]}},"alternative-id":["9497"],"URL":"https:\/\/doi.org\/10.1007\/s10772-018-9497-6","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,2,16]]},"assertion":[{"value":"4 December 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 February 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 February 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}