{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T22:04:32Z","timestamp":1776809072179,"version":"3.51.2"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2012,2,3]],"date-time":"2012-02-03T00:00:00Z","timestamp":1328227200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2012,6]]},"DOI":"10.1007\/s10772-012-9131-y","type":"journal-article","created":{"date-parts":[[2012,2,2]],"date-time":"2012-02-02T15:01:44Z","timestamp":1328194904000},"page":"165-180","source":"Crossref","is-referenced-by-count":19,"title":["Integration of multiple acoustic and language models for improved Hindi speech recognition system"],"prefix":"10.1007","volume":"15","author":[{"given":"R. K.","family":"Aggarwal","sequence":"first","affiliation":[]},{"given":"M.","family":"Dave","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2012,2,3]]},"reference":[{"key":"9131_CR1","doi-asserted-by":"crossref","first-page":"297","DOI":"10.1007\/s10772-011-9108-2","volume":"14","author":"R. K. Aggarwal","year":"2011","unstructured":"Aggarwal, R. K., & Dave, M. (2011a). Acoustic modeling problem for automatic speech recognition system: conventional methods (Part\u00a0I). International Journal of Speech Technology, 14, 297\u2013308.","journal-title":"International Journal of Speech Technology"},{"key":"9131_CR2","doi-asserted-by":"crossref","first-page":"309","DOI":"10.1007\/s10772-011-9106-4","volume":"14","author":"R. K. Aggarwal","year":"2011","unstructured":"Aggarwal, R. K., & Dave, M. (2011b). Acoustic modeling problem for automatic speech recognition system: advances and refinements (Part II). International Journal of Speech Technology, 14, 309\u2013320.","journal-title":"International Journal of Speech Technology"},{"issue":"1","key":"9131_CR3","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1006\/csla.2001.0185","volume":"16","author":"X. L. Aubert","year":"2002","unstructured":"Aubert, X. L. (2002). An overview of decoding techniques for large vocabulary continuous speech recognition. Computer Speech and Language, 16(1), 89\u2013114.","journal-title":"Computer Speech and Language"},{"key":"9131_CR4","volume-title":"Speech recognition theory and C++ implementation","author":"C. Becchetti","year":"2004","unstructured":"Becchetti, C., & Ricotti, K. P. (2004). Speech recognition theory and C++ implementation. New York: Wiley."},{"key":"9131_CR5","doi-asserted-by":"crossref","first-page":"1742","DOI":"10.1016\/j.patrec.2008.05.008","volume":"29","author":"A. Benouareth","year":"2008","unstructured":"Benouareth, A., Ennaji, A., & Sellami, M. (2008). Semi-continuous HMMs with explicit state duration for unconstrained Arabic word modeling and recognition. Pattern Recognition Letters, 29, 1742\u20131752.","journal-title":"Pattern Recognition Letters"},{"key":"9131_CR6","first-page":"481","volume-title":"Proceedings ICASSP","author":"P. Beyerlein","year":"1998","unstructured":"Beyerlein, P. (1998). Discriminative model combination. In Proceedings ICASSP (pp. 481\u2013484)."},{"key":"9131_CR7","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1016\/0167-6393(92)90018-3","volume":"11","author":"H. Bourlard","year":"1992","unstructured":"Bourlard, H., Morgan, N., & Renals, S. (1992). Neural nets and hidden Markov models: review and generalizations. Speech Communication, 11, 237\u2013246.","journal-title":"Speech Communication"},{"key":"9131_CR8","unstructured":"Chopde, A. (2009). Itrans Indian language transliteration package version, 5.2 source. http:\/\/www.aczone.com\/itrans\/ ."},{"key":"9131_CR9","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S. Davis","year":"1980","unstructured":"Davis, S., & Mermelstein, P. (1980). Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Transactions on Acoustics, Speech, and Signal Processing, 28, 357\u2013366.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"9131_CR10","first-page":"537","volume-title":"Proceeding of IEEE ICASSP","author":"V. V. Digalakis","year":"1994","unstructured":"Digalakis, V. V., & Murveit, H. (1994). Genones: Optimization the degree of tying in a large vocabulary HMM-based speech recognizer. In Proceeding of IEEE ICASSP (pp. 537\u2013540)."},{"key":"9131_CR11","doi-asserted-by":"crossref","first-page":"347","DOI":"10.1109\/ASRU.1997.659110","volume-title":"Proceeding of the IEEE workshop on automatic speech recognition and understanding (ASRU\u201997)","author":"J. Fiscus","year":"1997","unstructured":"Fiscus, J. (1997). A post-processing system to yield reduced word error rates: recognizer output voting error reduction (ROVER). In Proceeding of the IEEE workshop on automatic speech recognition and understanding (ASRU\u201997), Santa Barbara (pp. 347\u2013352)."},{"issue":"3","key":"9131_CR12","doi-asserted-by":"crossref","first-page":"508","DOI":"10.1109\/TASL.2008.916519","volume":"16","author":"G. Garau","year":"2008","unstructured":"Garau, G., & Renals, S. (2008). Combining spectral representations for large-vocabulary continuous speech recognition. IEEE Transactions on Audio, Speech, and Language Processing, 16(3), 508\u2013518.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9131_CR13","first-page":"13","volume-title":"Proceedings of ICASSP","author":"R. Haeb-Umbach","year":"1992","unstructured":"Haeb-Umbach, R., & Ney, H. (1992). Linear discriminant analysis for improved large vocabulary continuous speech recognition. In Proceedings of ICASSP (pp. 13\u201316)."},{"issue":"3","key":"9131_CR14","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1016\/j.csl.2003.12.002","volume":"19","author":"A. Hagen","year":"2005","unstructured":"Hagen, A., & Morris, A. (2005). Recent advances in the multi-stream HMM\/ANN hybrid approach to noise robust ASR. Computer Speech and Language, 19(3), 3\u201330.","journal-title":"Computer Speech and Language"},{"key":"9131_CR15","doi-asserted-by":"crossref","first-page":"1738","DOI":"10.1121\/1.399423","volume":"87","author":"H. Hermansky","year":"1990","unstructured":"Hermansky, H. (1990). Perceptually predictive (PLP) analysis of speech. The Journal of the Acoustical Society of America, 87, 1738\u20131752.","journal-title":"The Journal of the Acoustical Society of America"},{"key":"9131_CR16","volume-title":"Proceeding IEEE ICASSP","author":"K. Kirchhoff","year":"2000","unstructured":"Kirchhoff, K., Fink, G. A., & Sagerer, G. (2000). Conversational speech recognition using acoustic and articulatory input. In Proceeding IEEE ICASSP, Istanbul, Turkey."},{"key":"9131_CR17","volume-title":"ISCA ITRW workshop on automatic speech recognition: challenges for the new mllennium","author":"K. Kirchhoff","year":"2000","unstructured":"Kirchhoff, K., & Bilmes, J. A. (2000). Combination and joint Training of acoustic classifiers for speech Recognition. In ISCA ITRW workshop on automatic speech recognition: challenges for the new mllennium, Paris, France."},{"key":"9131_CR18","doi-asserted-by":"crossref","first-page":"283","DOI":"10.1016\/S0167-6393(98)00061-2","volume":"26","author":"N. Kumar","year":"1998","unstructured":"Kumar, N., & Andreou, A. G. (1998). Heteroscedastic discriminant analysis and reduced rank HMMs for improved speech recognition. Speech Communication, 26, 283\u2013297.","journal-title":"Speech Communication"},{"issue":"2","key":"9131_CR19","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1006\/csla.1995.0010","volume":"9","author":"C. J. Leggetter","year":"1995","unstructured":"Leggetter, C. J., & Woodland, P. (1995). Speaker adaptation using maximum likelihood linear regression. Computer Speech and Language, 9(2), 171\u2013185.","journal-title":"Computer Speech and Language"},{"issue":"4","key":"9131_CR20","doi-asserted-by":"crossref","first-page":"373","DOI":"10.1006\/csla.2000.0152","volume":"14","author":"L. Mangu","year":"2000","unstructured":"Mangu, L., Brill, E., & Stolcke, A. (2000). Finding consensus in speech recognition: word error minimization and other application of confusion network. Computer Speech and Language, 14(4), 373\u2013400.","journal-title":"Computer Speech and Language"},{"key":"9131_CR21","doi-asserted-by":"crossref","first-page":"2965","DOI":"10.1016\/j.patcog.2008.05.008","volume":"41","author":"D. O\u2019Shaughnessy","year":"2008","unstructured":"O\u2019Shaughnessy, D. (2008). Automatic speech recognition: history, methods and challenges. Pattern Recognition, 41, 2965\u20132979. Invited paper.","journal-title":"Pattern Recognition"},{"issue":"4","key":"9131_CR22","doi-asserted-by":"crossref","first-page":"42","DOI":"10.1109\/MC.2002.993770","volume":"35","author":"M. Padmanabhan","year":"2002","unstructured":"Padmanabhan, M., & Picheny, M. (2002). Large vocabulary speech recognition algorithms. IEEE Computer Society, 35(4), 42\u201350.","journal-title":"IEEE Computer Society"},{"key":"9131_CR23","doi-asserted-by":"crossref","first-page":"379","DOI":"10.1016\/0885-2308(91)90005-B","volume":"5","author":"G. V. R. Rao","year":"1991","unstructured":"Rao, G. V. R., & Yegnanarayana, B. (1991). Word boundary hypothesization in Hindi speech. Computer Speech and Language, 5, 379\u2013392.","journal-title":"Computer Speech and Language"},{"key":"9131_CR24","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1007\/s10772-010-9086-9","volume":"14","author":"K. S. Rao","year":"2011","unstructured":"Rao, K. S. (2011). Application of prosody models for developing speech systems in Indian languages. International Journal of Speech Technology, 14, 19\u201333.","journal-title":"International Journal of Speech Technology"},{"key":"9131_CR25","first-page":"2795","volume-title":"Interspeech, ISCA","author":"A. Samir","year":"2008","unstructured":"Samir, A., Duchateau, J., & Vanhamme, H. (2008). Discriminative model combination and language model selection in a reading tutor for children. In Interspeech, ISCA, Brisbane Australia (pp. 2795\u20132798)."},{"key":"9131_CR26","volume-title":"Proceedings IEEE international conference on acoustics, speech, and signal processing","author":"A. Sankar","year":"2005","unstructured":"Sankar, A. (2005). Bayesian model combination (Baycom) for improved recognition. In Proceedings IEEE international conference on acoustics, speech, and signal processing."},{"key":"9131_CR27","doi-asserted-by":"crossref","unstructured":"Saraswathi, S., & Geetha, T. (2007). Comparison of morpheme-based language model with different word-based models for improving the performance of Tamil speech recognition system. ACM Transaction on Asian Language Information Processing, 6(3), article 9.","DOI":"10.1145\/1290002.1290003"},{"key":"9131_CR28","first-page":"915","volume-title":"IEEE international conference on spoken language processing (ICSLP)","author":"H. Schwenk","year":"2000","unstructured":"Schwenk, H., & Gauvain, J.-L. (2000). Combining multiple speech recognizers using voting and language model information. In IEEE international conference on spoken language processing (ICSLP), II Pekin (pp. 915\u2013918)."},{"issue":"5","key":"9131_CR29","doi-asserted-by":"crossref","first-page":"337","DOI":"10.1109\/89.536928","volume":"4","author":"P. Silsbee","year":"1996","unstructured":"Silsbee, P., & Bovik, A. (1996). Computer lip-reading for improved accuracy in ASR. IEEE Transactions on Speech and Audio Processing, 4(5), 337\u2013351.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"9131_CR30","first-page":"197","volume-title":"ICASSP","author":"O. Siohan","year":"2005","unstructured":"Siohan, O., Ramabhadran, B., & Kingsbury, B. (2005). Constructing ensembles of ASR systems using randomized decision trees. In ICASSP (Vol.\u00a0I, pp. 197\u2013200)."},{"key":"9131_CR31","volume-title":"Proc. speech transcription workshop","author":"A. Stolcke","year":"2000","unstructured":"Stolcke, A., et al. (2000). The SRI March 2000 Hub-5 conversational speech transcription system. In Proc. speech transcription workshop."},{"key":"9131_CR32","doi-asserted-by":"crossref","first-page":"163","DOI":"10.21437\/Eurospeech.1997-68","volume-title":"Proc. Eurospeech","author":"A. Stolke","year":"1997","unstructured":"Stolke, A., Konig, Y., & Weintraub, M. (1997). Explicit word error minimization in N-best list rescoring. In Proc. Eurospeech (Vol.\u00a01, pp.\u00a0163\u2013166)."},{"key":"9131_CR33","doi-asserted-by":"crossref","first-page":"679","DOI":"10.1006\/brln.2001.2556","volume":"81","author":"J. Vaid","year":"2002","unstructured":"Vaid, J., & Gupta, A. (2002). Exploring word recognition in a semi alphabetic script: the case of devnagari. Brain and Language, 81, 679\u2013690.","journal-title":"Brain and Language"},{"issue":"3","key":"9131_CR34","doi-asserted-by":"crossref","first-page":"247","DOI":"10.1016\/0167-6393(93)90095-3","volume":"12","author":"A. Varga","year":"1993","unstructured":"Varga, A., & Steeneken, H. J. M. (1993). Assessment for automatic recognition: II. NOISEX-92: a database and an experiment to study the effect of additive noise on speech recognition systems. ESCA Journal of Speech Communication, 12(3), 247\u2013251.","journal-title":"ESCA Journal of Speech Communication"},{"issue":"12","key":"9131_CR35","doi-asserted-by":"crossref","first-page":"1888","DOI":"10.1109\/29.45535","volume":"37","author":"A. Waibel","year":"1989","unstructured":"Waibel, A., Sawai, H., & Shikano, K. (1989). Modularity and scaling in large phonemic neural networks. IEEE Transactions on Acoustics, Speech, and Signal Processing, 37(12), 1888\u20131898.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"9131_CR36","first-page":"719","volume-title":"Proceeding of IEEE international conference on acoustics, speech and signal processing, ICASSP","author":"P. Woodland","year":"1997","unstructured":"Woodland, P., Gales, M., Pye, D., & Young, S. (1997). Broadcast news transcription using HTK. In Proceeding of IEEE international conference on acoustics, speech and signal processing, ICASSP, Munich, Germany (Vol.\u00a02, pp.\u00a0719\u2013722)."},{"key":"9131_CR37","unstructured":"Young, S., et al. (2009). The HTK Book. Microsoft Corporation and Cambridge University Engineering Department."},{"key":"9131_CR38","volume-title":"IEEE international conference on spoken language processing","author":"R. Zhang","year":"2006","unstructured":"Zhang, R., & Rudnicky, A. (2006). Investigations of Issues for using multiple acoustic models to improve CSR. In IEEE international conference on spoken language processing, Pitsburgh, PA, USA."},{"key":"9131_CR39","doi-asserted-by":"crossref","first-page":"514","DOI":"10.1016\/j.specom.2007.04.005","volume":"49","author":"A. Zolney","year":"2007","unstructured":"Zolney, A., Kocharov, D., Schluter, R., & Ney, H. (2007). Using multiple acoustic feature sets for speech recognition. Speech Communication, 49, 514\u2013525.","journal-title":"Speech Communication"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-012-9131-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-012-9131-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-012-9131-y","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,14]],"date-time":"2023-06-14T07:36:22Z","timestamp":1686728182000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-012-9131-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,2,3]]},"references-count":39,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2012,6]]}},"alternative-id":["9131"],"URL":"https:\/\/doi.org\/10.1007\/s10772-012-9131-y","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,2,3]]}}}