{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T14:12:15Z","timestamp":1774879935062,"version":"3.50.1"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T00:00:00Z","timestamp":1765497600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T00:00:00Z","timestamp":1765497600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s10772-025-10238-5","type":"journal-article","created":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T12:40:49Z","timestamp":1765543249000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Hybrid CNN-BLSTM support vector machine architecture for E2E speech recognition"],"prefix":"10.1007","volume":"29","author":[{"given":"Jaspreet Kaur","family":"Sandhu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Munish","family":"Kumar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Amitoj","family":"Singh","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,12,12]]},"reference":[{"key":"10238_CR1","doi-asserted-by":"crossref","unstructured":"Abdel-Hamid, O., Deng, L., & Yu, D. (2013). Exploring convolutional neural network structures and optimization techniques for speech recognition. Interspeech, 3366\u20133370.","DOI":"10.21437\/Interspeech.2013-744"},{"key":"10238_CR2","doi-asserted-by":"crossref","unstructured":"Abdel-Hamid, O., Mohamed, A., Jiang, H., & Penn, G. (2012). Applying convolutional neural networks concepts to hybrid NN-HMM model for speech recognition. In 2012 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 4277\u20134280). IEEE.","DOI":"10.1109\/ICASSP.2012.6288864"},{"issue":"10","key":"10238_CR3","doi-asserted-by":"publisher","first-page":"1533","DOI":"10.1109\/TASLP.2014.2339736","volume":"22","author":"O. Abdel-Hamid","year":"2014","unstructured":"Abdel-Hamid, O., Mohamed, A. R., Jiang, H., Deng, L., Penn, G., & Yu, D. (2014, Oct). Convolutional neural networks for speech recognition. (In English), IEEE-ACM Transactions on Audio Speech and Language Processing, 22(10), 1533\u20131545.","journal-title":"(In English), Ieee-Acm Transactions on Audio Speech and Language Processing"},{"issue":"2","key":"10238_CR4","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1109\/72.279181","volume":"5","author":"Y. Bengio","year":"1994","unstructured":"Bengio, Y., Simard, P., & Frasconi, P. (1994). Learning long-term dependencies with gradient descent is difficult. IEEE Transactions on Neural Networks, 5(2), 157\u2013166.","journal-title":"IEEE Transactions on Neural Networks"},{"key":"10238_CR5","volume-title":"Lecture notes of EE364b","author":"S. Boyd","year":"2006","unstructured":"Boyd, S., & Mutapcic, A. (2006). Subgradient methods. In Lecture notes of EE364b (Vol. 2007). Stanford University, Winter Quarter."},{"issue":"12","key":"10238_CR6","first-page":"265","volume":"2","author":"K. Crammer","year":"2001","unstructured":"Crammer, K., & Singer, Y. (2001). On the algorithmic implementation of multiclass kernel-based vector machines. Journal of Machine Learning Research, 2(12), 265\u2013292.","journal-title":"Journal of Machine Learning Research"},{"issue":"3","key":"10238_CR7","doi-asserted-by":"publisher","first-page":"509","DOI":"10.1111\/1467-968X.12135","volume":"116","author":"J. Evans","year":"2018","unstructured":"Evans, J., Yeh, W.-C., & Kulkarni, R. (2018). Acoustics of tone in Indian Punjabi. Transactions of the Philological Society, 116(3), 509\u2013528.","journal-title":"Transactions of the Philological Society"},{"issue":"2","key":"10238_CR8","doi-asserted-by":"publisher","first-page":"36","DOI":"10.3390\/computers9020036","volume":"9","author":"T. G. Fantaye","year":"2020","unstructured":"Fantaye, T. G., Yu, J., & Hailu, T. T. (2020). Advanced convolutional neural network-based hybrid acoustic models for low-resource speech recognition. Computers, 9(2), 36.","journal-title":"Computers"},{"issue":"2","key":"10238_CR9","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1006\/csla.1998.0043","volume":"12","author":"M. J. Gales","year":"1998","unstructured":"Gales, M. J. (1998). Maximum likelihood linear transformations for HMM-based speech recognition. Computer Speech & Language, 12(2), 75\u201398.","journal-title":"Computer Speech & Language"},{"key":"10238_CR10","unstructured":"Gehring, J., Auli, M., Grangier, D., Yarats, D., & Dauphin, Y. N. (2017). Convolutional sequence to sequence learning. In International conference on machine learning (ICML)."},{"key":"10238_CR11","unstructured":"Glorot, X., & Bengio, Y. (2010). Understanding the difficulty of training deep feedforward neural networks. In Proceedings of the thirteenth international conference on artificial intelligence and statistics (pp. 249\u2013256)."},{"key":"10238_CR12","doi-asserted-by":"crossref","unstructured":"Graves, A., & Schmidhuber, J. (2005). Framewise phoneme classification with bidirectional LSTM and other neural network architectures. Neural Networks, 18(5\u20136), 602\u2013610.","DOI":"10.1016\/j.neunet.2005.06.042"},{"issue":"6","key":"10238_CR13","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","volume":"29","author":"G. Hinton","year":"2012","unstructured":"Hinton, G., et al. (2012). Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups. IEEE Signal Processing Magazine, 29(6), 82\u201397.","journal-title":"IEEE Signal Processing Magazine"},{"issue":"7\u20138","key":"10238_CR14","doi-asserted-by":"publisher","first-page":"588","DOI":"10.1016\/j.specom.2006.12.006","volume":"49","author":"Y. Hu","year":"2007","unstructured":"Hu, Y., & Loizou, P. C. (2007). Subjective comparison and evaluation of speech enhancement algorithms. Speech Communication, 49(7\u20138), 588\u2013601.","journal-title":"Speech Communication"},{"key":"10238_CR15","unstructured":"I.-T. P.56. (1993). edObjective measurement of active speech level."},{"issue":"1","key":"10238_CR16","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1007\/s10994-009-5108-8","volume":"77","author":"T. Joachims","year":"2009","unstructured":"Joachims, T., Finley, T., & Yu, C. N. J. (2009). Cutting-plane training of structural SVMs. (In English). Machine Learning, 77(1), 27\u201359.","journal-title":"(In English), Machine Learning"},{"key":"10238_CR17","doi-asserted-by":"crossref","unstructured":"Passricha, V., & Aggarwal Rajesh, K. (2019). A hybrid of deep CNN and Bidirectional LSTM for automatic speech recognition. Journal of Intelligent Systems, 29(1).","DOI":"10.1515\/jisys-2018-0372"},{"key":"10238_CR18","doi-asserted-by":"crossref","unstructured":"Passricha, V., & Aggarwal, R. K. (2018). Convolutional support vector machines for speech recognition. International Journal of Speech Technology, 1\u20139.","DOI":"10.1007\/s10772-018-09584-4"},{"key":"10238_CR19","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1016\/B978-0-12-818130-0.00002-7","volume-title":"Intelligent speech signal processing","author":"V. Passricha","year":"2019","unstructured":"Passricha, V., & Aggarwal, R. K. (2019). End-to-end acoustic modeling using convolutional neural networks. In Intelligent speech signal processing (pp. 5\u201337). Elsevier."},{"key":"10238_CR20","doi-asserted-by":"crossref","unstructured":"Passricha, V., & Aggarwal, R. K. A comparative analysis of pooling strategies for convolutional neural network based Hindi ASR. Journal of Ambient Intelligence and Humanized Computing, Journal Article. May 24 2020.","DOI":"10.1007\/s12652-019-01325-y"},{"key":"10238_CR21","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1007\/978-1-4613-1367-0_10","volume-title":"Automatic speech and speaker recognition","author":"T. Robinson","year":"1996","unstructured":"Robinson, T., Hochberg, M., & Renals, S. (1996). The use of recurrent neural networks in continuous speech recognition. In Automatic speech and speaker recognition (pp. 233\u2013258). Springer."},{"key":"10238_CR22","doi-asserted-by":"crossref","unstructured":"Sainath, T. N., et al. (2013). Improvements to deep convolutional neural networks for LVCSR. In 2013 IEEE workshop on automatic speech recognition and understanding (pp. 315\u2013320). IEEE.","DOI":"10.1109\/ASRU.2013.6707749"},{"key":"10238_CR23","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1016\/j.neunet.2014.08.005","volume":"64","author":"T. N. Sainath","year":"2015","unstructured":"Sainath, T. N., et al. (2015). Deep convolutional neural networks for large-scale speech tasks. Neural Networks, 64, 39\u201348.","journal-title":"Neural Networks"},{"issue":"11","key":"10238_CR24","doi-asserted-by":"publisher","first-page":"2267","DOI":"10.1109\/TASL.2013.2284378","volume":"21","author":"T. N. Sainath","year":"2013","unstructured":"Sainath, T. N., Kingsbury, B., Soltau, H., & Ramabhadran, B. (2013). Optimization techniques to improve training speed of deep neural networks for large speech tasks. IEEE Transactions on Audio, Speech, and Language Processing, 21(11), 2267\u20132276.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"10238_CR25","doi-asserted-by":"crossref","unstructured":"Sainath, T. N., Mohamed, A.-R., Kingsbury, B., & Ramabhadran, B. (2013). Deep convolutional neural networks for LVCSR. In 2013 IEEE international conference on acoustics, speech and signal processing (pp. 8614\u20138618). IEEE.","DOI":"10.1109\/ICASSP.2013.6639347"},{"key":"10238_CR26","doi-asserted-by":"crossref","unstructured":"Sainath, T. N., Vinyals, O., Senior, A., & Sak, H. (2015). Convolutional, long short-term memory, fully connected deep neural networks. In Acoustics, speech and signal processing (ICASSP), 2015 IEEE international conference on (pp. 4580\u20134584). IEEE.","DOI":"10.1109\/ICASSP.2015.7178838"},{"key":"10238_CR27","doi-asserted-by":"crossref","unstructured":"Sak, H., Senior, A., & Beaufays, F. (2014). Long short-term memory recurrent neural network architectures for large scale acoustic modeling. Interspeech, 338\u2013342.","DOI":"10.21437\/Interspeech.2014-80"},{"issue":"11","key":"10238_CR28","doi-asserted-by":"publisher","first-page":"2673","DOI":"10.1109\/78.650093","volume":"45","author":"M. Schuster","year":"1997","unstructured":"Schuster, M., & Paliwal, K. K. (1997). Bidirectional recurrent neural networks. IEEE Transactions on Signal Processing, 45(11), 2673\u20132681.","journal-title":"IEEE Transactions on Signal Processing"},{"key":"10238_CR29","unstructured":"Sermanet, P., Chintala, S., & LeCun, Y. (2012). Convolutional neural networks applied to house numbers digit classification. In Pattern recognition (ICPR), 2012 21st international conference on (pp. 3288\u20133291). IEEE."},{"issue":"1","key":"10238_CR30","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s10107-010-0420-4","volume":"127","author":"S. Shalev-Shwartz","year":"2010","unstructured":"Shalev-Shwartz, S., Singer, Y., Srebro, N., & Cotter, A. (2010). Pegasos: Primal estimated sub-gradient solver for SVM. Mathematical Programming, 127(1), 3\u201330, 2010\/10\/16.","journal-title":"Mathematical Programming"},{"key":"10238_CR31","doi-asserted-by":"crossref","unstructured":"Singhal, S., Passricha, V., Sharma, P., & Aggarwal, R. K. (2018, 29). Multi-level region-of-interest CNNs for end to end speech recognition. Journal of Ambient Intelligence and Humanized Computing, Journal Article November.","DOI":"10.1007\/s12652-018-1146-z"},{"key":"10238_CR32","unstructured":"Smith, N., & Gales, M. (2001). Speech recognition using SVMs. Advances in Neural Information Processing Systems, 14."},{"key":"10238_CR33","doi-asserted-by":"crossref","unstructured":"Soltau, H., Kuo, H.-K., Mangu, L., Saon, G., & Beran, T. (2013). Neural network acoustic models for the DARPA rats program. Interspeech, 3092\u20133096.","DOI":"10.21437\/Interspeech.2013-674"},{"key":"10238_CR34","doi-asserted-by":"crossref","unstructured":"Sun, Y., Ten Bosch, L., & Boves, L. (2010). Hybrid HMM\/BLSTM-RNN for robust speech recognition. In Text, speech and dialogue: 13th international conference, proceedings, TSD (Vol. 13, pp. 400\u2013407). Brno, Czech Republic, September, 6\u201310, 2010. Springer.","DOI":"10.1007\/978-3-642-15760-8_51"},{"key":"10238_CR35","doi-asserted-by":"crossref","unstructured":"Toth, L. (2014). Convolutional deep maxout networks for phone recognition. Interspeech, 1078\u20131082.","DOI":"10.21437\/Interspeech.2014-278"},{"key":"10238_CR36","doi-asserted-by":"crossref","unstructured":"Vapnik, V. N. (1995). The nature of statistical learning theory. Springer.","DOI":"10.1007\/978-1-4757-2440-0"},{"issue":"3","key":"10238_CR37","doi-asserted-by":"publisher","first-page":"544","DOI":"10.1109\/TASL.2012.2227734","volume":"21","author":"S. X. Zhang","year":"2013","unstructured":"Zhang, S. X., & Gales, M. J. F. (2013). Structured SVMs for automatic speech recognition. (In English). IEEE Transactions on Audio Speech and Language Processing, 21(3), 544\u2013555.","journal-title":"(In English), Ieee Transactions on Audio Speech and Language Processing"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-025-10238-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-025-10238-5","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-025-10238-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T13:22:18Z","timestamp":1774876938000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-025-10238-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,12]]},"references-count":37,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["10238"],"URL":"https:\/\/doi.org\/10.1007\/s10772-025-10238-5","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,12]]},"assertion":[{"value":"5 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"7"}}