{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T19:54:59Z","timestamp":1743018899991,"version":"3.40.3"},"publisher-location":"Cham","reference-count":17,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031781186"},{"type":"electronic","value":"9783031781193"}],"license":[{"start":{"date-parts":[[2024,12,5]],"date-time":"2024-12-05T00:00:00Z","timestamp":1733356800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,5]],"date-time":"2024-12-05T00:00:00Z","timestamp":1733356800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-78119-3_23","type":"book-chapter","created":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T02:00:55Z","timestamp":1733277655000},"page":"331-342","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Cost Minimization Approach to\u00a0Fix the\u00a0Vocabulary Size in\u00a0a\u00a0Tokenizer for\u00a0an\u00a0End-to-End ASR System"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0502-527X","authenticated-orcid":false,"given":"Sunil Kumar","family":"Kopparapu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ashish","family":"Panda","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,5]]},"reference":[{"key":"23_CR1","unstructured":"Katare, D., Noguero, D.S., Park, S., Kourtellis, N., Janssen, M., Ding, A.Y.: Analyzing and mitigating bias for vulnerable classes: towards balanced representation in dataset. arXiv (2024)"},{"key":"23_CR2","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: Bengio, Y., LeCun, Y. (eds.) 3rd International Conference on Learning Representations, ICLR 2015, San Diego, 7\u20139 May 2015, Conference Track Proceedings (2015). http:\/\/arxiv.org\/abs\/1412.6980"},{"key":"23_CR3","doi-asserted-by":"publisher","unstructured":"Ko, T., Peddinti, V., Povey, D., Khudanpur, S.: Audio augmentation for speech recognition. In: Proceedings of the Interspeech 2015, pp. 3586\u20133589 (2015). https:\/\/doi.org\/10.21437\/Interspeech.2015-711","DOI":"10.21437\/Interspeech.2015-711"},{"key":"23_CR4","doi-asserted-by":"publisher","unstructured":"Kudo, T.: Subword regularization: improving neural network translation models with multiple subword candidates. In: Gurevych, I., Miyao, Y. (eds.) Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 66\u201375. Association for Computational Linguistics, Melbourne (2018). https:\/\/doi.org\/10.18653\/v1\/P18-1007","DOI":"10.18653\/v1\/P18-1007"},{"key":"23_CR5","doi-asserted-by":"publisher","unstructured":"Kudo, T., Richardson, J.: SentencePiece: a simple and language independent subword tokenizer and detokenizer for neural text processing (2018). https:\/\/doi.org\/10.18653\/v1\/D18-2012","DOI":"10.18653\/v1\/D18-2012"},{"key":"23_CR6","unstructured":"Panayotov, V., Chen, G., Povey, D., Khudanpur, S.: Librispeech ASR corpus: test-clean-100 (2015). https:\/\/www.openslr.org\/resources\/12\/test-clean.tar.gz. Accessed 26 June 2024"},{"key":"23_CR7","unstructured":"Panayotov, V., Chen, G., Povey, D., Khudanpur, S.: Librispeech ASR corpus: train-clean-100 (2015). https:\/\/www.openslr.org\/resources\/12\/train-clean-100.tar.gz. Accessed 26 June 2024"},{"key":"23_CR8","unstructured":"Panayotov, V., Chen, G., Povey, D., Khudanpur, S.: Librispeech ASR corpus: test-other-100 (2015). https:\/\/www.openslr.org\/resources\/12\/test-other.tar.gz. Accessed 26 June 2024"},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"Papadourakis, V., Mueller, M., Liu, J., Mouchtaris, A., Omologo, M.: Phonetically induced subwords for end-to-end speech recognition. In: Interspeech 2021 (2021). https:\/\/www.amazon.science\/publications\/phonetically-induced-subwords-for-end-to-end-speech-recognition","DOI":"10.21437\/Interspeech.2021-1787"},{"key":"23_CR10","doi-asserted-by":"publisher","unstructured":"Park, D.S., et al.: SpecAugment: a simple data augmentation method for automatic speech recognition. In: Proceedings of the Interspeech 2019, pp. 2613\u20132617 (2019). https:\/\/doi.org\/10.21437\/Interspeech.2019-2680","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"23_CR11","unstructured":"Raissi, T., Beck, E., Schl\u00fcter, R., Ney, H.: Towards consistent hybrid hmm acoustic modeling. arXiv (2021)"},{"key":"23_CR12","doi-asserted-by":"publisher","unstructured":"Schuster, M., Nakajima, K.: Japanese and Korean voice search. In: 2012 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5149\u20135152 (2012). https:\/\/doi.org\/10.1109\/ICASSP.2012.6289079","DOI":"10.1109\/ICASSP.2012.6289079"},{"key":"23_CR13","doi-asserted-by":"publisher","unstructured":"Sennrich, R., Haddow, B., Birch, A.: Neural machine translation of rare words with subword units. In: Erk, K., Smith, N.A. (eds.) Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 1715\u20131725. Association for Computational Linguistics, Berlin (2016). https:\/\/doi.org\/10.18653\/v1\/P16-1162","DOI":"10.18653\/v1\/P16-1162"},{"key":"23_CR14","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Guyon, I., et al. (eds.) Advances in Neural Information Processing Systems, vol.\u00a030. Curran Associates, Inc. (2017)"},{"key":"23_CR15","doi-asserted-by":"publisher","unstructured":"Watanabe, S., et al.: ESPnet: end-to-end speech processing toolkit. In: Proceedings of the Interspeech 2018, pp. 2207\u20132211 (2018). https:\/\/doi.org\/10.21437\/Interspeech.2018-1456","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"23_CR16","doi-asserted-by":"publisher","unstructured":"Xu, H., Ding, S., Watanabe, S.: Improving end-to-end speech recognition with pronunciation-assisted sub-word modeling. In: 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2019), pp. 7110\u20137114 (2019). https:\/\/doi.org\/10.1109\/ICASSP.2019.8682494","DOI":"10.1109\/ICASSP.2019.8682494"},{"key":"23_CR17","doi-asserted-by":"publisher","unstructured":"Zouhar, V., et al.: A formal perspective on byte-pair encoding. In: Rogers, A., Boyd-Graber, J., Okazaki, N. (eds.) Findings of the Association for Computational Linguistics (ACL 2023), pp. 598\u2013614. Association for Computational Linguistics, Toronto (2023). https:\/\/doi.org\/10.18653\/v1\/2023.findings-acl.38","DOI":"10.18653\/v1\/2023.findings-acl.38"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-78119-3_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T02:06:54Z","timestamp":1733278014000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-78119-3_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,5]]},"ISBN":["9783031781186","9783031781193"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-78119-3_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,5]]},"assertion":[{"value":"5 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kolkata","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpr2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icpr2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}