{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:27:04Z","timestamp":1775230024747,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":35,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819981809","type":"print"},{"value":"9789819981816","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,11,27]],"date-time":"2023-11-27T00:00:00Z","timestamp":1701043200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,27]],"date-time":"2023-11-27T00:00:00Z","timestamp":1701043200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-8181-6_6","type":"book-chapter","created":{"date-parts":[[2023,11,26]],"date-time":"2023-11-26T23:02:30Z","timestamp":1701039750000},"page":"69-84","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Exploring the\u00a0Integration of\u00a0Large Language Models into\u00a0Automatic Speech Recognition Systems: An Empirical Study"],"prefix":"10.1007","author":[{"given":"Zeping","family":"Min","sequence":"first","affiliation":[]},{"given":"Jinbo","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,27]]},"reference":[{"key":"6_CR1","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. 
Syst."},{"key":"6_CR2","doi-asserted-by":"crossref","unstructured":"Bu, H., Du, J., Na, X., Wu, B., Zheng, H.: Aishell-1: an open-source mandarin speech corpus and a speech recognition baseline. In: 2017 20th Conference of the Oriental Chapter of the International Coordinating Committee On Speech Databases and Speech I\/O Systems and Assessment (O-COCOSDA), pp. 1\u20135. IEEE (2017)","DOI":"10.1109\/ICSDA.2017.8384449"},{"key":"6_CR3","doi-asserted-by":"crossref","unstructured":"Chan, W., Jaitly, N., Le, Q., Vinyals, O.: Listen, attend and spell: a neural network for large vocabulary conversational speech recognition. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4960\u20134964. IEEE (2016)","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"6_CR4","unstructured":"Chan, W., Jaitly, N., Le, Q.V., Vinyals, O.: Listen, attend and spell. arXiv preprint arXiv:1508.01211 (2015)"},{"key":"6_CR5","doi-asserted-by":"crossref","unstructured":"Chiu, S.H., Chen, B.: Innovative bert-based reranking language models for speech recognition. In: 2021 IEEE Spoken Language Technology Workshop (SLT), pp. 266\u2013271. IEEE (2021)","DOI":"10.1109\/SLT48900.2021.9383557"},{"key":"6_CR6","unstructured":"Chorowski, J.K., Bahdanau, D., Serdyuk, D., Cho, K., Bengio, Y.: Attention-based models for speech recognition. In: Advances in Neural Information Processing Systems 28 (2015)"},{"key":"6_CR7","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"6_CR8","doi-asserted-by":"crossref","unstructured":"Dong, L., Xu, S., Xu, B.: Speech-transformer: a no-recurrence sequence-to-sequence model for speech recognition. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5884\u20135888. 
IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8462506"},{"key":"6_CR9","doi-asserted-by":"crossref","unstructured":"Futami, H., Inaguma, H., Ueno, S., Mimura, M., Sakai, S., Kawahara, T.: Distilling the knowledge of bert for sequence-to-sequence asr. arXiv preprint arXiv:2008.03822 (2020)","DOI":"10.21437\/Interspeech.2020-1179"},{"key":"6_CR10","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd international conference on Machine learning, pp. 369\u2013376 (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"6_CR11","unstructured":"Graves, A., Jaitly, N.: Towards end-to-end speech recognition with recurrent neural networks. In: International Conference on Machine Learning, pp. 1764\u20131772. PMLR (2014)"},{"key":"6_CR12","doi-asserted-by":"crossref","unstructured":"Graves, A., Mohamed, A.r., Hinton, G.: Speech recognition with deep recurrent neural networks. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 6645\u20136649. IEEE (2013)","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"6_CR13","doi-asserted-by":"crossref","unstructured":"Gulati, A., et al.: Conformer: convolution-augmented transformer for speech recognition. arXiv preprint arXiv:2005.08100 (2020)","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"6_CR14","doi-asserted-by":"crossref","unstructured":"Han, W., et al.: Contextnet: improving convolutional neural networks for automatic speech recognition with global context. arXiv preprint arXiv:2005.03191 (2020)","DOI":"10.21437\/Interspeech.2020-2059"},{"key":"6_CR15","doi-asserted-by":"crossref","unstructured":"Kannan, A., Wu, Y., Nguyen, P., Sainath, T.N., Chen, Z., Prabhavalkar, R.: An analysis of incorporating an external language model into a sequence-to-sequence model. 
In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1\u20135828. IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8462682"},{"key":"6_CR16","doi-asserted-by":"crossref","unstructured":"Kim, S., Hori, T., Watanabe, S.: Joint ctc-attention based end-to-end speech recognition using multi-task learning. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4835\u20134839. IEEE (2017)","DOI":"10.1109\/ICASSP.2017.7953075"},{"key":"6_CR17","doi-asserted-by":"crossref","unstructured":"Kubo, Y., Karita, S., Bacchiani, M.: Knowledge transfer from large-scale pretrained language models to end-to-end speech recognizers. In: ICASSP 2022\u20132022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 8512\u20138516. IEEE (2022)","DOI":"10.1109\/ICASSP43922.2022.9746801"},{"key":"6_CR18","unstructured":"OpenAI: Gpt-4 technical report (2023)"},{"key":"6_CR19","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang, L., Wu, J., Jiang, X., Almeida, D., Wainwright, C., Mishkin, P., Zhang, C., Agarwal, S., Slama, K., Ray, A., et al.: Training language models to follow instructions with human feedback. Adv. Neural. Inf. Process. Syst. 35, 27730\u201327744 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"6_CR20","doi-asserted-by":"crossref","unstructured":"Panayotov, V., Chen, G., Povey, D., Khudanpur, S.: Librispeech: an asr corpus based on public domain audio books. In: 2015 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp. 5206\u20135210. IEEE (2015)","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"6_CR21","unstructured":"Peng, Y., Dalmia, S., Lane, I., Watanabe, S.: Branchformer: Parallel mlp-attention architectures to capture local and global context for speech recognition and understanding. In: International Conference on Machine Learning, pp. 17627\u201317643. 
PMLR (2022)"},{"key":"6_CR22","doi-asserted-by":"crossref","unstructured":"Sainath, T.N., et al.: Two-pass end-to-end speech recognition. arXiv preprint arXiv:1908.10992 (2019)","DOI":"10.21437\/Interspeech.2019-1341"},{"key":"6_CR23","unstructured":"Scao, T.L., et al.: Bloom: A 176b-parameter open-access multilingual language model. arXiv preprint arXiv:2211.05100 (2022)"},{"key":"6_CR24","unstructured":"Shin, J., Lee, Y., Jung, K.: Effective sentence scoring method using bert for speech recognition. In: Asian Conference on Machine Learning, pp. 1081\u20131093. PMLR (2019)"},{"key":"6_CR25","doi-asserted-by":"crossref","unstructured":"Soltau, H., Liao, H., Sak, H.: Neural speech recognizer: acoustic-to-word lstm model for large vocabulary speech recognition. arXiv preprint arXiv:1610.09975 (2016)","DOI":"10.21437\/Interspeech.2017-1566"},{"key":"6_CR26","doi-asserted-by":"crossref","unstructured":"Tjandra, A., Sakti, S., Nakamura, S.: Listening while speaking: speech chain by deep learning. In: 2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), pp. 301\u2013308. IEEE (2017)","DOI":"10.1109\/ASRU.2017.8268950"},{"key":"6_CR27","unstructured":"Touvron, H., Lavril, T., Izacard, G., Martinet, X., Lachaux, M.A., Lacroix, T., Rozi\u00e8re, B., Goyal, N., Hambro, E., Azhar, F., et al.: Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)"},{"key":"6_CR28","doi-asserted-by":"crossref","unstructured":"Udagawa, T., Suzuki, M., Kurata, G., Itoh, N., Saon, G.: Effect and analysis of large-scale language model rescoring on competitive asr systems. arXiv preprint arXiv:2204.00212 (2022)","DOI":"10.21437\/Interspeech.2022-11123"},{"key":"6_CR29","doi-asserted-by":"crossref","unstructured":"Watanabe, S., et al.: Espnet: end-to-end speech processing toolkit. 
arXiv preprint arXiv:1804.00015 (2018)","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"6_CR30","doi-asserted-by":"publisher","unstructured":"Weiran, W., et al.: Improving Rare Word Recognition with LM-aware MWER training. In: Proceedings of Interspeech 2022, pp. 1031\u20131035 (2022). https:\/\/doi.org\/10.21437\/Interspeech.2022-10660","DOI":"10.21437\/Interspeech.2022-10660"},{"key":"6_CR31","doi-asserted-by":"crossref","unstructured":"Xu, L., et al.: Rescorebert: discriminative speech recognition rescoring with bert. In: ICASSP 2022\u20132022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6117\u20136121. IEEE (2022)","DOI":"10.1109\/ICASSP43922.2022.9747118"},{"key":"6_CR32","doi-asserted-by":"crossref","unstructured":"Yao, Z., et al.: Wenet: production oriented streaming and non-streaming end-to-end speech recognition toolkit. arXiv preprint arXiv:2102.01547 (2021)","DOI":"10.21437\/Interspeech.2021-1983"},{"key":"6_CR33","unstructured":"Zeng, A., et al.: Glm-130b: an open bilingual pre-trained model. arXiv preprint arXiv:2210.02414 (2022)"},{"key":"6_CR34","unstructured":"Zhang, B., et al.: Unified streaming and non-streaming two-pass end-to-end model for speech recognition. arXiv preprint arXiv:2012.05481 (2020)"},{"key":"6_CR35","unstructured":"Zhang, S., et al.: Opt: Open pre-trained transformer language models. 
arXiv preprint arXiv:2205.01068 (2022)"}],"container-title":["Communications in Computer and Information Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8181-6_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T11:24:48Z","timestamp":1710329088000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8181-6_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,27]]},"ISBN":["9789819981809","9789819981816"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8181-6_6","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"value":"1865-0929","type":"print"},{"value":"1865-0937","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,11,27]]},"assertion":[{"value":"27 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Changsha","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 November 
2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 November 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iconip2023.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1274","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"650","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"51% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number 
of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.14","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.46","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}