{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T18:26:33Z","timestamp":1743099993776,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":43,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819609130"},{"type":"electronic","value":"9789819609147"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-0914-7_12","type":"book-chapter","created":{"date-parts":[[2025,1,22]],"date-time":"2025-01-22T15:07:36Z","timestamp":1737558456000},"page":"178-186","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Automatic Post-editing of\u00a0Speech Recognition System Output Using Large Language Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7636-3797","authenticated-orcid":false,"given":"Sheng","family":"Li","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4997-3850","authenticated-orcid":false,"given":"Jiyi","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6424-8633","authenticated-orcid":false,"given":"Yang","family":"Cao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,23]]},"reference":[{"key":"12_CR1","doi-asserted-by":"crossref","unstructured":"Baevski, A., Mohamed, A.: Effectiveness of self-supervised pre-training for ASR. In: Proceedings of the IEEE-ICASSP, pp. 7694\u20137698 (2020)","DOI":"10.1109\/ICASSP40776.2020.9054224"},{"key":"12_CR2","unstructured":"Baevski, A., Zhou, Y., Mohamed, A., Auli, M.: wav2vec 2.0: a framework for self-supervised learning of speech representations. In: Proceedings of the NeurIPS, pp. 12449\u201312460 (2020)"},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Chan, W., Jaitly, N., Le, Q., Vinyals, O.: Listen, attend and spell: a neural network for large vocabulary conversational speech recognition. In: Proceedings of the IEEE-ICASSP (2016)","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"12_CR4","unstructured":"Chen, C., Hu, Y., Yang, C.H.H., Siniscalchi, S.M., Chen, P.Y., Chng, E.S.: HyPoradise: an open baseline for generative speech recognition with large language models. arXiv abs\/2309.15701 (2023)"},{"key":"12_CR5","unstructured":"Chorowski, J., Bahdanau, D., Serdyuk, D., Cho, K., Bengio, Y.: Attention-based models for speech recognition. In: Proceedings of the NeurlPS (2015)"},{"key":"12_CR6","doi-asserted-by":"crossref","unstructured":"Chorowski, J., Jaitly, N.: Towards better decoding and language model integration in sequence to sequence models. In: Proceedings of the INTERSPEECH (2016)","DOI":"10.21437\/Interspeech.2017-343"},{"issue":"1","key":"12_CR7","first-page":"30","volume":"20","author":"G Dahl","year":"2012","unstructured":"Dahl, G., Yu, D., Deng, L., Acero, A.: Context dependent pre-trained deep neural networks for large vocabulary speech recognition. IEEE Trans. ASLP 20(1), 30\u201342 (2012)","journal-title":"IEEE Trans. ASLP"},{"key":"12_CR8","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the NAACL, pp. 4171\u20134186 (2019)"},{"key":"12_CR9","doi-asserted-by":"crossref","unstructured":"Dixon, P.R., Hori, C., Kashioka, H.: A specialized WFST approach for class models and dynamic vocabulary. In: Proceedings of the INTERSPEECH, pp. 1075\u20131078 (2012)","DOI":"10.21437\/Interspeech.2012-326"},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Dong, L., Xu, S., Xu, B.: Speech-transformer: a no-recurrence sequence-to-sequence model for speech recognition. In: Proceedings of the IEEE-ICASSP (2018)","DOI":"10.1109\/ICASSP.2018.8462506"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Futami, H., Inaguma, H., Ueno, S., Mimura, M., Sakai, S., Kawahara, T.: Distilling the knowledge of BERT for sequence-to-sequence ASR. CoRR abs\/2008.03822 (2020)","DOI":"10.21437\/Interspeech.2020-1179"},{"key":"12_CR12","unstructured":"Graves, A., Jaitly, N.: Towards end-to-end speech recognition with recurrent neural networks. In: Proceedings of the ICML (2014)"},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Hadian, H., Sameti, H., Povey, D., Khudanpur, S.: End-to-end speech recognition using lattice-free MMI. In: Proceedings of the INTERSPEECH (2018)","DOI":"10.21437\/Interspeech.2018-1423"},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"Hsu, W.N., Tsai, Y.H.H., Bolte, B., Salakhutdinov, R., Mohamed, A.: HuBERT: how much can a bad teacher benefit ASR pre-training? In: Proceedings of the IEEE-ICASSP, pp. 6533\u20136537 (2021)","DOI":"10.1109\/ICASSP39728.2021.9414460"},{"key":"12_CR15","unstructured":"Hu, J.E., et al.: LoRA: low-rank adaptation of large language models. arXiv abs\/2106.09685 (2021)"},{"key":"12_CR16","doi-asserted-by":"publisher","first-page":"532","DOI":"10.1109\/PROC.1976.10159","volume":"64","author":"F Jelinek","year":"1976","unstructured":"Jelinek, F.: Continuous speech recognition by statistical methods. Proc. IEEE 64, 532\u2013556 (1976)","journal-title":"Proc. IEEE"},{"key":"12_CR17","unstructured":"Li, L.H., Yatskar, M., Yin, D., Hsieh, C.J., Chang, K.W.: VisualBERT: a simple and performant baseline for vision and language. arXiv preprint arXiv:1908.03557 (2019)"},{"key":"12_CR18","doi-asserted-by":"crossref","unstructured":"Li, X., et\u00a0al.: Oscar: object-semantics aligned pre-training for vision-language tasks. In: Proceedings of the ECCV, pp. 121\u2013137 (2020)","DOI":"10.1007\/978-3-030-58577-8_8"},{"key":"12_CR19","doi-asserted-by":"crossref","unstructured":"Ljolje, A., et\u00a0al.: Efficient general lattice generation and rescoring. In: EUROSPEECH (1999)","DOI":"10.21437\/Eurospeech.1999-320"},{"key":"12_CR20","unstructured":"Lu, J., Batra, D., Parikh, D., Lee, S.: ViLBERT: pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. arXiv preprint arXiv:1908.02265 (2019)"},{"key":"12_CR21","doi-asserted-by":"crossref","unstructured":"Miao, Y., Gowayyed, M., Metze, F.: EESEN: end-to-end speech recognition using deep RNN models and WFST-based decoding. In: Proceedings of the IEEE-ASRU, pp. 167\u2013174 (2015)","DOI":"10.1109\/ASRU.2015.7404790"},{"key":"12_CR22","doi-asserted-by":"crossref","unstructured":"Mohri, M., et\u00a0al.: Speech recognition with weighted finite-state transducers (2008)","DOI":"10.1007\/978-3-540-49127-9_28"},{"key":"12_CR23","unstructured":"OpenAI: Whisper: Robust speech recognition via large-scale weak supervision (2023)"},{"key":"12_CR24","doi-asserted-by":"crossref","unstructured":"Peng, Y., et\u00a0al.: Reproducing whisper-style training using an open-source toolkit and publicly available data. In: Proceedings of the IEEE-ASRU, pp.\u00a01\u20138 (2023)","DOI":"10.1109\/ASRU57964.2023.10389676"},{"key":"12_CR25","doi-asserted-by":"crossref","unstructured":"Peng, Y., et\u00a0al.: OWSM v3.1: better and faster open whisper-style speech models based on e-branchformer. arXiv abs\/2401.16658 (2024)","DOI":"10.21437\/Interspeech.2024-1194"},{"key":"12_CR26","doi-asserted-by":"crossref","unstructured":"Peng, Y., Sudo, Y., Shakeel, M., Watanabe, S.: OWSM-CTC: an open encoder-only speech foundation model for speech recognition, translation, and language identification (2024)","DOI":"10.18653\/v1\/2024.acl-long.549"},{"key":"12_CR27","unstructured":"Pratap, V., et\u00a0al.: Scaling speech technology to 1, 000+ languages. CoRR abs\/2305.13516 (2023)"},{"issue":"2","key":"12_CR28","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/5.18626","volume":"77","author":"L Rabiner","year":"1988","unstructured":"Rabiner, L.: A tutorial on hidden Markov models and selected applications in speech recognition. Proc. IEEE 77(2), 257\u2013286 (1988)","journal-title":"Proc. IEEE"},{"key":"12_CR29","doi-asserted-by":"crossref","unstructured":"Sainath, T.N., et\u00a0al.: Two-pass end-to-end speech recognition. arXiv abs\/1908.10992 (2019)","DOI":"10.21437\/Interspeech.2019-1341"},{"key":"12_CR30","doi-asserted-by":"crossref","unstructured":"Sak, H., et\u00a0al.: On-the-fly lattice rescoring for real-time automatic speech recognition. In: Proceedings of the INTERSPEECH (2010)","DOI":"10.21437\/Interspeech.2010-532"},{"key":"12_CR31","doi-asserted-by":"crossref","unstructured":"Salazar, J., Liang, D., Nguyen, T.Q., Kirchhoff, K.: Masked language model scoring. arXiv:1910.14659 (2019)","DOI":"10.18653\/v1\/2020.acl-main.240"},{"key":"12_CR32","unstructured":"Shin, J., Lee, Y., Jung, K.: Effective sentence scoring method using BERT for speech recognition. In: Proceedings of the ACML, pp. 1081\u20131093 (2019)"},{"key":"12_CR33","doi-asserted-by":"crossref","unstructured":"Sriram, A., et\u00a0al.: Cold fusion: training Seq2Seq models together with language models. In: Proceedings of the INTERSPEECH (2017)","DOI":"10.21437\/Interspeech.2018-1392"},{"key":"12_CR34","doi-asserted-by":"crossref","unstructured":"Zhou, S., Dong, L., Xu, S., Xu, B.: Syllable-based sequence-to-sequence speech recognition with the transformer in mandarin Chinese. In: Proceedings of the INTERSPEECH (2018)","DOI":"10.21437\/Interspeech.2018-1107"},{"key":"12_CR35","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Proceedings of the NeurlPS 30 (2017)"},{"key":"12_CR36","doi-asserted-by":"crossref","unstructured":"Wang, C., Wu, Y., Liu, S., Zhou, M., Yang, Z.: Curriculum pre-training for end-to-end speech translation. arXiv preprint arXiv:2004.10093 (2020)","DOI":"10.18653\/v1\/2020.acl-main.344"},{"key":"12_CR37","doi-asserted-by":"crossref","unstructured":"Watanabe, S., et al.: ESPnet: end-to-end speech processing toolkit. In: Proceedings of the INTERSPEECH (2018)","DOI":"10.21437\/Interspeech.2018-1456"},{"issue":"8","key":"12_CR38","doi-asserted-by":"publisher","first-page":"1240","DOI":"10.1109\/JSTSP.2017.2763455","volume":"11","author":"S Watanabe","year":"2017","unstructured":"Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T.: Hybrid CTC\/attention architecture for end-to-end speech recognition. IEEE J. Sel. Top. Signal Process. 11(8), 1240\u20131253 (2017)","journal-title":"IEEE J. Sel. Top. Signal Process."},{"key":"12_CR39","doi-asserted-by":"crossref","unstructured":"Zhang, S., Huang, H., Liu, J., Li, H.: Spelling error correction with soft-masked BERT. arXiv preprint arXiv:2005.07421 (2020)","DOI":"10.18653\/v1\/2020.acl-main.82"},{"key":"12_CR40","doi-asserted-by":"crossref","unstructured":"Zhang, S., Lei, M., Yan, Z.: Investigation of transformer based spelling correction model for CTC-based end-to-end mandarin speech recognition. In: Proceedings of the Interspeech, pp. 2180\u20132184 (2019)","DOI":"10.21437\/Interspeech.2019-1290"},{"key":"12_CR41","doi-asserted-by":"crossref","unstructured":"Zhou, L., Palangi, H., Zhang, L., Hu, H., Corso, J., Gao, J.: Unified vision-language pre-training for image captioning and VQA. In: Proceedings of the AAAI, pp. 13041\u201313049 (2020)","DOI":"10.1609\/aaai.v34i07.7005"},{"key":"12_CR42","doi-asserted-by":"crossref","unstructured":"Zhou, S., Dong, L., Xu, S., Xu, B.: A comparison of modeling units in sequence-to-sequence speech recognition with the transformer on mandarin Chinese. CoRR abs\/1805.06239 (2018)","DOI":"10.21437\/Interspeech.2018-1107"},{"key":"12_CR43","unstructured":"Zhou, S., Xu, S., Xu, B.: Multilingual end-to-end speech recognition with a single transformer on low-resource languages. CoRR abs\/1806.05059 (2018)"}],"container-title":["Lecture Notes in Computer Science","Database Systems for Advanced Applications. DASFAA 2024 International Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-0914-7_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,22]],"date-time":"2025-01-22T15:08:01Z","timestamp":1737558481000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-0914-7_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819609130","9789819609147"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-0914-7_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"23 January 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DASFAA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Database Systems for Advanced Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Gifu","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 July 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 July 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dasfaa2024a","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.dasfaa2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}