{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T17:58:30Z","timestamp":1772301510217,"version":"3.50.1"},"publisher-location":"Cham","reference-count":39,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031441943","type":"print"},{"value":"9783031441950","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-44195-0_32","type":"book-chapter","created":{"date-parts":[[2023,9,21]],"date-time":"2023-09-21T12:04:08Z","timestamp":1695297848000},"page":"389-400","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Correction while Recognition: Combining Pretrained Language Model for\u00a0Taiwan-Accented Speech Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7636-3797","authenticated-orcid":false,"given":"Sheng","family":"Li","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4997-3850","authenticated-orcid":false,"given":"Jiyi","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,9,22]]},"reference":[{"key":"32_CR1","unstructured":"Amodei, D., et al.: Deep speech 2: end-to-end speech recognition in English and mandarin. In: Proceedings of The 33rd International Conference on Machine Learning (ICML). Proceedings of Machine Learning Research, vol. 48, pp. 173\u2013182 (2016). https:\/\/proceedings.mlr.press\/v48\/amodei16.html"},{"key":"32_CR2","unstructured":"Baevski, A., Hsu, W.N., Conneau, A., Auli, M.: Unsupervised speech recognition. In: Advances in Neural Information Processing Systems (NeurIPS), vol. 34, pp. 27826\u201327839 (2021). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2021\/file\/ea159dc9788ffac311592613b7f71fbb-Paper.pdf"},{"key":"32_CR3","doi-asserted-by":"publisher","unstructured":"Baevski, A., Mohamed, A.: Effectiveness of self-supervised pre-training for ASR. In: 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7694\u20137698 (2020). https:\/\/doi.org\/10.1109\/ICASSP40776.2020.9054224","DOI":"10.1109\/ICASSP40776.2020.9054224"},{"key":"32_CR4","unstructured":"Baevski, A., Zhou, Y., Mohamed, A., Auli, M.: wav2vec 2.0: a framework for self-supervised learning of speech representations. In: Advances in Neural Information Processing Systems (NeurIPS), vol. 33, pp. 12449\u201312460 (2020). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/92d1e1eb1cd6f9fba3227870bb6d7f07-Paper.pdf"},{"key":"32_CR5","doi-asserted-by":"publisher","first-page":"1897","DOI":"10.1109\/TASLP.2021.3082299","volume":"29","author":"Y Bai","year":"2021","unstructured":"Bai, Y., Yi, J., Tao, J., Tian, Z., Wen, Z., Zhang, S.: Fast end-to-end speech recognition via non-autoregressive models and cross-modal knowledge transferring from BERT. IEEE\/ACM Trans. Audio Speech Lang. Process. 29, 1897\u20131911 (2021). https:\/\/doi.org\/10.1109\/TASLP.2021.3082299","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"32_CR6","doi-asserted-by":"publisher","unstructured":"Chiu, C.C., et al.: State-of-the-art speech recognition with sequence-to-sequence models. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4774\u20134778. IEEE Press (2018). https:\/\/doi.org\/10.1109\/ICASSP.2018.8462105","DOI":"10.1109\/ICASSP.2018.8462105"},{"key":"32_CR7","unstructured":"Chorowski, J., Bahdanau, D., Cho, K., Bengio, Y.: End-to-end continuous speech recognition using attention-based recurrent nn: First results. arXiv preprint arXiv:1412.1602 (2014)"},{"key":"32_CR8","unstructured":"Chorowski, J., Bahdanau, D., Serdyuk, D., Cho, K., Bengio, Y.: Attention-based models for speech recognition. In: Proceedings of the 28th International Conference on Neural Information Processing Systems (NIPS), vol. 1, pp. 577\u2013585 (2015)"},{"key":"32_CR9","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL), vol. 1 (Long and Short Papers), pp. 4171\u20134186 (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423. https:\/\/aclanthology.org\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"32_CR10","doi-asserted-by":"publisher","unstructured":"Dong, L., Xu, B.: CIF: continuous integrate-and-fire for end-to-end speech recognition. In: 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6079\u20136083 (2020). https:\/\/doi.org\/10.1109\/ICASSP40776.2020.9054250","DOI":"10.1109\/ICASSP40776.2020.9054250"},{"key":"32_CR11","doi-asserted-by":"publisher","unstructured":"Futami, H., Inaguma, H., Ueno, S., Mimura, M., Sakai, S., Kawahara, T.: Distilling the knowledge of BERT for sequence-to-sequence ASR. In: Proceedings of Interspeech 2020, pp. 3635\u20133639 (2020). https:\/\/doi.org\/10.21437\/Interspeech.2020-1179","DOI":"10.21437\/Interspeech.2020-1179"},{"key":"32_CR12","doi-asserted-by":"crossref","unstructured":"Futami, H., Inaguma, H., Ueno, S., Mimura, M., Sakai, S., Kawahara, T.: Distilling the knowledge of BERT for sequence-to-sequence ASR. CoRR abs\/2008.03822 (2020). https:\/\/arxiv.org\/abs\/2008.03822","DOI":"10.21437\/Interspeech.2020-1179"},{"key":"32_CR13","doi-asserted-by":"crossref","unstructured":"Graves, A., Fernandez, S., Gomez, F., Shmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of ICML (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"32_CR14","unstructured":"Graves, A., Jaitly, N.: Towards end-to-end speech recognition with recurrent neural networks. In: Proceedings of ICML, pp. 1764\u20131772 (2014)"},{"key":"32_CR15","unstructured":"Heafield, K., Pouzyrevsky, I., Clark, J., Koehn, P.: Scalable modified kneser-ney language model estimation. In: Proceedings of ACL (2013)"},{"key":"32_CR16","unstructured":"Houlsby, N., et al.: Parameter-efficient transfer learning for NLP. In: Proceedings of ICML, pp. 2790\u20132799 (2019)"},{"key":"32_CR17","doi-asserted-by":"crossref","unstructured":"Hsu, W.N., Tsai, Y.H.H., Bolte, B., Salakhutdinov, R., Mohamed, A.: Hubert: how much can a bad teacher benefit ASR pre-training? In: Proceedings of IEEE-ICASSP, pp. 6533\u20136537 (2021)","DOI":"10.1109\/ICASSP39728.2021.9414460"},{"key":"32_CR18","unstructured":"Li, J., Wang, X., Li, Y., et al.: The speechtransformer for large-scale mandarin Chinese speech recognition. In: Proceedings of IEEE-ICASSP, pp. 7095\u20137099 (2019)"},{"key":"32_CR19","unstructured":"Li, L.H., Yatskar, M., Yin, D., Hsieh, C.J., Chang, K.W.: Visualbert: a simple and performant baseline for vision and language. arXiv preprint arXiv:1908.03557 (2019)"},{"key":"32_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1007\/978-3-030-58577-8_8","volume-title":"Computer Vision \u2013 ECCV 2020","author":"X Li","year":"2020","unstructured":"Li, X., et al.: Oscar: object-semantics aligned pre-training for vision-language tasks. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12375, pp. 121\u2013137. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58577-8_8"},{"key":"32_CR21","unstructured":"Lu, J., Batra, D., Parikh, D., Lee, S.: Vilbert: pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. arXiv preprint arXiv:1908.02265 (2019)"},{"key":"32_CR22","doi-asserted-by":"crossref","unstructured":"Miao, Y., Gowayyed, M., Na, X., Ko, T., Metze, F., Waibel, A.: An emprical exploration of CTC acoustic models. In: Proceedings of IEEE-ICASSP (2016)","DOI":"10.1109\/ICASSP.2016.7472152"},{"key":"32_CR23","doi-asserted-by":"crossref","unstructured":"Mikolov, T., Karafi\u00e1t, M., Burget, L., Cernock\u1ef3, J., Khudanpur, S.: Recurrent neural network based language model. In: Proceedings of Interspeech, vol. 2, pp. 1045\u20131048 (2010)","DOI":"10.21437\/Interspeech.2010-343"},{"key":"32_CR24","doi-asserted-by":"crossref","unstructured":"Ogawa, A., Delcroix, M., Karita, S., Nakatani, T.: Rescoring n-best speech recognition list based on one-on-one hypothesis comparison using encoder-classifier model. In: Proceedings of IEEE-ICASSP (2018)","DOI":"10.1109\/ICASSP.2018.8461405"},{"issue":"8","key":"32_CR25","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I., et al.: Language models are unsupervised multitask learners. OpenAI Blog 1(8), 9 (2019)","journal-title":"OpenAI Blog"},{"key":"32_CR26","doi-asserted-by":"crossref","unstructured":"Salazar, J., et al.: Masked language model scoring. In: Proceedings of ACL (2020)","DOI":"10.18653\/v1\/2020.acl-main.240"},{"key":"32_CR27","doi-asserted-by":"crossref","unstructured":"Salazar, J., Liang, D., Nguyen, T.Q., Kirchhoff, K.: Masked language model scoring. arXiv preprint arXiv:1910.14659 (2019)","DOI":"10.18653\/v1\/2020.acl-main.240"},{"key":"32_CR28","unstructured":"Shin, J., Lee, Y., Jung, K.: Effective sentence scoring method using BERT for speech recognition. In: Proceedings of ACML, pp. 1081\u20131093 (2019)"},{"key":"32_CR29","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"194","DOI":"10.1007\/978-3-030-32381-3_16","volume-title":"Chinese Computational Linguistics","author":"C Sun","year":"2019","unstructured":"Sun, C., Qiu, X., Xu, Y., Huang, X.: How to fine-tune BERT for text classification? In: Sun, M., Huang, X., Ji, H., Liu, Z., Liu, Y. (eds.) CCL 2019. LNCS (LNAI), vol. 11856, pp. 194\u2013206. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-32381-3_16"},{"key":"32_CR30","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Proceedings of NeurlPS, vol. 30 (2017)"},{"key":"32_CR31","doi-asserted-by":"crossref","unstructured":"Wang, C., Wu, Y., Liu, S., Zhou, M., Yang, Z.: Curriculum pre-training for end-to-end speech translation. arXiv preprint arXiv:2004.10093 (2020)","DOI":"10.18653\/v1\/2020.acl-main.344"},{"key":"32_CR32","doi-asserted-by":"crossref","unstructured":"Xu, L., et al.: Rescorebert: discriminative speech recognition rescoring with BERT. In: Proceedings of IEEE-ICASSP (2022)","DOI":"10.1109\/ICASSP43922.2022.9747118"},{"key":"32_CR33","first-page":"1474","volume":"30","author":"FH Yu","year":"2022","unstructured":"Yu, F.H., Chen, K.Y., Lu, K.H.: Non-autoregressive ASR modeling using pre-trained language models for Chinese speech recognition. IEEE\/ACM Trans. ASLP 30, 1474\u20131482 (2022)","journal-title":"IEEE\/ACM Trans. ASLP"},{"key":"32_CR34","doi-asserted-by":"crossref","unstructured":"Zhang, S., Huang, H., Liu, J., Li, H.: Spelling error correction with soft-masked BERT. arXiv preprint arXiv:2005.07421 (2020)","DOI":"10.18653\/v1\/2020.acl-main.82"},{"key":"32_CR35","doi-asserted-by":"crossref","unstructured":"Zhang, S., Lei, M., Yan, Z.: Investigation of transformer based spelling correction model for CTC-based end-to-end mandarin speech recognition. In: Proceedings of Interspeech, pp. 2180\u20132184 (2019)","DOI":"10.21437\/Interspeech.2019-1290"},{"key":"32_CR36","doi-asserted-by":"crossref","unstructured":"Zhao, Y., et al.: Bart based semantic correction for mandarin automatic speech recognition system. In: Proceedings of Interspeech (2021)","DOI":"10.21437\/Interspeech.2021-739"},{"key":"32_CR37","doi-asserted-by":"crossref","unstructured":"Zheng, G., et al.: Wav-BERT: cooperative acoustic and linguistic representation learning for low-resource speech recognition. In: Proceedings of EMNLP findings (2021)","DOI":"10.18653\/v1\/2021.findings-emnlp.236"},{"key":"32_CR38","doi-asserted-by":"crossref","unstructured":"Zhou, L., Palangi, H., Zhang, L., Hu, H., Corso, J., Gao, J.: Unified vision-language pre-training for image captioning and VQA. In: Proceedings of AAAI, vol. 34, pp. 13041\u201313049 (2020)","DOI":"10.1609\/aaai.v34i07.7005"},{"key":"32_CR39","unstructured":"Zhou, S., Xu, S., Xu, B.: Multilingual end-to-end speech recognition with a single transformer on low-resource languages. arXiv preprint arXiv:1806.05059 (2018)"}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks and Machine Learning \u2013 ICANN 2023"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-44195-0_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,21]],"date-time":"2023-09-21T12:09:18Z","timestamp":1695298158000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-44195-0_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031441943","9783031441950"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-44195-0_32","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"22 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICANN","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Neural Networks","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Heraklion","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"32","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icann2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/e-nns.org\/icann2023\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"easyacademia.org","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"947","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"426","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"22","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"45% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"type of other papers accepted  : 9 Abstract","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}