{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T15:56:43Z","timestamp":1774454203070,"version":"3.50.1"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032043382","type":"print"},{"value":"9783032043399","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-032-04339-9_19","type":"book-chapter","created":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T08:06:08Z","timestamp":1761811568000},"page":"294-305","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Whisper-Conformer: A Modified Automatic Speech Recognition for Thai Speech Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-8752-3421","authenticated-orcid":false,"given":"Thanakron","family":"Noppanamas","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0000-3818-9556","authenticated-orcid":false,"given":"Suronapee","family":"Phooomvuthisarn","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,31]]},"reference":[{"key":"19_CR1","unstructured":"Aung, Z.H., et al.: Thonburian whisper: robust fine-tuned and distilled whisper for Thai. 
In: Proceedings of the 7th International Conference on Natural Language and Speech Processing (ICNLSP 2024) (2024)"},{"key":"19_CR2","unstructured":"Baevski, A., et al.: wav2vec 2.0: a framework for self-supervised learning of speech representations. Adv. Neural Inform. Process. Syst. 33, 12449\u201312460 (2020)"},{"key":"19_CR3","unstructured":"Chuangsuwanich, E., et al.: Chulalongkorn University, Faculty of Engineering, Computer Engineering Department (2020). https:\/\/github.com\/ekapolc\/gowajee_corpus"},{"key":"19_CR4","unstructured":"Gandhi, S.: Fine-Tune Whisper for Multilingual ASR with Transformers (2024). Accessed 28 Jun 2024"},{"key":"19_CR5","doi-asserted-by":"crossref","unstructured":"Graves, A.: Sequence transduction with recurrent neural networks. arXiv preprint arXiv:1211.3711 (2012)","DOI":"10.1007\/978-3-642-24797-2"},{"key":"19_CR6","doi-asserted-by":"crossref","unstructured":"Gulati, A., et al.: Conformer: Convolution-augmented transformer for speech recognition. arXiv preprint arXiv:2005.08100 (2020)","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"19_CR7","doi-asserted-by":"crossref","unstructured":"Han, W., et al.: Contextnet: Improving convolutional neural networks for automatic speech recognition with global context. arXiv preprint arXiv:2005.03191 (2020)","DOI":"10.21437\/Interspeech.2020-2059"},{"key":"19_CR8","doi-asserted-by":"crossref","unstructured":"Jain, R., et al.: Adaptation of Whisper models to child speech recognition. arXiv preprint arXiv:2307.13008 (2023)","DOI":"10.21437\/Interspeech.2023-935"},{"key":"19_CR9","unstructured":"Pipatanakul, K.P.M., Sripaisarnmongkol, S., et al.: Monsoon Whisper Medium Gigaspeech2 (2024)"},{"key":"19_CR10","doi-asserted-by":"crossref","unstructured":"Li, J., et al.: Jasper: an end-to-end convolutional neural acoustic model. arXiv preprint arXiv:1904.03288 (2019)","DOI":"10.21437\/Interspeech.2019-1819"},{"key":"19_CR11","unstructured":"Limited, V.A.C.L.a.D.W.C.: Thai Elderly Speech. 
https:\/\/github.com\/VISAI-DATAWOW\/Thai-Elderly-Speech-dataset\/releases\/tag\/v1.0.0"},{"key":"19_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.101869","volume":"99","author":"A Mehrish","year":"2023","unstructured":"Mehrish, A., et al.: A review of deep learning techniques for speech processing. Inform. Fusion 99, 101869 (2023)","journal-title":"Inform. Fusion"},{"key":"19_CR13","unstructured":"MozillaFoundation, Common Voice: A Massively-Multilingual Speech Corpus (2024). https:\/\/commonvoice.mozilla.org\/th\/datasets"},{"key":"19_CR14","doi-asserted-by":"crossref","unstructured":"Phatthiyaphaibun, W., et al.: Pythainlp: Thai natural language processing in python. arXiv preprint arXiv:2312.04649 (2023)","DOI":"10.18653\/v1\/2023.nlposs-1.4"},{"key":"19_CR15","unstructured":"Phatthiyaphaibun, W., et al.: Thai wav2vec2.0 with commonvoice v8. arXiv preprint arXiv:2208.04799 (2022)"},{"issue":"1","key":"19_CR16","doi-asserted-by":"publisher","first-page":"76","DOI":"10.1016\/j.camwa.2006.10.030","volume":"54","author":"C Pisarn","year":"2007","unstructured":"Pisarn, C., Theeramunkong, T.: An HMM-based method for Thai spelling speech recognition. Comput. Math. Appl. 54(1), 76\u201395 (2007)","journal-title":"Comput. Math. Appl."},{"key":"19_CR17","unstructured":"Radford, A., et al.: Robust speech recognition via large-scale weak supervision. In: International Conference on Machine Learning, PMLR (2023)"},{"key":"19_CR18","unstructured":"Ramachandran, P., Zoph, B., Le, Q.V.: Searching for activation functions. arXiv preprint arXiv:1710.05941 (2017)"},{"key":"19_CR19","unstructured":"Rijal, S., et al.: Whisper Finetuning on Nepali Language. arXiv preprint arXiv:2411.12587 (2024)"},{"key":"19_CR20","unstructured":"Suebvisai, S., et al.: Thai automatic speech recognition. In: Proceedings of (ICASSP 2005), IEEE International Conference on Acoustics, Speech, and Signal Processing. 
IEEE (2005)"},{"key":"19_CR21","doi-asserted-by":"crossref","unstructured":"Suwanbandit, A., et al.: Thai dialect corpus and transfer-based curriculum learning investigation for dialect automatic speech recognition. In: Proceedings of Interspeech (2023)","DOI":"10.21437\/Interspeech.2023-1828"},{"key":"19_CR22","doi-asserted-by":"crossref","unstructured":"Tipakasorn, P., et al.: Comprehensive benchmarking and analysis of open pretrained Thai speech recognition models. In: 2024 27th Conference of the Oriental COCOSDA International Committee for the Co-ordination and Standardisation of Speech Databases and Assessment Techniques (O-COCOSDA). IEEE (2024)","DOI":"10.1109\/O-COCOSDA64382.2024.10800399"},{"key":"19_CR23","unstructured":"Vaswani, A.: Attention is all you need. Advances in Neural Information Processing Systems (2017)"},{"key":"19_CR24","doi-asserted-by":"crossref","unstructured":"Yang, H., et al.: Chinese ASR and NER improvement based on whisper fine-tuning. In: 2023 25th International Conference on Advanced Communication Technology (ICACT). 
IEEE (2023)","DOI":"10.23919\/ICACT56868.2023.10079686"}],"container-title":["Communications in Computer and Information Science","Deep Learning Theory and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-04339-9_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T08:06:26Z","timestamp":1761811586000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-04339-9_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783032043382","9783032043399"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-04339-9_19","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"value":"1865-0929","type":"print"},{"value":"1865-0937","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"31 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DeLTA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Deep Learning Theory and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bilbao","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 June 
2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 June 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"delta2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/delta.scitevents.org\/?y=2025","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}