{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,31]],"date-time":"2025-12-31T07:26:47Z","timestamp":1767166007717,"version":"3.48.0"},"publisher-location":"Singapore","reference-count":38,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819958368"},{"type":"electronic","value":"9789819958375"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-981-99-5837-5_26","type":"book-chapter","created":{"date-parts":[[2023,9,4]],"date-time":"2023-09-04T01:01:29Z","timestamp":1693789289000},"page":"310-322","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Adapting Code-Switching Language Models with\u00a0Statistical-Based Text Augmentation"],"prefix":"10.1007","author":[{"given":"Chaiyasait","family":"Prachaseree","sequence":"first","affiliation":[]},{"given":"Kshitij","family":"Gupta","sequence":"additional","affiliation":[]},{"given":"Thi Nga","family":"Ho","sequence":"additional","affiliation":[]},{"given":"Yizhou","family":"Peng","sequence":"additional","affiliation":[]},{"given":"Kyaw","family":"Zin Tun","sequence":"additional","affiliation":[]},{"given":"Eng Siong","family":"Chng","sequence":"additional","affiliation":[]},{"given":"G. S. S.","family":"Chalapthi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,9,5]]},"reference":[{"key":"26_CR1","unstructured":"Srilm - an extensible language modeling toolkit. In: Hansen, J.H.L., Pellom, B.L. (eds.) INTERSPEECH. ISCA (2002). http:\/\/dblp.uni-trier.de\/db\/conf\/interspeech\/interspeech2002.html#Stolcke02"},{"key":"26_CR2","doi-asserted-by":"publisher","unstructured":"Adel, H., Vu, T., Kirchhoff, K., Telaar, D., Schultz, T.: Syntactic and semantic features for code-switching factored language models. IEEE\/ACM Trans. Audio Speech Lang. Process. 23, 1 (2015). https:\/\/doi.org\/10.1109\/TASLP.2015.2389622","DOI":"10.1109\/TASLP.2015.2389622"},{"key":"26_CR3","doi-asserted-by":"publisher","unstructured":"Bene\u0161, K., Burget, L.: Text augmentation for language models in high error recognition scenario. In: Proceedings of Interspeech 2021, pp. 1872\u20131876 (2021). https:\/\/doi.org\/10.21437\/Interspeech.2021-627","DOI":"10.21437\/Interspeech.2021-627"},{"key":"26_CR4","doi-asserted-by":"crossref","unstructured":"Chang, C.T., Chuang, S.P., VI Lee, H.: Code-switching sentence generation by generative adversarial networks and its application to data augmentation. In: Interspeech (2018)","DOI":"10.21437\/Interspeech.2019-3214"},{"key":"26_CR5","doi-asserted-by":"publisher","unstructured":"Ding, B., et al.: DAGA: data augmentation with a generation approach for low-resource tagging tasks. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 6045\u20136057. Association for Computational Linguistics, November 2020. https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.488, http:\/\/aclanthology.org\/2020.emnlp-main.488","DOI":"10.18653\/v1\/2020.emnlp-main.488"},{"key":"26_CR6","doi-asserted-by":"publisher","unstructured":"Gao, Y., Feng, J., Liu, Y., Hou, L., Pan, X., Ma, Y.: Code-switching sentence generation by bert and generative adversarial networks, pp. 3525\u20133529, September 2019. https:\/\/doi.org\/10.21437\/Interspeech.2019-2501","DOI":"10.21437\/Interspeech.2019-2501"},{"key":"26_CR7","doi-asserted-by":"crossref","unstructured":"Garg, S., Parekh, T., Jyothi, P.: Code-switched language models using dual RNNs and same-source pretraining. In: Conference on Empirical Methods in Natural Language Processing (2018)","DOI":"10.18653\/v1\/D18-1346"},{"key":"26_CR8","doi-asserted-by":"publisher","unstructured":"Gonen, H., Goldberg, Y.: Language modeling for code-switching: evaluation, integration of monolingual data, and discriminative training. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp. 4175\u20134185. Association for Computational Linguistics, Hong Kong, China, November 2019. https:\/\/doi.org\/10.18653\/v1\/D19-1427, http:\/\/aclanthology.org\/D19-1427","DOI":"10.18653\/v1\/D19-1427"},{"key":"26_CR9","doi-asserted-by":"publisher","unstructured":"Gumperz, J.J.: Discourse Strategies. Studies in Interactional Sociolinguistics, Cambridge University Press, Cambridge (1982). https:\/\/doi.org\/10.1017\/CBO9780511611834","DOI":"10.1017\/CBO9780511611834"},{"key":"26_CR10","doi-asserted-by":"publisher","unstructured":"Hu, X., Zhang, Q., Yang, L., Gu, B., Xu, X.: Data augmentation for code-switch language modeling by fusing multiple text generation methods. In: Proceedings of Interspeech 2020, pp. 1062\u20131066 (2020). https:\/\/doi.org\/10.21437\/Interspeech.2020-2219","DOI":"10.21437\/Interspeech.2020-2219"},{"key":"26_CR11","doi-asserted-by":"crossref","unstructured":"Huang, W.R., Peyser, C., Sainath, T.N., Pang, R., Strohman, T.D., Kumar, S.: Sentence-select: large-scale language model data selection for rare-word speech recognition (2022). http:\/\/arxiv.org\/abs\/2203.05008","DOI":"10.21437\/Interspeech.2022-10820"},{"key":"26_CR12","unstructured":"Husein, Z.: Malaya-speech (2020), speech-Toolkit library for bahasa Malaysia, powered by Deep Learning Tensorflow https:\/\/github.com\/huseinzol05\/malaya-speech"},{"key":"26_CR13","doi-asserted-by":"publisher","unstructured":"Kitaev, N., Klein, D.: Constituency parsing with a self-attentive encoder. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 2676\u20132686. Association for Computational Linguistics, Melbourne, Australia, July 2018. https:\/\/doi.org\/10.18653\/v1\/P18-1249, http:\/\/aclanthology.org\/P18-1249","DOI":"10.18653\/v1\/P18-1249"},{"key":"26_CR14","doi-asserted-by":"crossref","unstructured":"Koh, J.X., et al.: Building the Singapore English national speech corpus. In: Interspeech (2019)","DOI":"10.21437\/Interspeech.2019-1525"},{"key":"26_CR15","doi-asserted-by":"publisher","unstructured":"Lee, G., Yue, X., Li, H.: linguistically motivated parallel data augmentation for code-switch language modeling. In: Proceedings of Interspeech 2019, pp. 3730\u20133734 (2019). https:\/\/doi.org\/10.21437\/Interspeech.2019-1382","DOI":"10.21437\/Interspeech.2019-1382"},{"key":"26_CR16","doi-asserted-by":"publisher","unstructured":"Li, C., Vu, N.T.: Improving code-switching language modeling with artificially generated texts using cycle-consistent adversarial networks. In: Meng, H., Xu, B., Zheng, T.F. (eds.) Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25\u201329 October 2020, pp. 1057\u20131061. ISCA (2020). https:\/\/doi.org\/10.21437\/Interspeech.2020-2177","DOI":"10.21437\/Interspeech.2020-2177"},{"key":"26_CR17","doi-asserted-by":"publisher","unstructured":"Li, C.Y., Vu, N.T.: Improving code-switching language modeling with artificially generated texts using cycle-consistent adversarial networks. In: Proceedings of Interspeech 2020, pp. 1057\u20131061 (2020). https:\/\/doi.org\/10.21437\/Interspeech.2020-2177","DOI":"10.21437\/Interspeech.2020-2177"},{"key":"26_CR18","unstructured":"Li, S.S., Murray, K.: Language agnostic code-mixing data augmentation by predicting linguistic patterns. ArXiv abs\/2211.07628 (2022)"},{"key":"26_CR19","doi-asserted-by":"publisher","unstructured":"Li, Y., Fung, P.: Language modeling with functional head constraint for code switching speech recognition. In: EMNLP 2014\u20132014 Conference on Empirical Methods in Natural Language Processing, Proceedings of the Conference, pp. 907\u2013916, January 2014. https:\/\/doi.org\/10.3115\/v1\/D14-1098","DOI":"10.3115\/v1\/D14-1098"},{"key":"26_CR20","unstructured":"Li, Y., Fung, P.: Code-switch language model with inversion constraints for mixed language speech recognition. In: Proceedings of COLING 2012, pp. 1671\u20131680. The COLING 2012 Organizing Committee, Mumbai, India, December 2012).https:\/\/aclanthology.org\/C12-1102"},{"key":"26_CR21","doi-asserted-by":"publisher","unstructured":"Li, Y., Fung, P.: Code switch language modeling with functional head constraint. In: 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4913\u20134917 (2014). https:\/\/doi.org\/10.1109\/ICASSP.2014.6854536","DOI":"10.1109\/ICASSP.2014.6854536"},{"key":"26_CR22","doi-asserted-by":"publisher","unstructured":"Liu, L., Ding, B., Bing, L., Joty, S., Si, L., Miao, C.: MulDA: a multilingual data augmentation framework for low-resource cross-lingual NER. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 5834\u20135846. Association for Computational Linguistics, August 2021. https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.453, https:\/\/aclanthology.org\/2021.acl-long.453","DOI":"10.18653\/v1\/2021.acl-long.453"},{"key":"26_CR23","doi-asserted-by":"crossref","unstructured":"Loper, E., Bird, S.: Nltk: The natural language toolkit. arXiv preprint cs\/0205028 (2002)","DOI":"10.3115\/1118108.1118117"},{"key":"26_CR24","unstructured":"Manning, C.D., Surdeanu, M., Bauer, J., Finkel, J.R., Bethard, S., McClosky, D.: In: ACL (System Demonstrations) (2014)"},{"key":"26_CR25","doi-asserted-by":"crossref","unstructured":"Myers-Scotton, C.: Duelling languages: Grammatical structure in codeswitching (1993)","DOI":"10.1093\/oso\/9780198240594.001.0001"},{"key":"26_CR26","doi-asserted-by":"publisher","unstructured":"Poplack, S.: Sometimes i\u2019ll start a sentence in spanish y termino en espaNol: toward a typology of code-switching 1. Linguistics 18, 581\u2013618 (1980). https:\/\/doi.org\/10.1515\/ling.1980.18.7-8.581","DOI":"10.1515\/ling.1980.18.7-8.581"},{"key":"26_CR27","doi-asserted-by":"publisher","unstructured":"Pratapa, A., Bhat, G., Choudhury, M., Sitaram, S., Dandapat, S., Bali, K.: Language modeling for code-mixing: the role of linguistic theory based synthetic data. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 1543\u20131553. Association for Computational Linguistics, Melbourne, Australia, July 2018. https:\/\/doi.org\/10.18653\/v1\/P18-1143, https:\/\/aclanthology.org\/P18-1143","DOI":"10.18653\/v1\/P18-1143"},{"key":"26_CR28","doi-asserted-by":"crossref","unstructured":"Rizvi, M.S.Z., Srinivasan, A., Ganu, T., Choudhury, M., Sitaram, S.: GCM: a toolkit for generating synthetic code-mixed text. In: Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations, pp. 205\u2013211. Association for Computational Linguistics, April 2021. https:\/\/aclanthology.org\/2021.eacl-demos.24","DOI":"10.18653\/v1\/2021.eacl-demos.24"},{"key":"26_CR29","doi-asserted-by":"publisher","unstructured":"Samanta, B., Nangi, S., Jagirdar, H., Ganguly, N., Chakrabarti, S.: A deep generative model for code switched text, pp. 5175\u20135181, August 2019. https:\/\/doi.org\/10.24963\/ijcai.2019\/719","DOI":"10.24963\/ijcai.2019\/719"},{"key":"26_CR30","doi-asserted-by":"crossref","unstructured":"Solorio, T., Liu, Y.: Learning to predict code-switching points. In: Proceedings of the 2008 Conference on Empirical Methods in Natural Language Processing, pp. 973\u2013981. Association for Computational Linguistics, Honolulu, Hawaii, October 2008. https:\/\/aclanthology.org\/D08-1102","DOI":"10.3115\/1613715.1613841"},{"key":"26_CR31","doi-asserted-by":"publisher","unstructured":"Soto, V., Hirschberg, J.: Improving code-switched language modeling performance using cognate features, pp. 3725\u20133729, September 2019. https:\/\/doi.org\/10.21437\/Interspeech.2019-2681","DOI":"10.21437\/Interspeech.2019-2681"},{"key":"26_CR32","unstructured":"Vinyals, O., Fortunato, M., Jaitly, N.: Pointer networks. In: Cortes, C., Lawrence, N., Lee, D., Sugiyama, M., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 28. Curran Associates, Inc. (2015). https:\/\/proceedings.neurips.cc\/paper\/2015\/file\/29921001f2f04bd3baee84a12e98098f-Paper.pdf"},{"key":"26_CR33","doi-asserted-by":"publisher","unstructured":"Watanabe, S., et al.: ESPnet: end-to-end speech processing toolkit. In: Proceedings of Interspeech, pp. 2207\u20132211 (2018). https:\/\/doi.org\/10.21437\/Interspeech.2018-1456, https:\/\/dx.doi.org\/10.21437\/Interspeech.2018-1456","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"26_CR34","unstructured":"Weintraub, M., Taussig, K., Hunicke-Smith, K., Snodgrass, A.: Effect of speaking style on lvcsr performance. In: Proceedings of ICSLP, vol. 96, pp. 16\u201319. Citeseer (1996)"},{"key":"26_CR35","doi-asserted-by":"publisher","unstructured":"Winata, G.I., Madotto, A., Wu, C.S., Fung, P.: Code-switching language modeling using syntax-aware multi-task learning. In: Proceedings of the Third Workshop on Computational Approaches to Linguistic Code-Switching, pp. 62\u201367. Association for Computational Linguistics, Melbourne, Australia, July 2018. https:\/\/doi.org\/10.18653\/v1\/W18-3207, https:\/\/aclanthology.org\/W18-3207","DOI":"10.18653\/v1\/W18-3207"},{"key":"26_CR36","doi-asserted-by":"publisher","unstructured":"Winata, G.I., Madotto, A., Wu, C.S., Fung, P.: Code-switched language models using neural based synthetic data from parallel sentences. In: Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL), pp. 271\u2013280. Association for Computational Linguistics, Hong Kong, China, November 2019. https:\/\/doi.org\/10.18653\/v1\/K19-1026, https:\/\/aclanthology.org\/K19-1026","DOI":"10.18653\/v1\/K19-1026"},{"key":"26_CR37","doi-asserted-by":"publisher","unstructured":"Y\u0131lmaz, E., van den Heuvel, H., Van Leeuwen, D.: Acoustic and textual data augmentation for improved ASR of code-switching speech, September 2018. https:\/\/doi.org\/10.21437\/Interspeech.2018-52","DOI":"10.21437\/Interspeech.2018-52"},{"key":"26_CR38","doi-asserted-by":"publisher","unstructured":"Zhou, R., et al.: MELM: data augmentation with masked entity language modeling for low-resource NER. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 2251\u20132262. Association for Computational Linguistics, Dublin, Ireland, May 2022. https:\/\/doi.org\/10.18653\/v1\/2022.acl-long.160, https:\/\/aclanthology.org\/2022.acl-long.160","DOI":"10.18653\/v1\/2022.acl-long.160"}],"container-title":["Lecture Notes in Computer Science","Intelligent Information and Database Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-5837-5_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,31]],"date-time":"2025-12-31T07:22:33Z","timestamp":1767165753000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-5837-5_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9789819958368","9789819958375"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-5837-5_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"5 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACIIDS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Intelligent Information and Database Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Phuket","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Thailand","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 July 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aciids2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/aciids.pwr.edu.pl\/2023\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}