{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,16]],"date-time":"2025-07-16T12:16:16Z","timestamp":1752668176871,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,12]]},"DOI":"10.1145\/3568562.3568646","type":"proceedings-article","created":{"date-parts":[[2022,11,29]],"date-time":"2022-11-29T00:25:01Z","timestamp":1669681501000},"page":"276-282","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Improving Khmer-Vietnamese Machine Translation with Data Augmentation methods"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5003-9075","authenticated-orcid":false,"given":"Hanh","family":"Pham Van","sequence":"first","affiliation":[{"name":"School of Information and Communication Technology, Hanoi University of Science and Technology, Viet Nam"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7628-4674","authenticated-orcid":false,"given":"Huong","family":"Le Thanh","sequence":"additional","affiliation":[{"name":"School of Information and Communication Technology, Hanoi University of Science and Technology, Viet Nam"}]}],"member":"320","published-online":{"date-parts":[[2022,12]]},"reference":[{"key":"#cr-split#-e_1_3_2_1_1_1.1","unstructured":"Dzmitry Bahdanau Kyunghyun Cho and Yoshua Bengio. 2014. Neural Machine Translation by Jointly Learning to Align and Translate. https:\/\/doi.org\/10.48550\/ARXIV.1409.0473 10.48550\/ARXIV.1409.0473"},{"key":"#cr-split#-e_1_3_2_1_1_1.2","unstructured":"Dzmitry Bahdanau Kyunghyun Cho and Yoshua Bengio. 2014. Neural Machine Translation by Jointly Learning to Align and Translate. https:\/\/doi.org\/10.48550\/ARXIV.1409.0473"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-4012"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1045"},{"key":"#cr-split#-e_1_3_2_1_4_1.1","unstructured":"Angela Fan Shruti Bhosale Holger Schwenk Zhiyi Ma Ahmed El-Kishky Siddharth Goyal Mandeep Baines Onur Celebi Guillaume Wenzek Vishrav Chaudhary Naman Goyal Tom Birch Vitaliy Liptchinsky Sergey Edunov Edouard Grave Michael Auli and Armand Joulin. 2020. Beyond English-Centric Multilingual Machine Translation. https:\/\/doi.org\/10.48550\/ARXIV.2010.11125 10.48550\/ARXIV.2010.11125"},{"key":"#cr-split#-e_1_3_2_1_4_1.2","unstructured":"Angela Fan Shruti Bhosale Holger Schwenk Zhiyi Ma Ahmed El-Kishky Siddharth Goyal Mandeep Baines Onur Celebi Guillaume Wenzek Vishrav Chaudhary Naman Goyal Tom Birch Vitaliy Liptchinsky Sergey Edunov Edouard Grave Michael Auli and Armand Joulin. 2020. Beyond English-Centric Multilingual Machine Translation. https:\/\/doi.org\/10.48550\/ARXIV.2010.11125"},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of the 4th Workshop on Asian Translation (WAT2017)","author":"Imankulova Aizhan","year":"2017","unstructured":"Aizhan Imankulova , Takayuki Sato , and Mamoru Komachi . 2017 . Improving Low-Resource Neural Machine Translation with Filtered Pseudo-Parallel Corpus . In Proceedings of the 4th Workshop on Asian Translation (WAT2017) . Asian Federation of Natural Language Processing, Taipei, Taiwan, 70\u201378. https:\/\/aclanthology.org\/W17-5704 Aizhan Imankulova, Takayuki Sato, and Mamoru Komachi. 2017. Improving Low-Resource Neural Machine Translation with Filtered Pseudo-Parallel Corpus. In Proceedings of the 4th Workshop on Asian Translation (WAT2017). Asian Federation of Natural Language Processing, Taipei, Taiwan, 70\u201378. https:\/\/aclanthology.org\/W17-5704"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00065"},{"key":"e_1_3_2_1_7_1","volume-title":"Recent advances in Apertium, a free\/open-source rule-based machine translation platform for low-resource languages. Machine Translation (01","author":"Khanna Tanmai","year":"2021","unstructured":"Tanmai Khanna , Jonathan\u00a0 N. Washington , Francis\u00a0 M. Tyers , Sevilay Bayatl\u0131 , Daniel\u00a0 G. Swanson , Tommi\u00a0 A. Pirinen , Irene Tang , and H\u00e8ctor Al\u00f2s\u00a0i Font . 2021. Recent advances in Apertium, a free\/open-source rule-based machine translation platform for low-resource languages. Machine Translation (01 Dec 2021 ). Tanmai Khanna, Jonathan\u00a0N. Washington, Francis\u00a0M. Tyers, Sevilay Bayatl\u0131, Daniel\u00a0G. Swanson, Tommi\u00a0A. Pirinen, Irene Tang, and H\u00e8ctor Al\u00f2s\u00a0i Font. 2021. Recent advances in Apertium, a free\/open-source rule-based machine translation platform for low-resource languages. Machine Translation (01 Dec 2021)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.5555\/1557769.1557821"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-6453"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073445.1073462"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-acl.6"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00343"},{"key":"#cr-split#-e_1_3_2_1_13_1.1","unstructured":"Minh-Thang Luong Hieu Pham and Christopher\u00a0D. Manning. 2015. Effective Approaches to Attention-based Neural Machine Translation. https:\/\/doi.org\/10.48550\/ARXIV.1508.04025 10.48550\/ARXIV.1508.04025"},{"key":"#cr-split#-e_1_3_2_1_13_1.2","doi-asserted-by":"crossref","unstructured":"Minh-Thang Luong Hieu Pham and Christopher\u00a0D. Manning. 2015. Effective Approaches to Attention-based Neural Machine Translation. https:\/\/doi.org\/10.48550\/ARXIV.1508.04025","DOI":"10.18653\/v1\/D15-1166"},{"key":"e_1_3_2_1_14_1","volume-title":"BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension. In ACL.","author":"Lewis Mike","year":"2020","unstructured":"Mike Lewis and Yinhan Liu and Naman Goyal and Marjan Ghazvininejad and Abdelrahman Mohamed and Omer Levy and Veselin Stoyanov and Luke Zettlemoyer . 2020 . BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension. In ACL. Mike Lewis and Yinhan Liu and Naman Goyal and Marjan Ghazvininejad and Abdelrahman Mohamed and Omer Levy and Veselin Stoyanov and Luke Zettlemoyer. 2020. BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension. In ACL."},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the ACL 2010 Conference Short Papers. Association for Computational Linguistics","author":"C.","year":"2041","unstructured":"Robert\u00a0 C. Moore and William Lewis. 2010. Intelligent Selection of Language Model Training Data . In Proceedings of the ACL 2010 Conference Short Papers. Association for Computational Linguistics , Uppsala, Sweden, 220\u2013224. https:\/\/aclanthology.org\/P10- 2041 Robert\u00a0C. Moore and William Lewis. 2010. Intelligent Selection of Language Model Training Data. In Proceedings of the ACL 2010 Conference Short Papers. Association for Computational Linguistics, Uppsala, Sweden, 220\u2013224. https:\/\/aclanthology.org\/P10-2041"},{"key":"#cr-split#-e_1_3_2_1_16_1.1","unstructured":"NLLB Team Marta\u00a0R. Costa-juss\u00e0 James Cross Onur \u00c7elebi Maha Elbayad Kenneth Heafield Kevin Heffernan Elahe Kalbassi Janice Lam Daniel Licht Jean Maillard Anna Sun Skyler Wang Guillaume Wenzek Al Youngblood Bapi Akula Loic Barrault Gabriel\u00a0Mejia Gonzalez Prangthip Hansanti John Hoffman Semarley Jarrett Kaushik\u00a0Ram Sadagopan Dirk Rowe Shannon Spruit Chau Tran Pierre Andrews Necip\u00a0Fazil Ayan Shruti Bhosale Sergey Edunov Angela Fan Cynthia Gao Vedanuj Goswami Francisco Guzm\u00e1n Philipp Koehn Alexandre Mourachko Christophe Ropers Safiyyah Saleem Holger Schwenk and Jeff Wang. 2022. No Language Left Behind: Scaling Human-Centered Machine Translation. https:\/\/doi.org\/10.48550\/ARXIV.2207.04672 10.48550\/ARXIV.2207.04672"},{"key":"#cr-split#-e_1_3_2_1_16_1.2","unstructured":"NLLB Team Marta\u00a0R. Costa-juss\u00e0 James Cross Onur \u00c7elebi Maha Elbayad Kenneth Heafield Kevin Heffernan Elahe Kalbassi Janice Lam Daniel Licht Jean Maillard Anna Sun Skyler Wang Guillaume Wenzek Al Youngblood Bapi Akula Loic Barrault Gabriel\u00a0Mejia Gonzalez Prangthip Hansanti John Hoffman Semarley Jarrett Kaushik\u00a0Ram Sadagopan Dirk Rowe Shannon Spruit Chau Tran Pierre Andrews Necip\u00a0Fazil Ayan Shruti Bhosale Sergey Edunov Angela Fan Cynthia Gao Vedanuj Goswami Francisco Guzm\u00e1n Philipp Koehn Alexandre Mourachko Christophe Ropers Safiyyah Saleem Holger Schwenk and Jeff Wang. 2022. No Language Left Behind: Scaling Human-Centered Machine Translation. https:\/\/doi.org\/10.48550\/ARXIV.2207.04672"},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni , Salim Roukos , Todd Ward , and Wei-Jing Zhu . 2002 . Bleu: a Method for Automatic Evaluation of Machine Translation . In Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics , Philadelphia, Pennsylvania, USA, 311\u2013318. https:\/\/doi.org\/10.3115\/1073083.1073135 10.3115\/1073083.1073135 Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a Method for Automatic Evaluation of Machine Translation. In Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics, Philadelphia, Pennsylvania, USA, 311\u2013318. https:\/\/doi.org\/10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_18_1","volume-title":"Gideon Maillette de\u00a0Buy Wenniger, and Andy Way","author":"Poncelas Alberto","year":"2018","unstructured":"Alberto Poncelas , Gideon Maillette de\u00a0Buy Wenniger, and Andy Way . 2018 . Data Selection with Feature Decay Algorithms Using an Approximated Target Side . (2018). https:\/\/doi.org\/10.48550\/ARXIV.1811.03039 10.48550\/ARXIV.1811.03039 Alberto Poncelas, Gideon Maillette de\u00a0Buy Wenniger, and Andy Way. 2018. Data Selection with Feature Decay Algorithms Using an Approximated Target Side. (2018). https:\/\/doi.org\/10.48550\/ARXIV.1811.03039"},{"key":"e_1_3_2_1_19_1","volume-title":"Nguyen Van\u00a0Vinh, Ngo\u00a0Thi Vinh, Nguyen\u00a0Phuong Thai, and Tran\u00a0Hong Viet.","author":"Quan Nguyen\u00a0Hoang","year":"2021","unstructured":"Nguyen\u00a0Hoang Quan , Nguyen\u00a0Thanh Dat , Nguyen Hoang\u00a0Minh Cong , Nguyen Van\u00a0Vinh, Ngo\u00a0Thi Vinh, Nguyen\u00a0Phuong Thai, and Tran\u00a0Hong Viet. 2021 . ViNMT: Neural Machine Translation Toolkit . https:\/\/doi.org\/10.48550\/ARXIV.2112.15272 10.48550\/ARXIV.2112.15272 Nguyen\u00a0Hoang Quan, Nguyen\u00a0Thanh Dat, Nguyen Hoang\u00a0Minh Cong, Nguyen Van\u00a0Vinh, Ngo\u00a0Thi Vinh, Nguyen\u00a0Phuong Thai, and Tran\u00a0Hong Viet. 2021. ViNMT: Neural Machine Translation Toolkit. https:\/\/doi.org\/10.48550\/ARXIV.2112.15272"},{"key":"#cr-split#-e_1_3_2_1_20_1.1","doi-asserted-by":"crossref","unstructured":"Nils Reimers and Iryna Gurevych. 2020. Making Monolingual Sentence Embeddings Multilingual using Knowledge Distillation. https:\/\/doi.org\/10.48550\/ARXIV.2004.09813 10.48550\/ARXIV.2004.09813","DOI":"10.18653\/v1\/2020.emnlp-main.365"},{"key":"#cr-split#-e_1_3_2_1_20_1.2","doi-asserted-by":"crossref","unstructured":"Nils Reimers and Iryna Gurevych. 2020. Making Monolingual Sentence Embeddings Multilingual using Knowledge Distillation. https:\/\/doi.org\/10.48550\/ARXIV.2004.09813","DOI":"10.18653\/v1\/2020.emnlp-main.365"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00313"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1009"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-6323"},{"key":"#cr-split#-e_1_3_2_1_24_1.1","unstructured":"Yuqing Tang Chau Tran Xian Li Peng-Jen Chen Naman Goyal Vishrav Chaudhary Jiatao Gu and Angela Fan. 2020. Multilingual Translation with Extensible Multilingual Pretraining and Finetuning. https:\/\/doi.org\/10.48550\/ARXIV.2008.00401 10.48550\/ARXIV.2008.00401"},{"key":"#cr-split#-e_1_3_2_1_24_1.2","unstructured":"Yuqing Tang Chau Tran Xian Li Peng-Jen Chen Naman Goyal Vishrav Chaudhary Jiatao Gu and Angela Fan. 2020. Multilingual Translation with Extensible Multilingual Pretraining and Finetuning. https:\/\/doi.org\/10.48550\/ARXIV.2008.00401"},{"key":"#cr-split#-e_1_3_2_1_25_1.1","doi-asserted-by":"crossref","unstructured":"Marlies van\u00a0der Wees Arianna Bisazza and Christof Monz. 2017. Dynamic Data Selection for Neural Machine Translation. https:\/\/doi.org\/10.48550\/ARXIV.1708.00712 10.48550\/ARXIV.1708.00712","DOI":"10.18653\/v1\/D17-1147"},{"key":"#cr-split#-e_1_3_2_1_25_1.2","doi-asserted-by":"crossref","unstructured":"Marlies van\u00a0der Wees Arianna Bisazza and Christof Monz. 2017. Dynamic Data Selection for Neural Machine Translation. https:\/\/doi.org\/10.48550\/ARXIV.1708.00712","DOI":"10.18653\/v1\/D17-1147"},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of the 13th Conference on Language Resources and Evaluation (LREC","author":"Nguyen Van-Vinh","year":"2022","unstructured":"Van-Vinh Nguyen , Ha Nguyen-Tien , Huong Le-Thanh , Phuong-Thai Nguyen , Van-Tan Bui Nghia-Luan Pham , Tuan-Anh Phan , Minh-Cong Nguyen Hoang , Hong-Viet Tran , Huu-Anh Tran . 2022 . KC4MT: A High-Quality Corpus for Multilingual Machine Translation . In Proceedings of the 13th Conference on Language Resources and Evaluation (LREC 2022). 5494\u20135502. Van-Vinh Nguyen, Ha Nguyen-Tien, Huong Le-Thanh, Phuong-Thai Nguyen, Van-Tan Bui Nghia-Luan Pham, Tuan-Anh Phan, Minh-Cong Nguyen Hoang, Hong-Viet Tran, Huu-Anh Tran. 2022. KC4MT: A High-Quality Corpus for Multilingual Machine Translation. In Proceedings of the 13th Conference on Language Resources and Evaluation (LREC 2022). 5494\u20135502."},{"key":"e_1_3_2_1_27_1","volume-title":"Advances in Neural Information Processing Systems, I.\u00a0Guyon, U.\u00a0Von Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.). Vol.\u00a030. Curran Associates","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani , Noam Shazeer , Niki Parmar , Jakob Uszkoreit , Llion Jones , Aidan\u00a0 N Gomez , \u0141\u00a0ukasz Kaiser , and Illia Polosukhin . 2017. Attention is All you Need . In Advances in Neural Information Processing Systems, I.\u00a0Guyon, U.\u00a0Von Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.). Vol.\u00a030. Curran Associates , Inc .https:\/\/proceedings.neurips.cc\/paper\/ 2017 \/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141\u00a0ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems, I.\u00a0Guyon, U.\u00a0Von Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.). Vol.\u00a030. Curran Associates, Inc.https:\/\/proceedings.neurips.cc\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"e_1_3_2_1_28_1","unstructured":"Zhu Jinhua and Xia Yingce and Wu Lijun and He Di and Qin Tao and Zhou Wengang and Li Houqiang and Liu Tie-Yan. 2020. Incorporating BERT into Neural Machine Translation.  Zhu Jinhua and Xia Yingce and Wu Lijun and He Di and Qin Tao and Zhou Wengang and Li Houqiang and Liu Tie-Yan. 2020. Incorporating BERT into Neural Machine Translation."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1163"}],"event":{"name":"SoICT 2022: The 11th International Symposium on Information and Communication Technology","acronym":"SoICT 2022","location":"Hanoi Vietnam"},"container-title":["The 11th International Symposium on Information and Communication Technology"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3568562.3568646","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3568562.3568646","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:00:40Z","timestamp":1750186840000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3568562.3568646"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12]]},"references-count":36,"alternative-id":["10.1145\/3568562.3568646","10.1145\/3568562"],"URL":"https:\/\/doi.org\/10.1145\/3568562.3568646","relation":{},"subject":[],"published":{"date-parts":[[2022,12]]},"assertion":[{"value":"2022-12-01","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}