{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,2]],"date-time":"2025-11-02T05:04:44Z","timestamp":1762059884185,"version":"build-2065373602"},"reference-count":42,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,6,22]],"date-time":"2022-06-22T00:00:00Z","timestamp":1655856000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,6,22]],"date-time":"2022-06-22T00:00:00Z","timestamp":1655856000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,6,22]]},"DOI":"10.1109\/jcsse54890.2022.9836268","type":"proceedings-article","created":{"date-parts":[[2022,7,28]],"date-time":"2022-07-28T19:47:39Z","timestamp":1659037659000},"page":"1-6","source":"Crossref","is-referenced-by-count":5,"title":["Tokenization-based data augmentation for text classification"],"prefix":"10.1109","author":[{"given":"Patawee","family":"Prakrankamanant","sequence":"first","affiliation":[{"name":"Chulalongkorn University,Department of Computer Engineering,Bangkok,Thailand"}]},{"given":"Ekapol","family":"Chuangsuwanich","sequence":"additional","affiliation":[{"name":"Chulalongkorn University,Department of Computer Engineering,Bangkok,Thailand"}]}],"member":"263","reference":[{"journal-title":"Wongnai\/wongnai-corpus Collection of wongnai's datasets","year":"2018","author":"wongnai","key":"ref39"},{"journal-title":"Pythainlp\/wisesight-sentiment First release","year":"2019","author":"suriyawongkul","key":"ref38"},{"key":"ref33","article-title":"Improving neural networks by preventing co-adaptation of feature detectors","author":"hinton","year":"2012","journal-title":"ArXiv Preprint"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/732"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.740"},{"key":"ref30","article-title":"Roberta: A robustly optimized BERT pretraining approach","author":"liu","year":"2019","journal-title":"ArXiv"},{"journal-title":"Thai natural language processing in Python","year":"2016","author":"phatthiyaphaibun","key":"ref37"},{"journal-title":"Pythainlp\/classification-benchmarks v0 1-alpha","year":"2020","key":"ref36"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461347"},{"key":"ref34","first-page":"1929","article-title":"Dropout: a simple way to prevent neural networks from overfitting","volume":"15","author":"srivastava","year":"2014","journal-title":"JMLR"},{"journal-title":"Deep Learning","year":"2016","author":"goodfellow","key":"ref10"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3010828"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ECTICON.2008.4600388"},{"key":"ref12","first-page":"1871","article-title":"Liblinear: A library for large linear classification","volume":"9","author":"fan","year":"2008","journal-title":"JMLR"},{"key":"ref13","article-title":"Efficient estimation of word representations in vector space","author":"mikolov","year":"2013","journal-title":"ArXiv"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953264"},{"key":"ref15","article-title":"Neural machine translation of rare words with subword units","author":"sennrich","year":"2015","journal-title":"ArXiv"},{"key":"ref16","article-title":"Google's neural machine translation system: Bridging the gap between human and machine translation","author":"wu","year":"2016","journal-title":"ArXiv"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1007"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-2012"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1158"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.3390\/sym11111393"},{"journal-title":"Convolutional neural networks for sentence classification","year":"2015","author":"chen","key":"ref4"},{"key":"ref27","first-page":"2191","article-title":"Aug-bert: An efficient data augmentation algorithm for text classification","author":"shi","year":"2019","journal-title":"CSPS2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P15-1162"},{"key":"ref6","first-page":"68","article-title":"How robust are character-based word embeddings in tagging and MT against wrod scramlbing or randdm nouse?","author":"heigold","year":"2018","journal-title":"Proceedings of AMTA"},{"key":"ref29","article-title":"Wangchanberta: Pretraining transformer-based thai language models","author":"lowphansirikul","year":"2021","journal-title":"ArXiv"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2016.7511605"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6356"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W16-2613"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/E17-2068"},{"key":"ref9","article-title":"Word shape matters: Robust machine translation with visual embedding","author":"wang","year":"2020","journal-title":"ArXiv"},{"key":"ref1","first-page":"649","article-title":"Character-level convolutional networks for text classification","volume":"28","author":"zhang","year":"2015","journal-title":"NIPS"},{"journal-title":"A Thai word tokenization library using Deep Neural Network","year":"2018","author":"kittinaradorn","key":"ref20"},{"journal-title":"Tokenization of japanese text Using a morphological transducer","year":"2018","author":"hanlon","key":"ref22"},{"journal-title":"Thai word segmentation with bi-directional RNN","year":"2017","author":"jousimo","key":"ref21"},{"journal-title":"thai2fit Thai language implementation of ulmfit","year":"2021","author":"polpanumas","key":"ref42"},{"key":"ref24","article-title":"Attacut: A fast and accurate neural thai word segmenter","author":"chormai","year":"2019","journal-title":"ArXiv"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1031"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.3115\/1557769.1557791"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-22747-0_7"},{"key":"ref25","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2019","journal-title":"Proceedings of NAACL-HLT"}],"event":{"name":"2022 19th International Joint Conference on Computer Science and Software Engineering (JCSSE)","start":{"date-parts":[[2022,6,22]]},"location":"Bangkok, Thailand","end":{"date-parts":[[2022,6,25]]}},"container-title":["2022 19th International Joint Conference on Computer Science and Software Engineering (JCSSE)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9836023\/9836237\/09836268.pdf?arnumber=9836268","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,15]],"date-time":"2022-08-15T20:03:45Z","timestamp":1660593825000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9836268\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,22]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/jcsse54890.2022.9836268","relation":{},"subject":[],"published":{"date-parts":[[2022,6,22]]}}}