{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T15:46:09Z","timestamp":1776786369842,"version":"3.51.2"},"reference-count":74,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,9,17]],"date-time":"2022-09-17T00:00:00Z","timestamp":1663372800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,9,17]],"date-time":"2022-09-17T00:00:00Z","timestamp":1663372800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2023,1]]},"DOI":"10.1007\/s00521-022-07745-w","type":"journal-article","created":{"date-parts":[[2022,9,17]],"date-time":"2022-09-17T10:02:36Z","timestamp":1663408956000},"page":"573-594","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":23,"title":["Vietnamese hate and offensive detection using PhoBERT-CNN and social media streaming data"],"prefix":"10.1007","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1288-8003","authenticated-orcid":false,"given":"Khanh","family":"Quoc Tran","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7782-8389","authenticated-orcid":false,"given":"An","family":"Trong Nguyen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7341-1981","authenticated-orcid":false,"given":"Phu Gia","family":"Hoang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0833-3681","authenticated-orcid":false,"given":"Canh Duc","family":"Luu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5997-4983","authenticated-orcid":false,"given":"Trong-Hop","family":"Do","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8456-2742","authenticated-orcid":false,"given":"Kiet","family":"Van Nguyen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,9,17]]},"reference":[{"key":"7745_CR1","doi-asserted-by":"crossref","unstructured":"Mohan S, Guha A, Harris M, Popowich F, Schuster A, Priebe C (2017) The impact of toxic language on the health of reddit communities. In: Canadian conference on artificial intelligence. Springer, pp 51\u201356","DOI":"10.1007\/978-3-319-57351-9_6"},{"issue":"6","key":"7745_CR2","doi-asserted-by":"publisher","first-page":"394","DOI":"10.1111\/edt.12429","volume":"34","author":"S Abu-Ghazaleh","year":"2018","unstructured":"Abu-Ghazaleh S, Hassona Y, Hattar S (2018) Dental trauma in social media-analysis of facebook content and public engagement. Dent Traumatol 34(6):394\u2013400","journal-title":"Dent Traumatol"},{"key":"7745_CR3","unstructured":"Statista: Global number of hate speech-containing content removed by Facebook from 4th quarter 2017 to 2nd quarter 2021 (2018). https:\/\/www.statista.com\/statistics\/1013804\/facebook-hate-speech- content-deletion-quarter"},{"key":"7745_CR4","unstructured":"Seetharaman D (2018) Facebook throws more money at wiping out hate speech and bad actors. https:\/\/www.wsj.com\/articles\/facebook-throws-more-cash-at-tough-problem-stamping-out-bad-content-15263932"},{"key":"7745_CR5","unstructured":"Microsoft: Global number of hate speech-containing content removed by Facebook from 4th quarter 2017 to 2nd quarter 2021 (2020). https:\/\/www.microsoft.com\/en-us\/online-safety\/digital-civility"},{"key":"7745_CR6","first-page":"237","volume-title":"Posttraumatic stress disorder","author":"TM Keane","year":"1994","unstructured":"Keane TM, Fisher LM, Krinsley KE, Niles BL (1994) Posttraumatic stress disorder. Springer, Berlin, pp 237\u2013260"},{"key":"7745_CR7","doi-asserted-by":"publisher","unstructured":"Malmasi S, Zampieri M (2017) Detecting hate speech in social media. In: Proceedings of the international conference recent advances in natural language processing. INCOMA Ltd., Varna, pp 467\u2013472. https:\/\/doi.org\/10.26615\/978-954-452-049-6_062","DOI":"10.26615\/978-954-452-049-6_062"},{"key":"7745_CR8","doi-asserted-by":"crossref","unstructured":"Schmidt A, Wiegand M (2017) A survey on hate speech detection using natural language processing. In: Proceedings of the fifth international workshop on natural language processing for social media, pp 1\u201310","DOI":"10.18653\/v1\/W17-1101"},{"key":"7745_CR9","unstructured":"Vu X-S, Vu T, Tran M-V, Le-Cong T, Nguyen H (2020) HSD shared task in VLSP campaign 2019: hate speech detection for social good. arXiv preprint. arXiv:2007.06493"},{"key":"7745_CR10","doi-asserted-by":"crossref","unstructured":"Luu ST, Nguyen KV, Nguyen NL-T (2021) A large-scale dataset for hate speech detection on Vietnamese social media texts. In: Fujita H, Selamat A, Lin JC-W, Ali M (eds) Advances and trends in artificial intelligence. Artificial intelligence practices. Springer, Cham, pp 415\u2013426","DOI":"10.1007\/978-3-030-79457-6_35"},{"issue":"28","key":"7745_CR11","doi-asserted-by":"publisher","first-page":"35239","DOI":"10.1007\/s11042-020-10082-6","volume":"80","author":"U Naseem","year":"2021","unstructured":"Naseem U, Razzak I, Eklund PW (2021) A survey of pre-processing techniques to improve short-text quality: a case study on hate speech detection on twitter. Multimed Tools Appl 80(28):35239\u201335266","journal-title":"Multimed Tools Appl"},{"key":"7745_CR12","doi-asserted-by":"crossref","unstructured":"Nguyen KP-Q, Van\u00a0Nguyen K (2020) Exploiting Vietnamese social media characteristics for textual emotion recognition in Vietnamese. In: International conference on Asian language processing (IALP). IEEE, pp 276\u2013281","DOI":"10.1109\/IALP51396.2020.9310495"},{"key":"7745_CR13","doi-asserted-by":"publisher","unstructured":"Vu T, Nguyen DQ, Nguyen DQ, Dras M, Johnson M (2018) VnCoreNLP: a Vietnamese natural language processing toolkit. In: Proceedings of the 2018 conference of the North American Chapter of the Association for computational linguistics: demonstrations. Association for Computational Linguistics, New Orleans, pp 56\u201360. https:\/\/doi.org\/10.18653\/v1\/N18-5012","DOI":"10.18653\/v1\/N18-5012"},{"issue":"4","key":"7745_CR14","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3232676","volume":"51","author":"P Fortuna","year":"2018","unstructured":"Fortuna P, Nunes S (2018) A survey on automatic detection of hate speech in text. ACM Comput Surv (CSUR) 51(4):1\u201330","journal-title":"ACM Comput Surv (CSUR)"},{"key":"7745_CR15","doi-asserted-by":"crossref","unstructured":"Alrehili A (2019) Automatic hate speech detection on social media: a brief survey. In: IEEE\/ACS 16th International conference on computer systems and applications (AICCSA). IEEE, pp. 1\u20136","DOI":"10.1109\/AICCSA47632.2019.9035228"},{"key":"7745_CR16","doi-asserted-by":"crossref","unstructured":"Waseem Z, Hovy D (2016) Hateful symbols or hateful people? Predictive features for hate speech detection on twitter. In: Proceedings of the NAACL student research workshop, pp 88\u201393","DOI":"10.18653\/v1\/N16-2013"},{"key":"7745_CR17","doi-asserted-by":"publisher","first-page":"10809","DOI":"10.1007\/s00521-018-3442-0","volume":"32","author":"J Chen","year":"2018","unstructured":"Chen J, Yan S, Wong K-C (2018) Verbal aggression detection on twitter comments: convolutional neural network for short-text sentiment analysis. Neural Comput Appl 32:10809\u201310818","journal-title":"Neural Comput Appl"},{"key":"7745_CR18","doi-asserted-by":"crossref","unstructured":"Davidson T, Warmsley D, Macy M, Weber I (2017) Automated hate speech detection and the problem of offensive language. In: Proceedings of the international AAAI conference on web and social media, vol 11","DOI":"10.1609\/icwsm.v11i1.14955"},{"key":"7745_CR19","unstructured":"Do HT-T, Huynh HD, Van\u00a0Nguyen K, Nguyen NL-T, Nguyen AG-T (2019) Hate speech detection on Vietnamese social media text using the bidirectional-lstm model. arXiv preprint. arXiv:1911.03648"},{"key":"7745_CR20","unstructured":"Huu QP, Trung SN, Pham HA (2019) Automated hate speech detection on Vietnamese social networks. Technical report, EasyChair"},{"key":"7745_CR21","unstructured":"Huynh HD, Do HT-T, Nguyen KV, Nguyen NT-L (2020) A simple and efficient ensemble classifier combining multiple neural network models on social media datasets in Vietnamese. In: Proceedings of the 34th Pacific Asia conference on language, information and computation. Association for Computational Linguistics, Hanoi, pp 420\u2013429"},{"key":"7745_CR22","doi-asserted-by":"crossref","unstructured":"Luu ST, Nguyen HP, Van\u00a0Nguyen K, Nguyen NL-T (2020) Comparison between traditional machine learning models and neural network models for Vietnamese hate speech detection. In: RIVF international conference on computing and communication technologies (RIVF). IEEE, pp 1\u20136","DOI":"10.1109\/RIVF48685.2020.9140745"},{"key":"7745_CR23","unstructured":"Nguyen TB, Nguyen QM, Nguyen TH, Pham NP, Nguyen TL, Do QT (2019) Vais hate speech detection system: a deep learning based approach for system combination. arXiv preprint. arXiv:1910.05608"},{"key":"7745_CR24","first-page":"3","volume":"5","author":"D Van Thin","year":"2019","unstructured":"Van Thin D, Le LS, Nguyen NL-T (2019) Nlp@ uit: Exploring feature engineer and ensemble model for hate speech detection at vlsp 2019. Training 5:3\u201351","journal-title":"Training"},{"key":"7745_CR25","doi-asserted-by":"crossref","unstructured":"Martins R, Gomes M, Almeida JJ, Novais P, Henriques P (2018) Hate speech classification in social media using emotional analysis. In: 7th Brazilian conference on intelligent systems (BRACIS). IEEE, pp 61\u201366","DOI":"10.1109\/BRACIS.2018.00019"},{"key":"7745_CR26","doi-asserted-by":"publisher","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K (2019) BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, vol 1 (Long and Short Papers). Association for Computational Linguistics, Minneapolis, pp 4171\u20134186. https:\/\/doi.org\/10.18653\/v1\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"7745_CR27","unstructured":"Liu Y, Ott M, Goyal N, Du J, Joshi M, Chen D, Levy O, Lewis M, Zettlemoyer L, Stoyanov V (2019) Roberta: a robustly optimized bert pretraining approach. arXiv preprint. arXiv:1907.11692"},{"key":"7745_CR28","doi-asserted-by":"publisher","unstructured":"Conneau A, Khandelwal K, Goyal N, Chaudhary V, Wenzek G, Guzm\u00e1n F, Grave E, Ott M, Zettlemoyer L, Stoyanov V (2020) Unsupervised cross-lingual representation learning at scale. In: Proceedings of the 58th annual meeting of the association for computational linguistics. Association for Computational Linguistics, pp 8440\u20138451 (Online). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.747","DOI":"10.18653\/v1\/2020.acl-main.747"},{"key":"7745_CR29","doi-asserted-by":"crossref","unstructured":"Safaya A, Abdullatif M, Yuret D (2020) Kuisail at semeval-2020 task 12: Bert-cnn for offensive speech identification in social media. In: Proceedings of the fourteenth workshop on semantic evaluation, pp 2054\u20132059","DOI":"10.18653\/v1\/2020.semeval-1.271"},{"key":"7745_CR30","doi-asserted-by":"crossref","unstructured":"Liu Y, Liu H, Wong L-P, Lee L-K, Zhang H, Hao T (2020) A hybrid neural network rbert-c based on pre-trained roberta and cnn for user intent classification. In: International conference on neural computing for advanced applications. Springer, pp 306\u2013319","DOI":"10.1007\/978-981-15-7670-6_26"},{"key":"7745_CR31","unstructured":"Saha D, Paharia N, Chakraborty D, Saha P, Mukherjee A (2021) Hate-alert@DravidianLangTech-EACL2021: ensembling strategies for transformer-based offensive language detection. In: Proceedings of the first workshop on speech and language technologies for Dravidian languages. Association for Computational Linguistics, Kyiv, pp 270\u2013276"},{"issue":"8","key":"7745_CR32","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"issue":"11","key":"7745_CR33","doi-asserted-by":"publisher","first-page":"2673","DOI":"10.1109\/78.650093","volume":"45","author":"M Schuster","year":"1997","unstructured":"Schuster M, Paliwal KK (1997) Bidirectional recurrent neural networks. IEEE Trans Signal Process 45(11):2673\u20132681","journal-title":"IEEE Trans Signal Process"},{"key":"7745_CR34","unstructured":"Chung J, Gulcehre C, Cho K, Bengio Y (2014) Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint. arXiv:1412.3555"},{"key":"7745_CR35","doi-asserted-by":"crossref","unstructured":"He C, Chen S, Huang S, Zhang J, Song X (2019) Using convolutional neural network with bert for intent determination. In: International conference on Asian language processing (IALP). IEEE, pp 65\u201370","DOI":"10.1109\/IALP48816.2019.9037668"},{"key":"7745_CR36","doi-asserted-by":"publisher","unstructured":"Kim Y (2014) Convolutional neural networks for sentence classification. In: Proceedings of the 2014 conference on empirical methods in natural language processing (EMNLP). Association for Computational Linguistics, Doha, pp 1746\u20131751. https:\/\/doi.org\/10.3115\/v1\/D14-1181","DOI":"10.3115\/v1\/D14-1181"},{"key":"7745_CR37","doi-asserted-by":"publisher","unstructured":"Nguyen DQ, Tuan\u00a0Nguyen A (2020) PhoBERT: pre-trained language models for Vietnamese. In: Findings of the association for computational linguistics: EMNLP 2020. Association for Computational Linguistics, pp 1037\u20131042 (Online). https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.92","DOI":"10.18653\/v1\/2020.findings-emnlp.92"},{"issue":"5","key":"7745_CR38","doi-asserted-by":"publisher","first-page":"1425","DOI":"10.1007\/s00521-018-3476-3","volume":"31","author":"SM Nagarajan","year":"2019","unstructured":"Nagarajan SM, Gandhi UD (2019) Classifying streaming of twitter data based on sentiment analysis using hybridization. Neural Comput Appl 31(5):1425\u20131433","journal-title":"Neural Comput Appl"},{"issue":"4","key":"7745_CR39","doi-asserted-by":"publisher","first-page":"1411","DOI":"10.11591\/eei.v9i4.1897","volume":"9","author":"ND Zaki","year":"2020","unstructured":"Zaki ND, Hashim NY, Mohialden YM, Mohammed MA, Sutikno T, Ali AH (2020) A real-time big data sentiment analysis for iraqi tweets using spark streaming. Bull Electric Eng Inform 9(4):1411\u20131419","journal-title":"Bull Electric Eng Inform"},{"issue":"2","key":"7745_CR40","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1002\/poi3.85","volume":"7","author":"P Burnap","year":"2015","unstructured":"Burnap P, Williams ML (2015) Cyber hate speech on twitter: an application of machine classification and statistical modeling for policy and decision making. Policy Internet 7(2):223\u2013242","journal-title":"Policy Internet"},{"key":"7745_CR41","doi-asserted-by":"crossref","unstructured":"Anagnostou A, Mollas I, Tsoumakas, G (2018) Hatebusters: a web application for actively reporting youtube hate speech. In: IJCAI, pp 5796\u20135798","DOI":"10.24963\/ijcai.2018\/841"},{"key":"7745_CR42","doi-asserted-by":"crossref","unstructured":"Bird S (2006) Nltk: the natural language toolkit. In: Proceedings of the COLING\/ACL 2006 interactive presentation sessions, pp 69\u201372","DOI":"10.3115\/1225403.1225421"},{"key":"7745_CR43","unstructured":"Le V-D (2017) Stopwords: Vietnamese. GitHub"},{"key":"7745_CR44","unstructured":"Luu S, Nguyen K, Nguyen N (2020) Empirical study of text augmentation on social media text in Vietnamese. In: Proceedings of the 34th Pacific Asia conference on language, information and computation. Association for Computational Linguistics, Hanoi, pp 462\u2013470"},{"issue":"5","key":"7745_CR45","doi-asserted-by":"publisher","first-page":"429","DOI":"10.3233\/IDA-2002-6504","volume":"6","author":"N Japkowicz","year":"2002","unstructured":"Japkowicz N, Stephen S (2002) The class imbalance problem: a systematic study. Intell Data Anal 6(5):429\u2013449","journal-title":"Intell Data Anal"},{"key":"7745_CR46","doi-asserted-by":"publisher","unstructured":"Wei J, Zou K (2019) EDA: easy data augmentation techniques for boosting performance on text classification tasks. In: Proceedings of the 2019 conference on empirical methods in natural language processing and the 9th international joint conference on natural language processing (EMNLP-IJCNLP). Association for Computational Linguistics, Hong Kong, pp 6382\u20136388. https:\/\/doi.org\/10.18653\/v1\/D19-1670","DOI":"10.18653\/v1\/D19-1670"},{"key":"7745_CR47","doi-asserted-by":"crossref","unstructured":"Pham-Hong B-T, Chokshi S (2020) PGSG at SemEval-2020 task 12: BERT-LSTM with tweets\u2019 pretrained model and noisy student training method. In: Proceedings of the fourteenth workshop on semantic evaluation, pp 2111\u20132116","DOI":"10.18653\/v1\/2020.semeval-1.280"},{"key":"7745_CR48","doi-asserted-by":"publisher","unstructured":"Li X, Bing L, Zhang W, Lam W (2019) Exploiting BERT for end-to-end aspect-based sentiment analysis. In: Proceedings of the 5th workshop on noisy user-generated text (W-NUT 2019). Association for Computational Linguistics, Hong Kong, pp 34\u201341. https:\/\/doi.org\/10.18653\/v1\/D19-5505","DOI":"10.18653\/v1\/D19-5505"},{"key":"7745_CR49","doi-asserted-by":"crossref","unstructured":"Yi R, Hu W (2019) Pre-trained BERT-GRU model for relation extraction. In: Proceedings of the 2019 8th international conference on computing and pattern recognition, pp 453\u2013457","DOI":"10.1145\/3373509.3373533"},{"issue":"11","key":"7745_CR50","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1145\/2934664","volume":"59","author":"M Zaharia","year":"2016","unstructured":"Zaharia M, Xin RS, Wendell P, Das T, Armbrust M, Dave A, Meng X, Rosen J, Venkataraman S, Franklin MJ et al (2016) Apache spark: a unified engine for big data processing. Commun ACM 59(11):56\u201365","journal-title":"Commun ACM"},{"key":"7745_CR51","unstructured":"Rish I et al (2001) An empirical study of the naive Bayes classifier. In: IJCAI 2001 Workshop on empirical methods in artificial intelligence, vol 3, pp 41\u201346"},{"key":"7745_CR52","doi-asserted-by":"crossref","unstructured":"Kim S-B, Rim H-C, Yook D, Lim H-S (2002) Effective methods for improving naive Bayes text classifiers. In: Pacific rim international conference on artificial intelligence. Springer, pp 414\u2013423","DOI":"10.1007\/3-540-45683-X_45"},{"key":"7745_CR53","doi-asserted-by":"crossref","unstructured":"Liu S, Forss T (2014) Combining N-gram based similarity analysis with sentiment analysis in web content classification. In: KDIR, pp 530\u2013537","DOI":"10.5220\/0005170305300537"},{"issue":"3","key":"7745_CR54","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1198\/004017007000000245","volume":"49","author":"A Genkin","year":"2007","unstructured":"Genkin A, Lewis DD, Madigan D (2007) Large-scale Bayesian logistic regression for text categorization. Technometrics 49(3):291\u2013304","journal-title":"Technometrics"},{"key":"7745_CR55","doi-asserted-by":"publisher","DOI":"10.1002\/9781118548387","volume-title":"Applied logistic regression","author":"DW Hosmer Jr","year":"2013","unstructured":"Hosmer DW Jr, Lemeshow S, Sturdivant RX (2013) Applied logistic regression, vol 398. Wiley, Hoboken"},{"issue":"2","key":"7745_CR56","doi-asserted-by":"publisher","first-page":"221","DOI":"10.22364\/bjmc.2017.5.2.05","volume":"5","author":"T Pranckevi\u010dius","year":"2017","unstructured":"Pranckevi\u010dius T, Marcinkevi\u010dius V (2017) Comparison of naive Bayes, random forest, decision tree, support vector machines, and logistic regression classifiers for text reviews classification. Baltic J Mod Comput 5(2):221","journal-title":"Baltic J Mod Comput"},{"issue":"8","key":"7745_CR57","first-page":"966","volume":"4","author":"M Ikonomakis","year":"2005","unstructured":"Ikonomakis M, Kotsiantis S, Tampakas V (2005) Text classification using machine learning techniques. WSEAS Trans Comput 4(8):966\u2013974","journal-title":"WSEAS Trans Comput"},{"key":"7745_CR58","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1140\/epjds\/s13688-016-0072-6","volume":"5","author":"P Burnap","year":"2016","unstructured":"Burnap P, Williams ML (2016) Us and them: identifying cyber hate on twitter across multiple protected characteristics. EPJ Data Sci 5:1\u201315","journal-title":"EPJ Data Sci"},{"issue":"3","key":"7745_CR59","first-page":"18","volume":"2","author":"A Liaw","year":"2002","unstructured":"Liaw A, Wiener M et al (2002) Classification and regression by randomforest. R news 2(3):18\u201322","journal-title":"R news"},{"key":"7745_CR60","doi-asserted-by":"crossref","unstructured":"Islam MZ, Liu J, Li J, Liu L, Kang W (2019) A semantics aware random forest for text classification. In: Proceedings of the 28th ACM international conference on information and knowledge management, pp 1061\u20131070","DOI":"10.1145\/3357384.3357891"},{"key":"7745_CR61","doi-asserted-by":"crossref","unstructured":"Badjatiya P, Gupta S, Gupta M, Varma V (2017) Deep learning for hate speech detection in tweets. In: Proceedings of the 26th international conference on world wide web companion, pp 759\u2013760","DOI":"10.1145\/3041021.3054223"},{"key":"7745_CR62","doi-asserted-by":"publisher","DOI":"10.1201\/9781420049176","volume-title":"Recurrent neural networks: design and applications","author":"L Medsker","year":"1999","unstructured":"Medsker L, Jain LC (1999) Recurrent neural networks: design and applications. CRC Press, Boca Raton"},{"key":"7745_CR63","doi-asserted-by":"publisher","unstructured":"Tenney I, Das D, Pavlick E (2019) BERT rediscovers the classical NLP pipeline. In: Proceedings of the 57th annual meeting of the association for computational linguistics. Association for Computational Linguistics, Florence, pp 4593\u20134601. https:\/\/doi.org\/10.18653\/v1\/P19-1452","DOI":"10.18653\/v1\/P19-1452"},{"key":"7745_CR64","unstructured":"Michel P, Levy O, Neubig G (2019) Are sixteen heads really better than one? In: Wallach H, Larochelle H, Beygelzimer A, d\u2019 Alch\u00e9-Buc F, Fox E, Garnett R (eds) Advances in neural information processing systems, vol 32. Curran Associates, Inc., Red Hook"},{"key":"7745_CR65","doi-asserted-by":"publisher","first-page":"842","DOI":"10.1162\/tacl_a_00349","volume":"8","author":"A Rogers","year":"2020","unstructured":"Rogers A, Kovaleva O, Rumshisky A (2020) A primer in bertology: what we know about how bert works. Trans Assoc Comput Linguist 8:842\u2013866","journal-title":"Trans Assoc Comput Linguist"},{"key":"7745_CR66","unstructured":"Sigurbergsson GI, Derczynski L (2019) Offensive language and hate speech detection for Danish. arXiv preprint. arXiv:1908.04531"},{"issue":"1","key":"7745_CR67","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12864-019-6413-7","volume":"21","author":"D Chicco","year":"2020","unstructured":"Chicco D, Jurman G (2020) The advantages of the Matthews correlation coefficient (MCC) over F1 score and accuracy in binary classification evaluation. BMC Genomics 21(1):1\u201313","journal-title":"BMC Genomics"},{"key":"7745_CR68","doi-asserted-by":"publisher","unstructured":"Vu\u00a0Xuan S, Vu T, Tran S, Jiang L (2019) ETNLP: a visual-aided systematic approach to select pre-trained embeddings for a downstream task. In: Proceedings of the international conference on recent advances in natural language processing (RANLP 2019). INCOMA Ltd., Varna, pp 1285\u20131294. https:\/\/doi.org\/10.26615\/978-954-452-056-4_147","DOI":"10.26615\/978-954-452-056-4_147"},{"key":"7745_CR69","doi-asserted-by":"crossref","unstructured":"Nguyen AT, Dao MH, Nguyen DQ (2020) A pilot study of text-to-SQL semantic parsing for Vietnamese. In: Findings of the association for computational linguistics: EMNLP 2020, pp 4079\u20134085","DOI":"10.18653\/v1\/2020.findings-emnlp.364"},{"key":"7745_CR70","unstructured":"Datareportal: Digital 2021: Vietnam (2021). https:\/\/datareportal.com\/reports\/digital-2021-vietnam"},{"issue":"1","key":"7745_CR71","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1177\/001316446002000104","volume":"20","author":"J Cohen","year":"1960","unstructured":"Cohen J (1960) A coefficient of agreement for nominal scales. Educ Psychol Meas 20(1):37\u201346","journal-title":"Educ Psychol Meas"},{"key":"7745_CR72","doi-asserted-by":"crossref","unstructured":"Mozafari M, Farahbakhsh R, Crespi N (2019) A bert-based transfer learning approach for hate speech detection in online social media. In: International conference on complex networks and their applications. Springer, pp 928\u2013940","DOI":"10.1007\/978-3-030-36687-2_77"},{"issue":"17","key":"7745_CR73","first-page":"14867","volume":"35","author":"B Mathew","year":"2021","unstructured":"Mathew B, Saha P, Yimam SM, Biemann C, Goyal P, Mukherjee A (2021) Hatexplain: a benchmark dataset for explainable hate speech detection. Proc AAAI Conf Artif Intell 35(17):14867\u201314875","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"7745_CR74","doi-asserted-by":"crossref","unstructured":"Pavlopoulos J, Sorensen J, Laugier L, Androutsopoulos I (2021) Semeval-2021 task 5: toxic spans detection. In: Proceedings of the 15th international workshop on semantic evaluation (SemEval-2021), pp 59\u201369","DOI":"10.18653\/v1\/2021.semeval-1.6"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-022-07745-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-022-07745-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-022-07745-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,7]],"date-time":"2023-01-07T06:11:22Z","timestamp":1673071882000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-022-07745-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9,17]]},"references-count":74,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,1]]}},"alternative-id":["7745"],"URL":"https:\/\/doi.org\/10.1007\/s00521-022-07745-w","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,9,17]]},"assertion":[{"value":"18 January 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 August 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 September 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}