{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T22:32:41Z","timestamp":1773268361962,"version":"3.50.1"},"reference-count":48,"publisher":"Ubiquity Press, Ltd.","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,23]]},"DOI":"10.5334\/dsj-2024-031","type":"journal-article","created":{"date-parts":[[2024,5,23]],"date-time":"2024-05-23T06:13:30Z","timestamp":1716444810000},"source":"Crossref","is-referenced-by-count":12,"title":["The Optimization of n-Gram Feature Extraction Based on Term Occurrence for Cyberbullying Classification"],"prefix":"10.5334","volume":"23","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8660-8508","authenticated-orcid":false,"given":"Yudi","family":"Setiawan","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6624-4153","authenticated-orcid":false,"given":"Nur","family":"Ulfa Maulidevi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1705-1202","authenticated-orcid":false,"given":"Kridanto","family":"Surendro","sequence":"additional","affiliation":[]}],"member":"3285","reference":[{"key":"key20240523061132_B1","doi-asserted-by":"crossref","first-page":"60403","DOI":"10.1109\/ACCESS.2018.2875135","article-title":"Key concept identification: A sentence parse tree-based technique for candidate feature extraction from unstructured texts","volume":"6","year":"2018","journal-title":"IEEE Access"},{"key":"key20240523061132_B2","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1016\/j.csl.2019.01.005","article-title":"Unsupervised sentence representations as word information series: Revisiting TF\u2013IDF","volume":"56","year":"2019","journal-title":"Computer Speech & Language"},{"key":"key20240523061132_B3","article-title":"Machine learning algorithms for social media analysis: A survey","volume":"40","year":"2021","journal-title":"Computer Science Review"},{"issue":"2","key":"key20240523061132_B4","first-page":"15","article-title":"The relationship between cyberbullying and school bullying","volume":"1","year":"2007","journal-title":"Journal of Student Wellbeing"},{"issue":"2","key":"key20240523061132_B5","article-title":"Cyberbullying on social networking sites: A literature review and future research directions","volume":"58","year":"2021","journal-title":"Information & Management"},{"issue":"6","key":"key20240523061132_B6","doi-asserted-by":"crossref","first-page":"901","DOI":"10.1109\/TNN.2009.2014161","article-title":"Probabilistic classification vector machines","volume":"20","year":"2009","journal-title":"IEEE Transactions on Neural Networks"},{"key":"key20240523061132_B7","doi-asserted-by":"crossref","first-page":"245","DOI":"10.1016\/j.eswa.2016.09.009","article-title":"Turning from TF-IDF to TF-IGM for term weighting in text classification","volume":"66","year":"2016","journal-title":"Expert Systems with Applications"},{"issue":"4","key":"key20240523061132_B8","article-title":"Machine learning and feature engineering-based study into sarcasm and irony classification with application to cyberbullying detection","volume":"58","year":"2021","journal-title":"Information Processing & Management"},{"issue":"4","key":"key20240523061132_B9","article-title":"Machine learning and feature engineering-based study into sarcasm and irony classification with application to cyberbullying detection","volume":"58","year":"2021","journal-title":"Information Processing & Management"},{"key":"key20240523061132_B10","doi-asserted-by":"crossref","first-page":"345","DOI":"10.1016\/j.neunet.2020.05.011","article-title":"Progressive learning: A deep learning framework for continual learning","volume":"128","year":"2020","journal-title":"Neural Networks"},{"key":"key20240523061132_B11","article-title":"Analysis of sentiment in tweets addressed to a single domain-specific Twitter account: Comparison of model performance and explainability of predictions","volume":"186","year":"2021","journal-title":"Expert Systems with Applications"},{"key":"key20240523061132_B12","article-title":"NLP-assisted software testing: A systematic mapping of the literature","volume":"126","year":"2020","journal-title":"Information and Software Technology"},{"key":"key20240523061132_B13","article-title":"NLP-assisted software testing: A systematic mapping of the literature","volume":"126","year":"2020","journal-title":"Information and Software Technology"},{"key":"key20240523061132_B14","doi-asserted-by":"crossref","first-page":"558","DOI":"10.1016\/j.procs.2021.01.040","article-title":"Extractive hotel review summarization based on tf\/idf and adjective-noun pairing by considering annual sentiment trends","volume":"179","year":"2021","journal-title":"Procedia Computer Science"},{"key":"key20240523061132_B15","first-page":"1","article-title":"Sentiment Analysis with NLP on Twitter Data","year":"2019"},{"key":"key20240523061132_B16","first-page":"31","article-title":"Comparing SVM and na\u00efve Bayes classifiers for text categorization with Wikitology as knowledge enrichment","year":"2011"},{"issue":"3","key":"key20240523061132_B17","doi-asserted-by":"crossref","first-page":"206","DOI":"10.1080\/13811118.2010.494133","article-title":"Bullying, cyberbullying, and suicide","volume":"14","year":"2010","journal-title":"Archives of Suicide Research: Official Journal of the International Academy for Suicide Research"},{"key":"key20240523061132_B18","first-page":"33","article-title":"A corpus preprocessing method for syllable-level tibetan text classification","year":"2021"},{"issue":"1","key":"key20240523061132_B19","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1027\/1864-1105.21.1.25","article-title":"Cyberbullying: Who are the victims? A comparison of victimization in internet chatrooms and victimization in school","volume":"21","year":"2009","journal-title":"Journal of Media Psychology: Theories, Methods, and Applications"},{"issue":"4","key":"key20240523061132_B20","doi-asserted-by":"crossref","first-page":"1073","DOI":"10.1037\/a0035618","article-title":"Bullying in the digital age: A critical review and meta-analysis of cyberbullying research among youth","volume":"140","year":"2014","journal-title":"Psychological Bulletin"},{"issue":"4","key":"key20240523061132_B21","doi-asserted-by":"crossref","first-page":"1777","DOI":"10.1016\/j.chb.2005.10.005","article-title":"New bottle but old wine: A research of cyberbullying in schools","volume":"23","year":"2007","journal-title":"Computers in Human Behavior"},{"issue":"3","key":"key20240523061132_B22","doi-asserted-by":"crossref","first-page":"223","DOI":"10.1080\/00131880802309333","article-title":"A cross-cultural comparison of adolescents\u2019 experience related to cyberbullying","volume":"50","year":"2008","journal-title":"Educational Research"},{"issue":"7","key":"key20240523061132_B23","doi-asserted-by":"crossref","first-page":"1602","DOI":"10.1109\/TKDE.2013.108","article-title":"An efficient approach for outlier detection with imperfect data labels","volume":"26","year":"2014","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"key20240523061132_B24","doi-asserted-by":"crossref","first-page":"205","DOI":"10.1016\/j.engappai.2017.11.007","article-title":"Multiple kernel approach to semi-supervised fuzzy clustering algorithm for land-cover classification","volume":"68","year":"2018","journal-title":"Engineering Applications of Artificial Intelligence"},{"issue":"2","key":"key20240523061132_B25","doi-asserted-by":"crossref","first-page":"178","DOI":"10.1109\/TKDE.2008.131","article-title":"Decompositional rule extraction from support vector machines by active learning","volume":"21","year":"2009","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"key20240523061132_B26","article-title":"Sentiment analysis using TF\u2013IDF weighting of UK MPs\u2019 tweets on Brexit","volume":"228","year":"2021","journal-title":"Knowledge-Based Systems"},{"issue":"3","key":"key20240523061132_B27","doi-asserted-by":"crossref","first-page":"362","DOI":"10.1111\/j.1939-0025.2010.01040.x","article-title":"Cyber bullying behaviors among middle and high school students","volume":"80","year":"2010","journal-title":"The American Journal of Orthopsychiatry"},{"key":"key20240523061132_B28","volume-title":"Machine Learning","year":"1997"},{"key":"key20240523061132_B29","first-page":"641","volume-title":"Intelligent Systems 2014","year":"2015"},{"key":"key20240523061132_B30","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1016\/j.cose.2018.02.016","article-title":"Machine learning and semantic analysis of in-game chat for cyberbullying","volume":"76","year":"2018","journal-title":"Computers & Security"},{"key":"key20240523061132_B31","article-title":"N-Gram based language processing using Twitter dataset to identify COVID-19 patients","volume":"72","year":"2021","journal-title":"Sustainable Cities and Society"},{"key":"key20240523061132_B32","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1016\/B978-0-12-819043-2.00003-4","volume-title":"Innovation in Health Informatics","year":"2020"},{"key":"key20240523061132_B33","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1016\/B978-0-12-815917-0.00003-4","volume-title":"The Dark Side of Social Media","year":"2019"},{"key":"key20240523061132_B34","first-page":"88","article-title":"Review on candidate feature extraction and categorization for unstructured text document","year":"2020"},{"issue":"2","key":"key20240523061132_B35","doi-asserted-by":"crossref","first-page":"205","DOI":"10.1109\/TETC.2015.2418716","article-title":"Wikipedia-Based semantic similarity measurements for noisy short texts using extended naive bayes","volume":"3","year":"2015","journal-title":"IEEE Transactions on Emerging Topics in Computing"},{"issue":"5","key":"key20240523061132_B36","doi-asserted-by":"crossref","first-page":"40","DOI":"10.1016\/S1005-8885(16)60056-0","article-title":"Mining microblog user interests based on TextRank with TF-IDF factor","volume":"23","year":"2016","journal-title":"The Journal of China Universities of Posts and Telecommunications"},{"key":"key20240523061132_B37","first-page":"169","article-title":"A text preprocessing framework for text mining on big data infrastructure","year":"2018"},{"key":"key20240523061132_B38","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1016\/B978-0-12-821379-7.00002-3","volume-title":"Practical Machine Learning for Data Analysis Using Python","year":"2020"},{"issue":"4","key":"key20240523061132_B39","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1016\/S0306-4573(01)00045-0","article-title":"The use of bigrams to enhance text categorization","volume":"38","year":"2002","journal-title":"Information Processing & Management"},{"key":"key20240523061132_B40","doi-asserted-by":"crossref","first-page":"13624","DOI":"10.1109\/ACCESS.2018.2810198","article-title":"An improved intrusion detection algorithm based on GA and SVM","volume":"6","year":"2018","journal-title":"IEEE Access"},{"key":"key20240523061132_B41","doi-asserted-by":"crossref","first-page":"35208","DOI":"10.1109\/ACCESS.2019.2904602","article-title":"Composite feature extraction and selection for text classification","volume":"7","year":"2019","journal-title":"IEEE Access"},{"key":"key20240523061132_B42","doi-asserted-by":"crossref","first-page":"12","DOI":"10.1016\/j.jbi.2018.09.008","article-title":"A comparison of word embeddings for the biomedical natural language processing","volume":"87","year":"2018","journal-title":"Journal of Biomedical Informatics"},{"key":"key20240523061132_B43","doi-asserted-by":"crossref","first-page":"138162","DOI":"10.1109\/ACCESS.2020.3012595","article-title":"COVID-19 Sensing: Negative sentiment analysis on social media in china via bert model","volume":"8","year":"2020","journal-title":"IEEE Access"},{"issue":"10","key":"key20240523061132_B44","doi-asserted-by":"crossref","first-page":"2357","DOI":"10.1109\/TNNLS.2014.2382123","article-title":"Linear regression-based efficient SVM learning for large-scale classification","volume":"26","year":"2015","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"issue":"6","key":"key20240523061132_B45","doi-asserted-by":"crossref","first-page":"886","DOI":"10.26599\/TST.2020.9010051","article-title":"News keyword extraction algorithm based on semantic clustering and word graph model","volume":"26","year":"2021","journal-title":"Tsinghua Science and Technology"},{"key":"key20240523061132_B46","doi-asserted-by":"crossref","first-page":"51377","DOI":"10.1109\/ACCESS.2020.2973331","article-title":"Correlation-Based Weight Adjusted Naive Bayes","volume":"8","year":"2020","journal-title":"IEEE Access"},{"issue":"11","key":"key20240523061132_B47","doi-asserted-by":"crossref","first-page":"3212","DOI":"10.1109\/TNNLS.2018.2876865","article-title":"Object detection with deep learning: A review","volume":"30","year":"2019","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"key20240523061132_B48","article-title":"Antisocial online behavior detection using deep learning","volume":"138","year":"2020","journal-title":"Decision Support Systems"}],"container-title":["Data Science Journal"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/storage.googleapis.com\/jnl-up-j-dsj-files\/journals\/1\/articles\/1591\/664f14f991b01.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T08:02:23Z","timestamp":1761552143000},"score":1,"resource":{"primary":{"URL":"https:\/\/datascience.codata.org\/articles\/10.5334\/dsj-2024-031\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":48,"alternative-id":["10.5334\/dsj-2024-031"],"URL":"https:\/\/doi.org\/10.5334\/dsj-2024-031","relation":{},"ISSN":["1683-1470"],"issn-type":[{"value":"1683-1470","type":"print"}],"subject":[],"published":{"date-parts":[[2024]]},"article-number":"31"}}