{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,5]],"date-time":"2026-02-05T06:38:33Z","timestamp":1770273513647,"version":"3.49.0"},"reference-count":118,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"Institute for Advanced Research Publication Grant of United International University","award":["IAR-2025-Pub-044"],"award-info":[{"award-number":["IAR-2025-Pub-044"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/access.2025.3574234","type":"journal-article","created":{"date-parts":[[2025,5,27]],"date-time":"2025-05-27T17:22:25Z","timestamp":1748366545000},"page":"112428-112445","source":"Crossref","is-referenced-by-count":1,"title":["An Analytical Review of Preprocessing Techniques in Bengali Natural Language Processing"],"prefix":"10.1109","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0765-1060","authenticated-orcid":false,"given":"Sovon","family":"Chakraborty","sequence":"first","affiliation":[{"name":"C2SG Research Group, United International University, Dhaka, Bangladesh"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-4705-9229","authenticated-orcid":false,"given":"Protiva","family":"Das","sequence":"additional","affiliation":[{"name":"C2SG Research Group, United International University, Dhaka, Bangladesh"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2704-118X","authenticated-orcid":false,"given":"Shakib","family":"Mahmud Dipto","sequence":"additional","affiliation":[{"name":"C2SG Research Group, United International University, Dhaka, Bangladesh"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4049-3332","authenticated-orcid":false,"given":"Md. Aktaruzzaman","family":"Pramanik","sequence":"additional","affiliation":[{"name":"C2SG Research Group, United International University, Dhaka, Bangladesh"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9669-151X","authenticated-orcid":false,"given":"Jannatun","family":"Noor","sequence":"additional","affiliation":[{"name":"C2SG Research Group, United International University, Dhaka, Bangladesh"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-024-00956-z"},{"key":"ref2","first-page":"2227","article-title":"BengaliLCP: A dataset for lexical complexity prediction in the Bengali texts","volume-title":"Proc. Joint Int. Conf. Comput. Linguistics, Lang. Resources Eval. (LREC-COLING)","author":"Ayman"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.3390\/su14137598"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3690634"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.arabicnlp-1.101"},{"key":"ref6","first-page":"4542","article-title":"Deep learning based named entity recognition models for recipes","volume-title":"Proc. Joint Int. Conf. Comput. Linguistics, Lang. Resources Eval. (LREC-COLING)","author":"Goel"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijcce.2023.01.001"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-44689-5_10"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-16-7076-3_19"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1142\/S0218213020500141"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1515\/lingvan-2021-0054"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2024.02.162"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.115394"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-024-52261-7"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3197662"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICICT57646.2023.10134311"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.3115\/1626516.1626525"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-48309-7_34"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s10772-024-10124-6"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.59400\/fls.v6i2.1188"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.aacl-main.4"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1201\/9781003244332-5"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCIT57492.2022.10055290"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1515\/jisys-2020-0060"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3134154"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/COMNETSAT59769.2023.10420673"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.11591\/ijai.v13.i1.pp358-367"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.3390\/app13010522"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.9734\/ajrcos\/2023\/v16i4398"},{"key":"ref30","article-title":"BanFakeNews: A dataset for detecting fake news in Bangla","author":"Zobaer Hossain","year":"2020","journal-title":"arXiv:2004.08789"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3511601"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0287818"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CIET.2018.8660795"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.35444\/IJANA.2021.12611"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/STI47673.2019.9068063"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/IMCOM53663.2022.9721634"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2024.128255"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.nlp.2023.100017"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.3390\/app14031078"},{"key":"ref40","first-page":"703","article-title":"Parts of speech (PoS) and universal parts of speech (UPoS) tagging: A critical review with special reference to low resource languages","volume-title":"Proc. 20th Int. Conf. Natural Lang. Process. (ICON)","author":"Kuwali"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1002\/tesj.690"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159725"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1201\/9781003256083-1"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/3572783"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-76776-1_10"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.7717\/peerj-cs.1981"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0273156"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-10083-3"},{"key":"ref49","volume-title":"Word Formation In Bengali: A Whole Word Morphological Description And Its Theoretical Implications","author":"Bhattacharja","year":"2007"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCNT49239.2020.9225565"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/s12652-022-04471-y"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.3758\/s13428-016-0704-6"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.3390\/bs10100155"},{"key":"ref54","article-title":"BNLP: Natural language processing toolkit for Bengali language","author":"Sarker","year":"2021","journal-title":"arXiv:2102.00405"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1145\/3377049.3377078"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.11591\/ijeecs.v23.i1.pp463-470"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00171"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICCIT60459.2023.10441631"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2021.05.096"},{"key":"ref60","first-page":"190","article-title":"Transformers at HSD-2Lang 2024: Hate speech detection in Arabic and Turkish tweets using BERT based architectures","volume-title":"Proc. 7th Workshop Challenges Appl. Automated Extraction Socio-political Events from Text (CASE)","author":"Singhal"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3165563"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.14419\/ijet.v10i2.31538"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/ICBSLP.2018.8554497"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-68154-8_93"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/EICT48899.2019.9068797"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/EICT48899.2019.9068834"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.26615\/978-954-452-072-4_007"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1007\/s13369-020-04388-8"},{"key":"ref69","article-title":"Fake news detection by fine tuning of bidirectional encoder representations from transformers","author":"Sathvik","year":"2023","journal-title":"TechRxiv"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.57197\/JDR-2024-0032"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.3390\/s22114157"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3195236"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-33-4367-2_4"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/ICBSLP47725.2019.201533"},{"key":"ref75","first-page":"400","article-title":"A rule based lightweight Bengali Stemmer","volume-title":"Proc. 17th Int. Conf. Natural Lang. Process. (ICON)","author":"Das"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-33-4367-2_30"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCNT49239.2020.9225358"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.2991\/nlpr.d.210316.001"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1145\/3510419"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.3390\/app12062848"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/ICBSLP.2018.8554679"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2015.7364114"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-52856-0_35"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1016\/j.tele.2020.101345"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.3390\/s20071884"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/ICBSLP.2018.8554681"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/ICCITECHN.2016.7860236"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.5121\/ijnlc.2019.8501"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/ICIEVicIVPR48672.2020.9306652"},{"key":"ref90","first-page":"20","article-title":"Anwesha: A tool for semantic search in Bangla","volume-title":"Proc. ALTNLP","author":"Das"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.banglalp-1.42"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1007\/s42979-022-01028-w"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/EICT61409.2023.10427766"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1111\/1468-2427.00309"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.38032\/jea.2021.03.001"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3119627"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2024.101703"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/ICCITECHN.2016.7860228"},{"key":"ref99","article-title":"A comparative analysis of noise reduction methods in sentiment analysis on noisy Bangla texts","author":"Toufique Elahi","year":"2024","journal-title":"arXiv:2401.14360"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN54540.2023.10191392"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.4236\/jsip.2014.52006"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1186\/s13634-020-00707-1"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.21437\/IberSPEECH.2024-20"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/DASA54658.2022.9765043"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/TNSRE.2020.2980223"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2013.04.003"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.2196\/46105"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-022-34326-1"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-53025-8_17"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2016.02.011"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.3390\/rs16122222"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.3390\/diagnostics14020137"},{"key":"ref113","article-title":"Large language models meet NLP: A survey","author":"Qin","year":"2024","journal-title":"arXiv:2405.12819"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-023-16665-3"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1109\/RTEICT52294.2021.9573847"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2019.103301"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.116648"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.633"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10820123\/11016021.pdf?arnumber=11016021","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,5]],"date-time":"2025-07-05T04:32:47Z","timestamp":1751689967000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11016021\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":118,"URL":"https:\/\/doi.org\/10.1109\/access.2025.3574234","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}