{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T04:14:32Z","timestamp":1743394472178,"version":"3.40.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031850660","type":"print"},{"value":"9783031850677","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-85067-7_12","type":"book-chapter","created":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T08:11:11Z","timestamp":1743322271000},"page":"122-132","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Text Categorization Can Enhance Domain-Agnostic Stopword Extraction"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3492-2014","authenticated-orcid":false,"given":"Houcemeddine","family":"Turki","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7772-1103","authenticated-orcid":false,"given":"Naome A.","family":"Etori","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2786-8913","authenticated-orcid":false,"given":"Mohamed Ali","family":"Hadj Taieb","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4558-0356","authenticated-orcid":false,"given":"Abdul-Hakeem","family":"Omotayo","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3533-6829","authenticated-orcid":false,"given":"Chris Chinenye","family":"Emezue","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2277-5814","authenticated-orcid":false,"given":"Mohamed","family":"Ben Aouicha","sequence":"additional","affiliation":[]},{"given":"Ayodele","family":"Awokoya","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3310-0326","authenticated-orcid":false,"given":"Falalu Ibrahim","family":"Lawan","sequence":"additional","affiliation":[]},{"given":"Doreen","family":"Nixdorf","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,3,28]]},"reference":[{"key":"12_CR1","doi-asserted-by":"publisher","unstructured":"Abdi, A.A.: Oral societies and colonial experiences: Sub-Saharan Africa and the de-facto power of the written word. In: Education, Decolonization and Development, pp. 39\u201356. BRILL (2009). https:\/\/doi.org\/10.1163\/9789087909260_004","DOI":"10.1163\/9789087909260_004"},{"key":"12_CR2","doi-asserted-by":"publisher","unstructured":"Adelani, D.I., Masiak, M., Azime, I.A., et\u00a0al.: MasakhaNEWS: News Topic Classification for African languages (2023). https:\/\/doi.org\/10.48550\/ARXIV.2304.09972","DOI":"10.48550\/ARXIV.2304.09972"},{"key":"12_CR3","unstructured":"Asubiaro, T.V.: Entropy-based generic stopwords list for Yoruba texts. Int. J. Comput. Inf. Technol. 2(5) (2013). https:\/\/www.academia.edu\/9153147\/Entropy_Based_Generic_Stopwords_List_for_Yoruba_Texts"},{"issue":"1","key":"12_CR4","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1080\/10228195.2013.857362","volume":"45","author":"O Babarinde","year":"2014","unstructured":"Babarinde, O.: Linguistic analysis of the structure of yoruba numerals. Lang. Matters 45(1), 127\u2013147 (2014). https:\/\/doi.org\/10.1080\/10228195.2013.857362","journal-title":"Lang. Matters"},{"key":"12_CR5","doi-asserted-by":"publisher","unstructured":"Bamba Dione, C.M., Adelani, D.I., Nabende, P., et al.: MasakhaPOS: part-of-speech tagging for typologically diverse African languages. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 10883\u201310900. Association for Computational Linguistics (2023). https:\/\/doi.org\/10.18653\/v1\/2023.acl-long.609","DOI":"10.18653\/v1\/2023.acl-long.609"},{"key":"12_CR6","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"180","DOI":"10.1007\/978-981-10-2777-2_16","volume-title":"Soft Computing in Data Science","author":"K Chekima","year":"2016","unstructured":"Chekima, K., Alfred, R.: An automatic construction of malay stop words based on aggregation method. In: Berry, M.W., Hj. Mohamed, A., Yap, B.W. (eds.) SCDS 2016. CCIS, vol. 652, pp. 180\u2013189. Springer, Singapore (2016). https:\/\/doi.org\/10.1007\/978-981-10-2777-2_16"},{"key":"12_CR7","doi-asserted-by":"publisher","unstructured":"Dolamic, L., Savoy, J.: When stopword lists make the difference. J. Am. Soc. Inf. Sci. Technol. 61(1), 200\u2013203 (2009). https:\/\/doi.org\/10.1002\/asi.21186","DOI":"10.1002\/asi.21186"},{"key":"12_CR8","doi-asserted-by":"publisher","unstructured":"Emezue, C., Nigatu, H., Thinwa, C., et\u00a0al.: The African Stopwords project: curating stopwords for African languages (2023). https:\/\/doi.org\/10.48550\/ARXIV.2304.12155","DOI":"10.48550\/ARXIV.2304.12155"},{"key":"12_CR9","unstructured":"Emezue, C., et al.: The african stopwords project: curating stopwords for african languages. arXiv preprint arXiv:2304.12155 (2023)"},{"key":"12_CR10","doi-asserted-by":"publisher","unstructured":"Ferilli, S., Esposito, F., Grieco, D.: Automatic learning of linguistic resources for stopword removal and stemming from text. Procedia Comput. Sci. 38, 116\u2013123 (2014). https:\/\/doi.org\/10.1016\/j.procs.2014.10.019","DOI":"10.1016\/j.procs.2014.10.019"},{"key":"12_CR11","series-title":"Advances in Intelligent Systems and Computing","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1007\/978-981-10-8228-3_5","volume-title":"Proceedings of the Second International Conference on Computational Intelligence and Informatics","author":"BR Ganesh","year":"2018","unstructured":"Ganesh, B.R., Gupta, D., Sasikala, T.: Grammar error detection tool for medical transcription using stop words parts-of-speech tags ngram based model. In: Bhateja, V., Tavares, J.M.R.S., Rani, B.P., Prasad, V.K., Raju, K.S. (eds.) Proceedings of the Second International Conference on Computational Intelligence and Informatics. AISC, vol. 712, pp. 37\u201349. Springer, Singapore (2018). https:\/\/doi.org\/10.1007\/978-981-10-8228-3_5"},{"key":"12_CR12","doi-asserted-by":"publisher","unstructured":"Gerlach, M., Shi, H., Amaral, L.A.N.: A universal information theoretic approach to the identification of stopwords. Nat. Mach. Intell. 1(12), 606\u2013612 (2019). https:\/\/doi.org\/10.1038\/s42256-019-0112-6","DOI":"10.1038\/s42256-019-0112-6"},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Gorro, K.D., Ali, M.F., Lawas, L.A., Ilano, A.S.: Stop words detection using a long short term memory recurrent neural network. In: Proceedings of the 2021 9th International Conference on Information Technology: IoT and Smart City, pp. 199\u2013202 (2021)","DOI":"10.1145\/3512576.3512612"},{"key":"12_CR14","doi-asserted-by":"publisher","unstructured":"J\u00f3hannsd\u00f3ttir, K.M.: Temporal adverbs in icelandic: adverbs of quantification vs. frequency adverbs. Nordic J. Linguist. 30(2), 157\u2013183 (2007). https:\/\/doi.org\/10.1017\/s0332586507001734","DOI":"10.1017\/s0332586507001734"},{"issue":"2","key":"12_CR15","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1017\/s002222670001389x","volume":"20","author":"F Katamba","year":"1984","unstructured":"Katamba, F.: A nonlinear analysis of vowel harmony in luganda. J. Linguist. 20(2), 257\u2013275 (1984). https:\/\/doi.org\/10.1017\/s002222670001389x","journal-title":"J. Linguist."},{"issue":"3","key":"12_CR16","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1007\/bf00630273","volume":"9","author":"EL Keenan","year":"1986","unstructured":"Keenan, E.L., Stavi, J.: A semantic characterization of natural language determiners. Linguist. Philos. 9(3), 253\u2013326 (1986). https:\/\/doi.org\/10.1007\/bf00630273","journal-title":"Linguist. Philos."},{"key":"12_CR17","doi-asserted-by":"publisher","unstructured":"Ladani, D.J., Desai, N.P.: Stopword identification and removal techniques on TC and IR applications: a survey. In: 2020 6th International Conference on Advanced Computing and Communication Systems (ICACCS). IEEE (2020). https:\/\/doi.org\/10.1109\/icaccs48705.2020.9074166","DOI":"10.1109\/icaccs48705.2020.9074166"},{"key":"12_CR18","doi-asserted-by":"crossref","unstructured":"Miretie, S.G., Khedkar, V.: Automatic generation of stopwords in the Amharic text. Int. J. Comput. Appl. 975, 8887 (2018). https:\/\/www.ijcaonline.org\/archives\/volume180\/number10\/miretie-2018-ijca-916161.pdf","DOI":"10.5120\/ijca2018916161"},{"key":"12_CR19","doi-asserted-by":"publisher","unstructured":"Niyongabo, R.A., Hong, Q., Kreutzer, J., Huang, L.: KINNEWS and KIRNEWS: benchmarking cross-lingual text classification for Kinyarwanda and Kirundi. In: Proceedings of the 28th International Conference on Computational Linguistics, pp. 5507\u20135521. International Committee on Computational Linguistics, Barcelona, Spain (Online) (2020). https:\/\/doi.org\/10.18653\/v1\/2020.coling-main.480","DOI":"10.18653\/v1\/2020.coling-main.480"},{"issue":"4","key":"12_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3314942","volume":"18","author":"IE Onyenwe","year":"2019","unstructured":"Onyenwe, I.E., Hepple, M., Chinedu, U., Ezeani, I.: Toward an effective igbo part-of-speech tagger. ACM Trans. Asian Low-Resour. Lang. Inf. Process. 18(4), 1\u201326 (2019). https:\/\/doi.org\/10.1145\/3314942","journal-title":"ACM Trans. Asian Low-Resour. Lang. Inf. Process."},{"key":"12_CR21","unstructured":"Orife, I., Kreutzer, J., Sibanda, B., et\u00a0al.: Masakhane - Machine Translation For Africa. arXiv preprint arXiv: 2003.11529 (2020). https:\/\/arxiv.org\/abs\/2003.11529v1"},{"key":"12_CR22","unstructured":"Panckhurst, R.: Texting in three European languages : does the linguistic typology differ? In: i-Mean 2009 Issues in Meaning in Interaction, Bristol, United Kingdom, pp. 119\u2013136 (2009). https:\/\/hal.science\/hal-00443016"},{"key":"12_CR23","doi-asserted-by":"publisher","unstructured":"Qiao, Y., Xiong, C., Liu, Z., Liu, Z.: Understanding the Behaviors of BERT in Ranking (2019). https:\/\/doi.org\/10.48550\/ARXIV.1904.07531","DOI":"10.48550\/ARXIV.1904.07531"},{"issue":"17","key":"12_CR24","doi-asserted-by":"publisher","first-page":"50047","DOI":"10.1007\/s11042-023-17205-9","volume":"83","author":"S Rajwal","year":"2023","unstructured":"Rajwal, S.: Lihisto: a comprehensive list of hindi stopwords. Multimedia Tools Appl. 83(17), 50047\u201350059 (2023)","journal-title":"Multimedia Tools Appl."},{"key":"12_CR25","doi-asserted-by":"publisher","first-page":"362","DOI":"10.1016\/j.procs.2018.05.196","volume":"132","author":"R Rani","year":"2018","unstructured":"Rani, R., Lobiyal, D.: Automatic construction of generic stop words list for Hindi text. Procedia Comput. Sci. 132, 362\u2013370 (2018). https:\/\/doi.org\/10.1016\/j.procs.2018.05.196","journal-title":"Procedia Comput. Sci."},{"issue":"8","key":"12_CR26","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0254937","volume":"16","author":"S Sarica","year":"2021","unstructured":"Sarica, S., Luo, J.: Stopwords in technical language processing. PLoS ONE 16(8), e0254937 (2021). https:\/\/doi.org\/10.1371\/journal.pone.0254937","journal-title":"PLoS ONE"},{"issue":"2","key":"12_CR27","doi-asserted-by":"publisher","first-page":"206","DOI":"10.2307\/1420127","volume":"77","author":"AM Treisman","year":"1964","unstructured":"Treisman, A.M.: Verbal cues, language, and meaning in selective attention. Am. J. Psychol. 77(2), 206 (1964). https:\/\/doi.org\/10.2307\/1420127","journal-title":"Am. J. Psychol."},{"issue":"3","key":"12_CR28","doi-asserted-by":"publisher","first-page":"1294","DOI":"10.3390\/app12031294","volume":"12","author":"T Yeshambel","year":"2022","unstructured":"Yeshambel, T., Mothe, J., Assabie, Y.: Amharic Adhoc information retrieval system based on morphological features. Appl. Sci. 12(3), 1294 (2022). https:\/\/doi.org\/10.3390\/app12031294","journal-title":"Appl. Sci."}],"container-title":["Lecture Notes in Networks and Systems","Advancements in Machine Learning and Natural Language Processing: Innovations and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-85067-7_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T08:11:25Z","timestamp":1743322285000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-85067-7_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031850660","9783031850677"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-85067-7_12","relation":{},"ISSN":["2367-3370","2367-3389"],"issn-type":[{"value":"2367-3370","type":"print"},{"value":"2367-3389","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"28 March 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"This work is done within the framework of Masakhane, the African grassroots community for natural language processing.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"The source code is available at  for reproducibility purposes.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Source Code"}},{"value":"LPKM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Language Processing and Knowledge Management","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Sfax","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tunisia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 June 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 June 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"lpkm2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/sites.google.com\/view\/lpkm-2024","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}