{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,27]],"date-time":"2026-05-27T16:45:13Z","timestamp":1779900313940,"version":"3.53.1"},"reference-count":54,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2024,7,2]],"date-time":"2024-07-02T00:00:00Z","timestamp":1719878400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,7,2]],"date-time":"2024-07-02T00:00:00Z","timestamp":1719878400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Lang Resources &amp; Evaluation"],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1007\/s10579-024-09719-x","type":"journal-article","created":{"date-parts":[[2024,7,2]],"date-time":"2024-07-02T12:02:20Z","timestamp":1719921740000},"page":"1157-1185","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Construction of Amharic information retrieval resources and corpora"],"prefix":"10.1007","volume":"58","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0599-262X","authenticated-orcid":false,"given":"Tilahun","family":"Yeshambel","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Josiane","family":"Mothe","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yaregal","family":"Assabie","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,7,2]]},"reference":[{"key":"9719_CR1","volume-title":"Computational linguistics and intelligent text processing","author":"I Abeba","year":"2014","unstructured":"Abeba, I., & Assabie, Y. (2014). Amharic sentence parsing using phrase chunking. In A. Gelbukh (Ed.), Computational linguistics and intelligent text processing. Springer."},{"key":"9719_CR2","doi-asserted-by":"crossref","unstructured":"Abate, M., and Assabie, Y. (2014). Development of Amharic morphological analyzer using memory-based learning. In Proceedings. of the 9th International. Conference. on Natural Language Processing, pp. 1\u201313, Warsaw.","DOI":"10.1007\/978-3-319-10888-9_1"},{"key":"9719_CR3","unstructured":"Abate, S., Melese, M., Tachbelie, M., Meshesha, M., Atinafu, S., Mulugeta, W., Assabie, Y., Abera, H., Seyoum, B., Abebe, T., Tsegaye, W., Lemma, A., Andargie, T. and Shifaw, S. (2018). Parallel corpora for bi-lingual english-Ethiopian languages statistical machine translation. In Proceedings. of the 27th International. Conference. on Computational Linguistics, pp. 3102\u20133111, New Mexico, USA."},{"key":"9719_CR4","volume-title":"Amharic-english dictionary","author":"A Aklilu","year":"1987","unstructured":"Aklilu, A. (1987). Amharic-english dictionary. Kuraz Printing Press."},{"issue":"1","key":"9719_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1093\/llc\/17.1.1","volume":"17","author":"N Alemayehu","year":"2002","unstructured":"Alemayehu, N., & Willett, P. (2002). Stemming of Amharic words for information retrieval. Literary and Linguistic Computing, 17(1), 1\u201317.","journal-title":"Literary and Linguistic Computing"},{"issue":"4","key":"9719_CR6","doi-asserted-by":"publisher","first-page":"254","DOI":"10.1108\/00330330310500748","volume":"37","author":"N Alemayehu","year":"2003","unstructured":"Alemayehu, N., & Willett, P. (2003). The effectiveness of stemming for information retrieval in Amharic. Program: Electronic Library and Information Systems, 37(4), 254\u2013259.","journal-title":"Program: Electronic Library and Information Systems"},{"key":"9719_CR7","doi-asserted-by":"publisher","unstructured":"Alemu, A. and Lars, A. (2006). Amharic-English information retrieval. Workshop of the Cross Language Evaluation Forum for European Languages, 43\u201350, Berlin, Heidelberg. https:\/\/doi.org\/10.1007\/978-3-540-74999-8_5.","DOI":"10.1007\/978-3-540-74999-8_5"},{"key":"9719_CR8","unstructured":"Amsalu, S. and Gibbon, D. (2006). Finite state morphology of Amharic. 5th Recent Advances in Natural Language Processing, pp. 47\u201351, Borovets, Bulgaria."},{"key":"9719_CR9","volume-title":"Development of Amharic morphological analyser","author":"Y Assabie","year":"2017","unstructured":"Assabie, Y. (2017). Development of Amharic morphological analyser. Technical Report Ministry of Communication and Information Technology."},{"key":"9719_CR10","volume-title":"Amharic-Fran\u00e7ais dictionnaire","author":"A Berhanu","year":"2004","unstructured":"Berhanu, A. (2004). Amharic-Fran\u00e7ais dictionnaire (1st ed.). Shama Books.","edition":"1"},{"key":"9719_CR11","first-page":"53","volume-title":"Retrieval system evaluation, In TREC: Experiment and evaluation in information retrieval","author":"C Buckley","year":"2005","unstructured":"Buckley, C., & Voorhees, E. (2005). Retrieval system evaluation, In TREC: Experiment and evaluation in information retrieval (pp. 53\u201375). MIT Press."},{"issue":"2","key":"9719_CR12","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1016\/j.ipm.2014.10.007","volume":"51","author":"AG Chifu","year":"2015","unstructured":"Chifu, A. G., Hristea, F., Mothe, J., & Popescu, M. (2015). Word sense discrimination in information retrieval: A spectral clustering-based approach. Information Processing and Management, 51(2), 16\u201331.","journal-title":"Information Processing and Management"},{"key":"9719_CR13","unstructured":"Cleverdon, C. W. (1959). The evaluation of systems used in information retrieval. In Proceedings of the International Conference on Scientific Information. pp. 687\u2013698. Washington, DC."},{"key":"9719_CR14","unstructured":"Countrymeters. (2021) Ethiopian Population. Retrived August, 17, 2021, from. http:\/\/countrymeters.info\/en\/ethiopia."},{"key":"9719_CR15","first-page":"1","volume":"2","author":"G Demeke","year":"2006","unstructured":"Demeke, G., & Getachew, M. (2006). Manual annotation of Amharic news items with part-of-speech tags and its challenges. In Proceedings of Ethiopian Languages Research Center Working, 2, 1\u201316.","journal-title":"In Proceedings of Ethiopian Languages Research Center Working"},{"key":"9719_CR16","doi-asserted-by":"crossref","unstructured":"Diaz, F., Mitra, B. and Craswell, N. (2016). Query expansion with locally-trained word embeddings. In Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics, pp. 367\u2013377. Berlin, Germany.","DOI":"10.18653\/v1\/P16-1035"},{"key":"9719_CR17","volume-title":"Amharic dictionary","author":"ELRC","year":"2006","unstructured":"ELRC. (2006). Amharic dictionary (3rd ed.). Addis Ababa University Press.","edition":"3"},{"key":"9719_CR18","unstructured":"Ephrem, B., Miyao, Y. and Yimam, B. (2016). Morpho-syntactically annotated Amharic treebank. CLEF. Proceedings of Corpus Linguistics Fest 2016, pp. 48\u201357, Bloomington, USA."},{"key":"9719_CR19","unstructured":"Ephrem, B., Yusuke, M. and Yimam, B. (2018). Universal dependencies for Amharic. In Proceedings of the 11th International Conferance on Language Resources and Evaluation (LREC 2018), pp. 2216\u20132222, European Language Resources Association (ELRA), Miyazaki, Japan."},{"key":"9719_CR20","doi-asserted-by":"publisher","unstructured":"Eyasu, .E and Gamback, B. (2005). Classifying Amharic news text using self-organizing maps. In Proceedings of the ACL Workshop on Computational Approaches to Semitic Languages, pp. 71\u201378, Ann Arbor, Michigan. https:\/\/doi.org\/10.3115\/1621787.1621801.","DOI":"10.3115\/1621787.1621801"},{"issue":"4","key":"9719_CR21","first-page":"67","volume":"8","author":"Y Farhan","year":"2020","unstructured":"Farhan, Y., Noah, S., & Mohd, M. (2020). Survey of automatic query expansion for Arabic text retrieval. Journal of Information Science Theory and Practice, 8(4), 67\u201386.","journal-title":"Journal of Information Science Theory and Practice"},{"key":"9719_CR22","unstructured":"Feng, Z., Lee, W., Xiaotie, D., Song, H. and Sheng, W. (2006). Automatic construction of Chinese stopword list. In Proceedings of the 5th WSEAS International Conference on Applied Computer Science, pp. 1010\u20131015, Hangzhou."},{"issue":"2","key":"9719_CR23","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1145\/2701583.2701587","volume":"48","author":"N Ferro","year":"2014","unstructured":"Ferro, N. (2014). CLEF 15th birthday: Past, present, and future. ACM SIGIR Forum, 48(2), 31\u201355.","journal-title":"ACM SIGIR Forum"},{"key":"9719_CR24","unstructured":"Gamback, B. (2012). Tagging and verifying an Amharic news corpus. In Proceedings of Language Technology for Normalization of Less-Resourced Languages, pp. 79\u201384, Istanbul, Turkey."},{"key":"9719_CR25","unstructured":"Gamback, B., Sahlgren, M., Atelach, A., and Lars, A. (2006). Applying machine learning to Amharic text classification. In WOCAL 5: 5th World Congress of African linguistics, Addis Ababa, Ethiopia"},{"key":"9719_CR26","volume-title":"Corpus annotation: Linguistics information from computer text corpora","author":"R Garside","year":"2013","unstructured":"Garside, R., Leech, G., & McEnery, T. (2013). Corpus annotation: Linguistics information from computer text corpora (2nd ed.). Routledge.","edition":"2"},{"key":"9719_CR27","unstructured":"Gasser, M. (2010). A Dependency Grammar for Amharic. Workshop on Language Resources and Human Language Technologies for Semitic Languages, pp. 12\u201318,Valleta."},{"key":"9719_CR28","unstructured":"Gasser, M. (2011). HornMorpho: A system for morphological processing of Amharic, Oromo, and Tigrinya. In proceedings of Conference on Human Language Technology for Development, pp 94\u201399, Alexandria, Egypt."},{"key":"9719_CR29","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-019-0112-6","author":"M Gerlach","year":"2019","unstructured":"Gerlach, M., Shi, H., & Amaral, L. A. N. (2019). A universal information theoretic approach to the identification of stopwords. Nature Machine Intelligence. https:\/\/doi.org\/10.1038\/s42256-019-0112-6","journal-title":"Nature Machine Intelligence"},{"issue":"3","key":"9719_CR30","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1016\/0306-4573(94)00047-7","volume":"31","author":"D Harman","year":"1995","unstructured":"Harman, D. (1995). Overview of the second text retrieval conference (TREC-2). Information Processing and Management, 31(3), 271\u2013289.","journal-title":"Information Processing and Management"},{"issue":"1","key":"9719_CR31","first-page":"13","volume":"22","author":"E Hovy","year":"2010","unstructured":"Hovy, E., & Lavid, J. (2010). Towards a science of corpus annotation: A new methodological challenge for corpus linguistics. International Journal of Translation, 22(1), 13\u201336.","journal-title":"International Journal of Translation"},{"issue":"3","key":"9719_CR32","first-page":"119","volume":"4","author":"A Ibrahim","year":"2006","unstructured":"Ibrahim, A. (2006). Effects of stopwords elimination for Arabic information retrieval: a comparative paper. International Journal of Computing and Information Sciences, 4(3), 119\u2013133.","journal-title":"International Journal of Computing and Information Sciences"},{"issue":"4","key":"9719_CR33","doi-asserted-by":"publisher","first-page":"422","DOI":"10.1145\/582415.582418","volume":"20","author":"K J\u00e4rvelin","year":"2002","unstructured":"J\u00e4rvelin, K., & Kek\u00e4l\u00e4inen, J. (2002). Cumulated gain-based evaluation of ir techniques. ACM Transactions on Information Systems, 20(4), 422\u2013446.","journal-title":"ACM Transactions on Information Systems"},{"key":"9719_CR34","volume-title":"Natural language processing and information retrieval methods for intellectual property analysis","author":"C Jochim","year":"2013","unstructured":"Jochim, C. (2013). Natural language processing and information retrieval methods for intellectual property analysis. University of Stuttgart."},{"issue":"1","key":"9719_CR35","doi-asserted-by":"publisher","first-page":"102","DOI":"10.1145\/3274784.3274791","volume":"52","author":"Y Liu","year":"2018","unstructured":"Liu, Y., Kato, M., Clarke, C., Kando, N., & Sakai, T. (2018). Report on NTCIR-13: The thirteenth round of NII test beds and community for information access research. ACM SIGIR Forum, 52(1), 102\u2013110.","journal-title":"ACM SIGIR Forum"},{"key":"9719_CR36","first-page":"3111","volume":"2","author":"T Mikolov","year":"2013","unstructured":"Mikolov, T., Sutskever, I., Chen, K., Corrado, G. S., & Dean, J. (2013). Distributed representations of words and phrases and theircompositionality. In Advances in Neural Information Processing Systems, Proceedings of the 26th International Conference on Neural Information Processing Systems, 2, 3111\u20133119.","journal-title":"In Advances in Neural Information Processing Systems, Proceedings of the 26th International Conference on Neural Information Processing Systems"},{"key":"9719_CR37","doi-asserted-by":"publisher","unstructured":"Mindaye, T., Redewan, H. and Atnafu, S. (2010). Design and implementation of Amharic search engine. In Proc. of the 5th Int. Conf. on Signal Image Technology and Internet Based Systems, 318\u2013325. doi:https:\/\/doi.org\/10.1109\/SITIS.2009.58.","DOI":"10.1109\/SITIS.2009.58"},{"key":"9719_CR38","unstructured":"Mulugeta, W. and Gasser, M. (2012). Learning morphological rules for Amharic verbs using inductive logic programming. In proceedings of the Workshop on Language Technology for Normalisation of Less-Resourced Languages (SALTMIL8\/AfLaT2012), pp. 7\u201312, Istanbul, Turkey."},{"key":"9719_CR39","doi-asserted-by":"crossref","unstructured":"Nunzio, G., Ferro, N., Mandl, T., and Peters, C. (2007). CLEF 2007: Ad Hoc Track Overview. Advances in Multilingual and Multimodal Information Retrieval: 8th Workshop of the Cross-Lingual Evaluation form, CLEF2007, pp. 13\u201332, Budapest, Hungary.","DOI":"10.1007\/978-3-540-85760-0_2"},{"key":"9719_CR40","doi-asserted-by":"crossref","unstructured":"Ounis, I., Amati, G., Plachouras, V., Macdonald, C. and Johnson, D. (2005). Terrier: A high performance and scalable information retrieval platform. In proceedings of SIGIR Open Source Workshop \u201906 Seattle, Washington.","DOI":"10.1007\/978-3-540-31865-1_37"},{"key":"9719_CR41","volume-title":"Natural language annotation for machine learning","author":"J Pustejovsky","year":"2013","unstructured":"Pustejovsky, J., & Stubbs, Am. (2013). Natural language annotation for machine learning (1st ed.). O\u2019Reilly Media.","edition":"1"},{"issue":"1","key":"9719_CR42","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1108\/EUM0000000007186","volume":"53","author":"SE Robertson","year":"1997","unstructured":"Robertson, S. E. (1997). Overview of the okapi projects. Journal of Documentation, 53(1), 3\u20137. https:\/\/doi.org\/10.1108\/EUM0000000007186","journal-title":"Journal of Documentation"},{"issue":"2","key":"9719_CR43","doi-asserted-by":"publisher","first-page":"336","DOI":"10.1109\/TKDE.2012.224","volume":"26","author":"E Ruiz","year":"2014","unstructured":"Ruiz, E., Hristidis, V., & Ipeirotis, P. (2014). Facilitating document annotation using content and querying value. IEEE Transactions on Knowledge and Data Engineering, 26(2), 336\u2013349.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"issue":"4","key":"9719_CR44","doi-asserted-by":"publisher","first-page":"476","DOI":"10.1177\/0165551514530655","volume":"40","author":"M Sadeghi","year":"2014","unstructured":"Sadeghi, M., & Vegas, J. (2014). Automatic identification of light stopwords for Persian information retrieval systems. Journal of Information Science, 40(4), 476\u2013548.","journal-title":"Journal of Information Science"},{"key":"9719_CR45","doi-asserted-by":"publisher","first-page":"1444","DOI":"10.1109\/JPROC.2012.2189916","volume":"100","author":"M Sanderson","year":"2012","unstructured":"Sanderson, M., & Croft, W. (2012). The history of information retrieval research. In Proceedings of the IEEE, 100, 1444\u20131451. https:\/\/doi.org\/10.1109\/JPROC.2012.2189916","journal-title":"In Proceedings of the IEEE"},{"key":"9719_CR46","doi-asserted-by":"crossref","unstructured":"Sisay, F. (2005). Part-of-speech tagging for Amharic using conditional random fields. In proeedings. of ACL-2005 Workshop on Computational Approaches to Semitic languages, pp. 47\u201354, Ann Arbor, Mich.","DOI":"10.3115\/1621787.1621797"},{"issue":"3","key":"9719_CR47","doi-asserted-by":"publisher","first-page":"268","DOI":"10.1093\/comjnl\/35.3.268","volume":"35","author":"A Smeaton","year":"1992","unstructured":"Smeaton, A. (1992). Progress in the application of natural language processing to information retrieval tasks. The Computer Journal, 35(3), 268\u2013278.","journal-title":"The Computer Journal"},{"issue":"1","key":"9719_CR48","doi-asserted-by":"publisher","first-page":"127","DOI":"10.15517\/rk.v40i1.24143","volume":"40","author":"I Vinogradov","year":"2016","unstructured":"Vinogradov, I. (2016). Linguistic corpora of understudied languages: Do they make sense? K\u00e1\u00f1ina, 40(1), 127\u2013141.","journal-title":"K\u00e1\u00f1ina"},{"key":"9719_CR49","unstructured":"Voorhees, E. M., and Harman, D. K. (Eds.). (2005). TREC: Experiment and evaluation in information retrieval. Vol. 63. Cambridge: MIT press, ISBN 0-262-22073-3."},{"key":"9719_CR50","volume-title":"Reference grammar of Amharic","author":"L Wolf","year":"1995","unstructured":"Wolf, L. (1995). Reference grammar of Amharic. Otto Harrassowitz."},{"key":"9719_CR51","doi-asserted-by":"crossref","unstructured":"Yeshambel,T., Mothe, J. and Assabie, Y. (2020). Amharic document representation for ad-hoc retrieval. In Proceedings of the 12th International Conference on knowledge discovery and information retrieval, pp. 118\u2013128, Budapest, Hungary.","DOI":"10.5220\/0010177301180128"},{"key":"9719_CR52","unstructured":"Yeshambel, T., Mothe, J., and Assabie, Y. (2021). Evaluation of corpora, resources and tools for Amharic information retrieval, In Proceedings of the 8th EAI International Conference on Advancements of Science and Technology, Bahir Dar, Ethiopia."},{"key":"9719_CR53","unstructured":"Yifiru, M., Teferra, S. and Laurent, B. (2011). Part-of-speech tagging for under-resourced and morphologically rich languages: the case of Amharic. Conference on Human Language Technology for Development, pp. 50\u201355, Alexandria, Egypt."},{"key":"9719_CR54","volume-title":"Yamarigna Sewasiw (Amharic Grammar)","author":"B Yimam","year":"2001","unstructured":"Yimam, B. (2001). Yamarigna Sewasiw (Amharic Grammar) (2nd ed.). CASE.","edition":"2"}],"container-title":["Language Resources and Evaluation"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-024-09719-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10579-024-09719-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-024-09719-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T10:10:06Z","timestamp":1730542206000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10579-024-09719-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,2]]},"references-count":54,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["9719"],"URL":"https:\/\/doi.org\/10.1007\/s10579-024-09719-x","relation":{},"ISSN":["1574-020X","1574-0218"],"issn-type":[{"value":"1574-020X","type":"print"},{"value":"1574-0218","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,7,2]]},"assertion":[{"value":"8 January 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 July 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have not disclosed any competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Documents are collected from primary sources. We have got permission from the owners to share documents to the research community.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"We authors assert that this research work is our original work. We also grant and assign the publisher of this article to reproduce, publish, distribute, make available to the public, store, and sell in all versions (including soft copy and printed) in all forms (including offline and online use, use in databases). We will agree, at the request of the publisher, to execute all documents and do all things reasonably required by publisher in order to confer to publisher all rights intended to be granted under the agreement we will make.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}]}}