{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:13:58Z","timestamp":1742912038827,"version":"3.40.3"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031282409"},{"type":"electronic","value":"9783031282416"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-28241-6_48","type":"book-chapter","created":{"date-parts":[[2023,3,16]],"date-time":"2023-03-16T01:02:20Z","timestamp":1678928540000},"page":"429-435","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Text Information Retrieval in\u00a0Tetun"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4392-2382","authenticated-orcid":false,"given":"Gabriel","family":"de Jesus","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,3,16]]},"reference":[{"key":"48_CR1","volume-title":"An Introduction to Information Retrieval","author":"C-D Manning","year":"2009","unstructured":"Manning, C.-D., Raghavan, P., Sch\u00fctze, H.: An Introduction to Information Retrieval. Cambridge University Press, Cambridg (2009)"},{"key":"48_CR2","unstructured":"van-Klinken, C.-W., Hajek, J., Nordlinger R.: Tetun Dili: a grammar of an East Timorese language, Pacific Linguistics, Canberra, Australia (2002)"},{"key":"48_CR3","unstructured":"The standard orthography of the tetum language. https:\/\/archive.org\/details\/the-standard-orthography-of-the-tetum-language. Accessed 31 Oct 2022"},{"key":"48_CR4","unstructured":"Government decree-law No. 1\/2004 of 14 April 2004 - the standard orthography of the tetun language. https:\/\/mj.gov.tl\/jornal\/lawsTL\/RDTL-Law\/RDTL-Gov-Decrees\/Gov-Decree-2004-01.pdf. Accessed 31 Oct 2022"},{"key":"48_CR5","doi-asserted-by":"crossref","unstructured":"Constitution of the democratic republic of timor-leste. https:\/\/timor-leste.gov.tl\/wp-content\/uploads\/2010\/03\/Constitution_RDTL_ENG.pdf\/. Accessed 31 Oct 2022","DOI":"10.5089\/9798400219627.002"},{"key":"48_CR6","unstructured":"Timor-leste population and housing Census 2015. General directorate of statistics, ministry of finance, democratic republic of timor-leste. https:\/\/www.statistics.gov.tl\/category\/publications\/census-publications Accessed 31 Oct 2022"},{"key":"48_CR7","doi-asserted-by":"publisher","unstructured":"Hajek, J., van-Klinken., C.-W.: language contact and gender in Tetun Dili: what happens when Austronesian meets romance?. University of Hawai\u2019i Press 58, 59\u201391 (2019). https:\/\/doi.org\/10.1353\/ol.2019.0003","DOI":"10.1353\/ol.2019.0003"},{"key":"48_CR8","unstructured":"Zuzana, G.: Tetun in Timor-Leste: The role of language contact in its development. PhD thesis, Universidade de Coimbra, Portugal (2018). https:\/\/hdl.handle.net\/10316\/80665"},{"key":"48_CR9","doi-asserted-by":"crossref","unstructured":"van-Klinken, C. W., Hajek, J.: Language contact and functional expansion in Tetun Dili: the evolution of a new press register. Multilingual 37, 613\u2013647 (2018)","DOI":"10.1515\/multi-2017-0109"},{"key":"48_CR10","unstructured":"Timor news: an online news agency based in Dili, Timor-Leste, https:\/\/www.timornews.tl"},{"key":"48_CR11","unstructured":"The registered and licensed social communication agencies in press council of timor-Leste. https:\/\/conselhoimprensa.tl\/baze-de-dadus\/registu-media. Accessed 31 Oct 2022"},{"key":"48_CR12","unstructured":"Magueresse, A., Carles, V., Heetderks, E.: Low-resource languages: a review of past work and future challenges. CoRR, abs\/2006.07264 (2020). https:\/\/arxiv.org\/abs\/2006.07264"},{"key":"48_CR13","unstructured":"Cieri, C., Maxwell, M., Strassel, M.-S., Tracey, J.: Selection criteria for low resource language programs. In: Calzolari, N., et al. (eds.) Proceedings of the Tenth International Conference on Language Resources and Evaluation LREC 2016, Portoro\u017e, Slovenia, 23\u201328 May 2016. European Language Resources Association (ELRA) (2016). https:\/\/www.lrec-conf.org\/proceedings\/lrec2016\/summaries\/1254.html\u2019"},{"issue":"8","key":"48_CR14","doi-asserted-by":"publisher","first-page":"342","DOI":"10.6092\/issn.2532-8816\/9931","volume":"4","author":"A Hoenen","year":"2020","unstructured":"Hoenen, A., Koc, C., Rahn, M.-D.: A manual for web corpus crawling of low resource languages. Umanistica Digitale 4(8), 342\u2013344 (2020). https:\/\/doi.org\/10.6092\/issn.2532-8816\/9931","journal-title":"Umanistica Digitale"},{"key":"48_CR15","doi-asserted-by":"crossref","unstructured":"Artetxe, M., Aldabe, I., Agerri, R., Perez-de-Vi\u00f1aspre, O., Soroa, A.: Does corpus quality really matter for low-resource languages?. CoRR abs\/2203.08111 (2022). https:\/\/doi.org\/10.48550\/arXiv.2203.08111","DOI":"10.18653\/v1\/2022.emnlp-main.499"},{"key":"48_CR16","unstructured":"Linder, L., Jungo, M., Hennebert, J., Musat, C.-C., Fischer, A.: Automatic creation of text corpora for low-resource languages from the internet: the case of swiss German. In B\u00e9chet, F., et al. (eds.) Proceedings of The 12th Language Resources and Evaluation Conference, LREC 2020, Marseille, France, 11\u201316 May 2020, pp. 2706\u20132711, European Language Resources Association (2020). https:\/\/aclanthology.org\/2020.lrec-1.329\/"},{"key":"48_CR17","unstructured":"Dovbnia, O., Wr\u00f3blewska, A.: Automatic language identification for celtic texts. CoRR abs\/2203.04831 (2022). https:\/\/doi.org\/10.48550\/arXiv.2203.04831"},{"key":"48_CR18","doi-asserted-by":"publisher","unstructured":"Ferilli, S.: Automatic multilingual stopwords identification from very small corpora. Electron. 10(17) (2021). https:\/\/doi.org\/10.3390\/electronics10172169","DOI":"10.3390\/electronics10172169"},{"key":"48_CR19","series-title":"Studies in Computational Intelligence","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1007\/978-3-030-67148-8_3","volume-title":"Intelligent Systems in Industrial Applications","author":"S Ferilli","year":"2021","unstructured":"Ferilli, S., Izzi, G.L., Franza, T.: Automatic stopwords identification from very small corpora. In: Stettinger, M., Leitner, G., Felfernig, A., Ras, Z.W. (eds.) ISMIS 2020. SCI, vol. 949, pp. 31\u201346. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-67148-8_3"},{"key":"48_CR20","unstructured":"Baeza-Yates, R., Ribeiro-Neto, B.-A.: Modern Information Retrieval - the Concepts and Technology Behind Search, 2nd edn. Pearson Education Ltd., Harlow (2011)"},{"key":"48_CR21","unstructured":"Croft, W.-B., Metzler, D., Strohman. T.: Search Engines - Information Retrieval in Practice. Pearson Education, London (2009). https:\/\/www.search-engines-book.com"},{"issue":"1","key":"48_CR22","first-page":"3","volume":"3","author":"R-T Lo","year":"2005","unstructured":"Lo, R.-T., He, B., Ounis, T.: Automatically building a stopword list for an information retrieval system. J. Digital Inf. Manage. 3(1), 3\u20138 (2005)","journal-title":"J. Digital Inf. Manage."},{"key":"48_CR23","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"643","DOI":"10.1007\/978-3-030-88081-1_48","volume-title":"Computational Collective Intelligence","author":"U Tukeyev","year":"2021","unstructured":"Tukeyev, U., Karibayeva, A., Turganbayeva, A., Amirova, D.: Universal programs for stemming, segmentation, morphological analysis of Turkic words. In: Nguyen, N.T., Iliadis, L., Maglogiannis, I., Trawi\u0144ski, B. (eds.) ICCCI 2021. LNCS (LNAI), vol. 12876, pp. 643\u2013654. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-88081-1_48"},{"key":"48_CR24","doi-asserted-by":"publisher","unstructured":"Chavula, C., Suleman, H.: Ranking by language similarity for resource scarce southern bantu languages. In: International Conference on the Theory of Information Retrieval (ICTIR), Virtual Event, Canada, 2021, pp. 137\u2013147. Association for Computing Machinery, New York, NY, USA (2021). https:\/\/doi.org\/10.1145\/3471158.3472251","DOI":"10.1145\/3471158.3472251"},{"key":"48_CR25","doi-asserted-by":"publisher","unstructured":"Esmaili, K.-S., et al.: Building a test collection for Sorani Kurdish. In: ACS International Conference on Computer Systems and Applications, AICCSA 2013, Ifrane, Morocco, 27\u201330 May 2013, pp. 1\u20137, IEEE Computer Society (2013). https:\/\/doi.org\/10.1109\/AICCSA.2013.6616470","DOI":"10.1109\/AICCSA.2013.6616470"},{"key":"48_CR26","doi-asserted-by":"publisher","unstructured":"Aleahmad., A., Amiri, H., Darrudi, E., Rahgozar, M., Oroumchian, F.: Hamshahri: A standard Persian text collection. Knowl. Based Syst. 22(5), 382\u2013387 (2009). https:\/\/doi.org\/10.1016\/j.knosys.2009.05.002","DOI":"10.1016\/j.knosys.2009.05.002"},{"key":"48_CR27","doi-asserted-by":"publisher","unstructured":"von-Holy, A., Bresler, A., Shuman, O., Chavula, C., Suleman, H.: Bantuweb: a digital library for resource scarce South African languages. In: Masinde, M. Proceedings of the South African Institute of Computer Scientists and Information Technologists, SAICSIT 2017, Thaba Nchu, South Africa, 26\u201328 September 2017, pp. 36:1\u201336:10, Association for Computing Machinery (2017). https:\/\/doi.org\/10.1145\/3129416.3129446","DOI":"10.1145\/3129416.3129446"},{"key":"48_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"180","DOI":"10.1007\/978-3-319-27974-9_18","volume-title":"Digital Libraries: Providing Quality Information","author":"N Malumba","year":"2015","unstructured":"Malumba, N., Moukangwe, K., Suleman, H.: AfriWeb: a web search engine for a marginalized language. In: Allen, R.B., Hunter, J., Zeng, M.L. (eds.) ICADL 2015. LNCS, vol. 9469, pp. 180\u2013189. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-27974-9_18"},{"key":"48_CR29","unstructured":"Kyeyune, M.-J.: IsiXhosa search engine development report. Department of Computer Science, University of Cape Town (2015). https:\/\/pubs.cs.uct.ac.za\/id\/eprint\/1035\/1\/report.pdf. Accessed 10 Jul 2022"},{"key":"48_CR30","doi-asserted-by":"publisher","unstructured":"Chakrabarti, S., van-den-Berg, M., Dom, B.: Focused crawling: a new approach to topic-specific web resource. Comput. Netw. 31(11\u201316), 1623\u20131640 (1999). https:\/\/doi.org\/10.1016\/S1389-1286(99)00052-3","DOI":"10.1016\/S1389-1286(99)00052-3"},{"key":"48_CR31","doi-asserted-by":"publisher","unstructured":"Robertson, S., Zaragoza, H.: The probabilistic relevance framework: BM25 and Beyond. Foundations and Trends in Information Retrieval, April 2009. vol. 3, pp. 333\u2013389, Now Publishers Inc., Hanover, MA, USA (2009). https:\/\/doi.org\/10.1561\/1500000019","DOI":"10.1561\/1500000019"},{"key":"48_CR32","doi-asserted-by":"crossref","unstructured":"Nogueira, R., Jiang, Z., Pradeep, R., Lin, J.: Document Ranking with a Pretrained Sequence-to-Sequence Model. In: Chon, T., He, Y, Liu, Y. Findings of the Association for Computational Linguistics: EMNLP 2020, Online Event, 16\u201320 November 2020, ACL, vol. EMNLP 2020, pp. 708\u2013718. Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.63","DOI":"10.18653\/v1\/2020.findings-emnlp.63"},{"key":"48_CR33","unstructured":"Yang, W., Zhang, H., Lin, J.: Simple Applications of BERT for Ad hoc document retrieval. CoRR abs\/1903.10972, (2019). https:\/\/arxiv.org\/abs\/1903.10972"},{"key":"48_CR34","unstructured":"Clough, P.-D., Sanderson, M.: Evaluating the performance of information retrieval systems using test collections. Inf. Res. 18(2) (2013). https:\/\/www.informationr.net\/ir\/18-2\/paper582.html\u2019"},{"issue":"2","key":"48_CR35","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1145\/3458553.3458563","volume":"53","author":"J Lin","year":"2019","unstructured":"Lin, J.: The neural hype, justified!: a recantation. ACM SIGIR Forum 53(2), 88\u201393 (2019). https:\/\/doi.org\/10.1145\/3458553.3458563","journal-title":"ACM SIGIR Forum"},{"key":"48_CR36","doi-asserted-by":"publisher","unstructured":"Yang, W., Lu, K., Yang, P., Lin, J.: Critically examining the \u201cNeural Hype\u201d: Weak baselines and the additivity of effectiveness gains from neural ranking models. In: Piwowarski, B., Gaussier, \u00c9., Maarek, Y., Nie, J., Scholer, F. (eds.) In: Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR 2019, Paris, France, 21\u201325 July 2019, pp. 1129\u20131132, ACM (2019). https:\/\/doi.org\/10.1145\/3331184.3331340","DOI":"10.1145\/3331184.3331340"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-28241-6_48","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,5]],"date-time":"2024-03-05T13:06:30Z","timestamp":1709643990000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-28241-6_48"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031282409","9783031282416"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-28241-6_48","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"16 March 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECIR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Information Retrieval","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Dublin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ireland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 April 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 April 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"45","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecir2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecir2023.org\/index.html?v=1.0","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"489","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"77","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"83","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"16% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}