{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:25:02Z","timestamp":1772119502217,"version":"3.50.1"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2023,8,20]],"date-time":"2023-08-20T00:00:00Z","timestamp":1692489600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,8,20]],"date-time":"2023-08-20T00:00:00Z","timestamp":1692489600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Faculty Research and Development Fund, The University of Auckland"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Lang Resources &amp; Evaluation"],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1007\/s10579-023-09680-1","type":"journal-article","created":{"date-parts":[[2023,8,20]],"date-time":"2023-08-20T11:01:29Z","timestamp":1692529289000},"page":"1-26","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["The development of a labelled te reo M\u0101ori\u2013English bilingual database for language technology"],"prefix":"10.1007","volume":"59","author":[{"given":"Jesin","family":"James","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Isabella","family":"Shields","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vithya","family":"Yogarajan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peter J.","family":"Keegan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Catherine I.","family":"Watson","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peter-Lucas","family":"Jones","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Keoni","family":"Mahelona","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,8,20]]},"reference":[{"key":"9680_CR1","volume-title":"M\u0101ori","author":"W Bauer","year":"1993","unstructured":"Bauer, W., Parker, W., & Evans, T. K. (1993). M\u0101ori. Routledge."},{"key":"9680_CR2","volume-title":"Let\u2019s learn M\u0101ori: A guide to the study of the Maori language","author":"B Biggs","year":"1969","unstructured":"Biggs, B. (1969). Let\u2019s learn M\u0101ori: A guide to the study of the Maori language. A.H.A.W. Reed."},{"key":"9680_CR3","doi-asserted-by":"publisher","unstructured":"Bird, S. (2020). Decolonising speech and language technology. In Proceedings of the 28th International Conference on Computational Linguistics (pp. 3504\u20133519). Barcelona, Spain: International Committee on Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/2020.coling-main.313","DOI":"10.18653\/v1\/2020.coling-main.313"},{"key":"9680_CR4","unstructured":"Census, N. Z. 2018 census totals by topic: National highlights. Stats NZ. https:\/\/www.stats.govt.nz\/information-releases\/2018-census-totals-by-topic-national-highlights-updated"},{"key":"9680_CR5","unstructured":"Corpus of Te Reo derived from the New Zealand Hansard. Te Hiku, Media. https:\/\/github.com\/TeHikuMedia\/nga-tautohetohe-reo"},{"key":"9680_CR6","unstructured":"Eliasson, S. (1989) English-Maori language contact: Code-switching and the free-morpheme constraint. In: Reports from Uppsala University Department of Linguistics (pp. 1\u201328)."},{"key":"9680_CR7","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511611834","volume-title":"Discourse strategies","author":"JJ Gumperz","year":"1982","unstructured":"Gumperz, J. J. (1982). Discourse strategies. Cambridge University Press."},{"key":"9680_CR8","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511618697","volume-title":"M\u0101ori: A linguistic introduction","author":"R Harlow","year":"2007","unstructured":"Harlow, R. (2007). M\u0101ori: A linguistic introduction. Cambridge University Press."},{"key":"9680_CR9","unstructured":"Hedderich, M. A., Lange, L., Adel, H., Jannik, S., & Klakow, D.: A survey on recent approaches for natural language processing in low-resource scenarios. In Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (pp. 2545\u20132568)."},{"key":"9680_CR10","doi-asserted-by":"crossref","unstructured":"James, J., Shields, I., Berriman, R., Keegan, P.J., Watson, C. I. (2020). Developing resources for te reo m\u0101ori text to speech synthesis system. In P. Sojka, I. Kope\u010dek, K. Pala & A. Hor\u00e1k (Eds.), Text, speech, and dialogue, lecture notes in computer science, p. 12284.","DOI":"10.1007\/978-3-030-58323-1_32"},{"key":"9680_CR11","doi-asserted-by":"crossref","unstructured":"James, J., Yogarajan, V., Shields, I., Watson, C., Keegan, P., Jones, P. -L., Mahelona, K. (2022). Language models for code-switch detection of te reo M\u0101ori and English in a Low-resource Setting. In Findings of Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies.","DOI":"10.18653\/v1\/2022.findings-naacl.49"},{"key":"9680_CR12","unstructured":"Kaitiakitanga license. Whare K\u014drero Kaitiakitanga License. https:\/\/xn--wharekrero-v3b.nz\/kaitiakitanga\/"},{"issue":"1","key":"9680_CR13","first-page":"1","volume":"12","author":"E Kasmuri","year":"2020","unstructured":"Kasmuri, E. (2020). Segregation of code-switching sentences using rule-based technique. International Journal of Advances in Soft Computing and its Applications, 12(1), 1\u201312.","journal-title":"International Journal of Advances in Soft Computing and its Applications"},{"issue":"1","key":"9680_CR14","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1177\/117718011501100105","volume":"11","author":"TT Keegan","year":"2015","unstructured":"Keegan, T. T., Mato, P., & Ruru, S. (2015). Using Twitter in an indigenous language: An analysis of te reo M\u0101ori tweets. AlterNative: An International Journal of Indigenous Peoples, 11(1), 59\u201375.","journal-title":"AlterNative: An International Journal of Indigenous Peoples"},{"issue":"1","key":"9680_CR15","first-page":"32","volume":"17","author":"J King","year":"2011","unstructured":"King, J., Maclagan, M., Harlow, R., Keegan, P., & Watson, C. (2011). The MAONZE corpus: Transcribing and analysing M\u0101ori speech. New Zealand Studies in Applied Linguistics, 17(1), 32\u201348.","journal-title":"New Zealand Studies in Applied Linguistics"},{"key":"9680_CR16","unstructured":"LMC: Legal M\u0101ori corpus. Victoria University of Wellington, New Zealand. http:\/\/nzetc.victoria.ac.nz\/tm\/scholarly\/tei-legalMaoriCorpus.html"},{"key":"9680_CR17","unstructured":"M\u0101ori data sovereignty network. Te Mana Raraunga. https:\/\/www.temanararaunga.maori.nz\/"},{"key":"9680_CR18","unstructured":"Niupepa: M\u0101ori newspapers - New Zealand Digital Library. Ministry of Education , New Zealand. http:\/\/www.nzdl.org\/cgi-bin\/library.cgi?a=p &p=about &c=niupepa"},{"key":"9680_CR19","doi-asserted-by":"crossref","unstructured":"Sabou, M., Bontcheva, K., & Scharl, A. (2012). Crowdsourcing research opportunities: Lessons from natural language processing. In International Conference on Knowledge Management and Knowledge Technologies (pp. 1\u20138).","DOI":"10.1145\/2362456.2362479"},{"key":"9680_CR20","unstructured":"Shields, I., Watson, C. I., Keegan, P. J., Berriman, R., & James, J. (2019). Te Reo M\u0101ori Voice for TTS. Language Technologies 4 All (LT4All)."},{"key":"9680_CR21","unstructured":"Stats, N. Z. (2020). Ng\u0101 Tikanga Paihere: A framework guiding ethical and culturally appropriate data use. Guidelines, 8."},{"key":"9680_CR22","unstructured":"Te Hiku Media. https:\/\/tehiku.nz\/te-hiku-tech\/papa-reo\/"},{"key":"9680_CR23","unstructured":"Te reo M\u0101ori Speech Recognition (Under development) by Te Hiku Media. https:\/\/koreromaori.io\/"},{"key":"9680_CR24","doi-asserted-by":"crossref","unstructured":"Trye, D., Calude, A. S., Bravo-Marquez, F., & Keegan, T. T. A. G. (2019). M\u0101ori loanwords: A corpus of New Zealand English tweets. In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop (pp. 136\u2013142).","DOI":"10.18653\/v1\/P19-2018"},{"key":"9680_CR25","first-page":"1","volume":"2022","author":"D Trye","year":"2022","unstructured":"Trye, D., Keegan, T. T., Mato, P., & Apperley, M. (2022). Harnessing indigenous tweets: The Reo M\u0101ori Twitter corpus. Language Resources and Evaluation, 2022, 1\u201340.","journal-title":"Language Resources and Evaluation"},{"key":"9680_CR26","doi-asserted-by":"crossref","unstructured":"Watson, C., Keegan, P., Maclagan, M., Harlow, R., & King, J. (2017). The motivation and development of MPAi, a M\u0101ori Pronunciation Aid. In Interspeech. (pp. 2063\u20132067).","DOI":"10.21437\/Interspeech.2017-215"},{"key":"9680_CR27","doi-asserted-by":"publisher","first-page":"364","DOI":"10.1016\/j.future.2022.05.014","volume":"135","author":"X Wu","year":"2022","unstructured":"Wu, X., Xiao, L., Sun, Y., Zhang, J., Ma, T., & He, L. (2022). A survey of human-in-the-loop for machine learning. Future Generation Computer Systems, 135, 364.","journal-title":"Future Generation Computer Systems"},{"key":"9680_CR28","doi-asserted-by":"crossref","unstructured":"Yirmibe\u015fo\u011flu, Z., & Eryi\u011fit, G. (2018). Detecting code-switching between Turkish-English language pair. In EMNLP Workshop W-NUT: The 4th Workshop on Noisy User-generated Text (pp. 110\u2013115).","DOI":"10.18653\/v1\/W18-6115"}],"container-title":["Language Resources and Evaluation"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-023-09680-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10579-023-09680-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-023-09680-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,17]],"date-time":"2025-03-17T08:25:14Z","timestamp":1742199914000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10579-023-09680-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,20]]},"references-count":28,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,3]]}},"alternative-id":["9680"],"URL":"https:\/\/doi.org\/10.1007\/s10579-023-09680-1","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-2501533\/v1","asserted-by":"object"}]},"ISSN":["1574-020X","1574-0218"],"issn-type":[{"value":"1574-020X","type":"print"},{"value":"1574-0218","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8,20]]},"assertion":[{"value":"30 June 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 August 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that there is no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}