{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T20:54:24Z","timestamp":1773694464803,"version":"3.50.1"},"reference-count":20,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T00:00:00Z","timestamp":1760572800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T00:00:00Z","timestamp":1760572800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,16]]},"DOI":"10.1109\/iccp68926.2025.11427162","type":"proceedings-article","created":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T19:51:37Z","timestamp":1773431497000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["Subword Tokenization for Agglutinative Low-Resource Languages: A Case Study on Crimean Tatar"],"prefix":"10.1109","author":[{"given":"Enghin","family":"Omer","sequence":"first","affiliation":[{"name":"Politehnica University,Bucharest"}]},{"given":"Mariana","family":"Mocanu","sequence":"additional","affiliation":[{"name":"Politehnica University,Bucharest"}]}],"member":"263","reference":[{"key":"ref1","first-page":"66","article-title":"SentencePiece: A simple and language independent subword tokenizer and detokenizer for Neural Text Processing","volume-title":"Proceedings Of The 2018 Conference On Empirical Methods In Natural Language Processing: System Demonstrations.","author":"Kudo"},{"key":"ref2","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/P16-1162","article-title":"Neural Machine Translation of Rare Words with Subword Units","author":"Sennrich","year":"2016"},{"key":"ref3","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/P19-1301","article-title":"Choosing Transfer Languages for Cross-Lingual Learning","author":"Lin","year":"2019"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"613","DOI":"10.1162\/coli_a_00482","article-title":"Cross-Lingual Transfer with Language-Specific Subnetworks for Low-Resource Dependency Parsing","volume":"49","author":"Choenni","year":"2023","journal-title":"Computational Linguistics"},{"key":"ref5","article-title":"Benchmarking Azerbaijani Neural Machine Translation","author":"Chen","year":"2022"},{"key":"ref6","first-page":"2914","article-title":"No Data to Crawl? Monolingual Corpus Creation from PDF Files of Truly low-Resource Languages in Peru","volume-title":"Proceedings of The Twelfth Language Resources And Evaluation Conference","author":"Bustamante"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.63317\/4jjcw8x7z5od"},{"key":"ref8","first-page":"24","article-title":"A free\/open-source rule-based machine translation system for Crimean Tatar to Turkish","volume-title":"Proceedings Of The 2nd Workshop On Technologies For MT Of Low Resource Languages","author":"G\u00f6k\u0131rmak"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.31392\/NPU-nc.series9.2023.26.02"},{"key":"ref10","first-page":"86","article-title":"Segmental inventory and the evolution of harmony in Crimean Tatar","volume":"17","author":"Kavitskaya","year":"2013","journal-title":"Turkic Languages"},{"key":"ref11","article-title":"Curs general de limba t\u0103tar\u0103: fonetic\u0103 \u015fi fonologie","author":"Mahmut","year":"1975","journal-title":"Morfologie, Editura Universit\u0103\u0163ii din Bucure\u015fti, Bucure\u015fti"},{"key":"ref12","article-title":"WikiExtractor","author":"Attardi","year":"2015","journal-title":"GitHub Repository"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.863"},{"key":"ref14","article-title":"What is the Best Tokenizer for Korean? Tokenization Experiment on Korean Datasets","volume-title":"Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (EMNLP)","author":"Park"},{"key":"ref15","first-page":"1064","article-title":"Bidirectional LSTM-CRF Models for Sequence Tagging","volume-title":"Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT)","author":"Huang"},{"key":"ref16","article-title":"Crimean Tatar","journal-title":"UNESCO World Atlas of Languages"},{"key":"ref17","article-title":"Masakhane: African Languages Hub"},{"key":"ref18","article-title":"Linguistic Data Consortium (LDC), University of Pennsylvania"},{"key":"ref19","article-title":"Zemberek-NLP: NLP tools for Turkish","author":"Ak\u0131n (ahmetaa)"},{"key":"ref20","article-title":"Turkish Corpus","journal-title":"RedRussianArmy"}],"event":{"name":"2025 IEEE 21st International Conference on Intelligent Computer Communication and Processing (ICCP)","location":"Cluj-Napoca, Romania","start":{"date-parts":[[2025,10,16]]},"end":{"date-parts":[[2025,10,18]]}},"container-title":["2025 IEEE 21st International Conference on Intelligent Computer Communication and Processing (ICCP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11427093\/11427059\/11427162.pdf?arnumber=11427162","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T20:10:28Z","timestamp":1773691828000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11427162\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,16]]},"references-count":20,"URL":"https:\/\/doi.org\/10.1109\/iccp68926.2025.11427162","relation":{},"subject":[],"published":{"date-parts":[[2025,10,16]]}}}