{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T03:34:10Z","timestamp":1777952050975,"version":"3.51.4"},"reference-count":28,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T00:00:00Z","timestamp":1769731200000},"content-version":"vor","delay-in-days":29,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Procedia Computer Science"],"published-print":{"date-parts":[[2026]]},"DOI":"10.1016\/j.procs.2026.01.005","type":"journal-article","created":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T19:30:19Z","timestamp":1774035019000},"page":"28-37","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["PAN-KK: A Language Resource for Plagiarism Detection in Low-Resource Kazakh"],"prefix":"10.1016","volume":"275","author":[{"given":"Bakhyt","family":"Bakiyev","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Venelin","family":"Kovatchev","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mubashir","family":"Ali","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.procs.2026.01.005_bib1","doi-asserted-by":"crossref","unstructured":"Bakiyev, Bakhyt. (2022) \u201cMethod for Determining the Similarity of Text Documents for the Kazakh Language, Taking Into Account Synonyms: Extension to TF\u2013IDF.\u201d Proceedings of SIST. pp. 1\u20136. doi: 10.1109\/SIST54437.2022.9945747.","DOI":"10.1109\/SIST54437.2022.9945747"},{"key":"10.1016\/j.procs.2026.01.005_bib2","unstructured":"Barr\u00f3n-Cede\u00f1o, Alberto, Martin Potthast, Paolo Rosso, Benno Stein, and Miguel-\u00c1ngel Eiselt. (2010) \u201cCorpus and Evaluation Measures for Automatic Plagiarism Detection.\u201d Proceedings of LREC 2010."},{"key":"10.1016\/j.procs.2026.01.005_bib3","doi-asserted-by":"crossref","unstructured":"Conneau, Alexis, Kartikay Khandelwal, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzm\u00e1n, Edouard Grave, Myle Ott, Luke Zettlemoyer, and Veselin Stoyanov. (2020) \u201cUnsupervised Cross-Lingual Representation Learning at Scale.\u201d Proceedings of ACL 2020. pp. 8440\u20138451.","DOI":"10.18653\/v1\/2020.acl-main.747"},{"key":"10.1016\/j.procs.2026.01.005_bib4","doi-asserted-by":"crossref","unstructured":"Issabayeva, Saule, and Aigul Katyetova. (2025) \u201cDeep Learning of AI: Kazakhstan\u2019s Case.\u201d In Advances in Information and Communication (FICC 2025). doi: 10.1007\/978-3-031-84460-7_37.","DOI":"10.1007\/978-3-031-84460-7_37"},{"key":"10.1016\/j.procs.2026.01.005_bib5","doi-asserted-by":"crossref","unstructured":"Kamshat, A., U. Auyeskhan, N. Zarina, S. Alen, and M. Yeskazina. (2024) \u201cIntegration AI Techniques in Low-Resource Language: The Case of Kazakh Language.\u201d Proceedings of IEEE AITU. pp. 7\u201313. doi: 10.1109\/IEEECONF61558.2024.10585350.","DOI":"10.1109\/IEEECONF61558.2024.10585350"},{"key":"10.1016\/j.procs.2026.01.005_bib6","doi-asserted-by":"crossref","first-page":"555","DOI":"10.1016\/j.procs.2024.11.148","article-title":"\u201cTrends and Impact of Neural Network Research in Kazakhstan: A Comprehensive Analysis from 1992 to 2024.\u201d","volume":"251","author":"Orazbek","year":"2024","journal-title":"Procedia Computer Science"},{"issue":"2","key":"10.1016\/j.procs.2026.01.005_bib7","doi-asserted-by":"crossref","first-page":"183","DOI":"10.1017\/nlp.2024.33","article-title":"\u201cNatural Language Processing Applications for Low-Resource Languages.\u201d","volume":"31","author":"Pakray","year":"2025","journal-title":"Natural Language Processing"},{"key":"10.1016\/j.procs.2026.01.005_bib8","unstructured":"Potthast, Martin, Benno Stein, Alberto Barr\u00f3n-Cede\u00f1o, and Paolo Rosso. (2009) \u201cOverview of the 1st International Competition on Plagiarism Detection.\u201d CLEF 2009 Working Notes."},{"key":"10.1016\/j.procs.2026.01.005_bib9","doi-asserted-by":"crossref","unstructured":"Reimers, Nils, and Iryna Gurevych. (2019) \u201cSentence-BERT: Sentence Embeddings Using Siamese BERT-Networks.\u201d Proceedings of EMNLP\u2013IJCNLP 2019. pp. 3982\u20133992.","DOI":"10.18653\/v1\/D19-1410"},{"key":"10.1016\/j.procs.2026.01.005_bib10","unstructured":"Sanh, Victor, Lysandre Debut, Julien Chaumond, and Thomas Wolf. (2019) \u201cDistilBERT, a Distilled Version of BERT: Smaller, Faster, Cheaper and Lighter.\u201d arXiv preprint arXiv:1910.01108."},{"key":"10.1016\/j.procs.2026.01.005_bib11","unstructured":"Song, Kaitao, Xu Tan, Tao Qin, Jianfeng Lu, and Tie-Yan Liu. (2020) \u201cMPNet: Masked and Permuted Pre-Training for Language Understanding.\u201d NeurIPS 2020."},{"key":"10.1016\/j.procs.2026.01.005_bib12","doi-asserted-by":"crossref","unstructured":"Stamatatos, Efstathios, Martin Potthast, Francisco Rangel Pardo, Paolo Rosso, and Benno Stein. (2015) \u201cOverview of the PAN\/CLEF 2015 Evaluation Lab.\u201d CLEF 2015 Working Notes.","DOI":"10.1007\/978-3-319-24027-5_49"},{"key":"10.1016\/j.procs.2026.01.005_bib13","doi-asserted-by":"crossref","unstructured":"Togmanov, A., N. Mukhituly, D. Turmakhan, J. Mansurov, et al. (2025) \u201cKazMMLU: Evaluating Language Models on Kazakh, Russian, and Regional Knowledge of Kazakhstan.\u201d arXiv preprint arXiv:2502.12829.","DOI":"10.18653\/v1\/2025.acl-long.701"},{"key":"10.1016\/j.procs.2026.01.005_bib14","unstructured":"Veitsman, Yehor, and Marek Hartmann. (2025) \u201cRecent Advancements and Challenges of Turkic Central Asian Language Processing.\u201d Proceedings of LLM4LRL 2025. pp. 309\u2013324."},{"key":"10.1016\/j.procs.2026.01.005_bib15","unstructured":"Wang, Wenhui, Furu Wei, Li Dong, Hangbo Bao, Nan Yang, and Ming Zhou. (2020) \u201cMiniLM: Deep Self-Attention Distillation for Task-Agnostic Compression of Pre-Trained Transformers.\u201d NeurIPS 2020."},{"key":"10.1016\/j.procs.2026.01.005_bib16","doi-asserted-by":"crossref","unstructured":"Wolf, Thomas, Lysandre Debut, Victor Sanh, Julien Chaumond, Clement Delangue, et al. (2020) \u201cTransformers: State-of-the-Art Natural Language Processing.\u201d EMNLP 2020: System Demonstrations. pp. 38\u201345.","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"10.1016\/j.procs.2026.01.005_bib17","unstructured":"Yeshpanov, Rauan, Yerbolat Khassanov, and Huseyin Atakan Varol. (2022) \u201cKazNERD: Kazakh Named Entity Recognition Dataset.\u201d Proceedings of LREC 2022. pp. 417\u2013426."},{"key":"10.1016\/j.procs.2026.01.005_bib18","doi-asserted-by":"crossref","unstructured":"Yeshpanov, Rauan, Anna Polonskaya, and Huseyin Atakan Varol. (2024) \u201cKazParC: Kazakh Parallel Corpus for Machine Translation.\u201d Proceedings of LREC\u2013COLING 2024. pp. 9633\u20139644.","DOI":"10.63317\/3yr864ht9h2r"},{"issue":"4","key":"10.1016\/j.procs.2026.01.005_bib19","doi-asserted-by":"crossref","first-page":"555","DOI":"10.1162\/coli.07-034-R2","article-title":"\u201cInter-Coder Agreement for Computational Linguistics.\u201d","volume":"34","author":"Artstein","year":"2008","journal-title":"Computational Linguistics"},{"key":"10.1016\/j.procs.2026.01.005_bib20","series-title":"Content Analysis: An Introduction to Its Methodology. 3rd ed","author":"Krippendorff","year":"2013"},{"key":"10.1016\/j.procs.2026.01.005_bib21","unstructured":"Kartbayev, Amandyk. (2023) \u201cKazakhBERTmulti: A Multilingual BERT Model Fine-Tuned for the Kazakh Language.\u201d Available at: https:\/\/huggingface.co\/amandyk\/KazakhBERTmulti."},{"key":"10.1016\/j.procs.2026.01.005_bib22","unstructured":"Alzahrani, S. M., N. Salim, and A. Abraham. (2012) \u201cUnderstanding Plagiarism: Types, Tools, and Detection Techniques.\u201d IEEE Transactions on Systems, Man, and Cybernetics, Part C."},{"key":"10.1016\/j.procs.2026.01.005_bib23","unstructured":"Potthast, Martin, Benno Stein, and Paolo Rosso. (2013) \u201cA Survey of Plagiarism Detection.\u201d Proceedings of SLATE."},{"key":"10.1016\/j.procs.2026.01.005_bib24","doi-asserted-by":"crossref","unstructured":"Yeshpanov, Rustem, Pavel Efimov, Leonid Boytsov, Aidos Shalkarbayuli, and Pavel Braslavski. (2024) \u201cKazQAD: Kazakh Open-Domain Question Answering Dataset.\u201d arXiv:2404.04487. Available at: https:\/\/arxiv.org\/abs\/2404.04487.","DOI":"10.63317\/3j6y9kcsnpp9"},{"issue":"1","key":"10.1016\/j.procs.2026.01.005_bib25","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1007\/s10579-009-9114-z","article-title":"\u201cCross-Language Plagiarism Detection.\u201d","volume":"45","author":"Potthast","year":"2011","journal-title":"Language Resources and Evaluation"},{"key":"10.1016\/j.procs.2026.01.005_bib26","unstructured":"Agirre, Eneko, Daniel Cer, Mona Diab, and Aitor Gonz\u00e1lez-Agirre. (2012) \u201cSemEval-2012 Task 6: A Pilot on Semantic Textual Similarity.\u201d Proceedings of SEM 2012. pp. 385\u2013393."},{"key":"10.1016\/j.procs.2026.01.005_bib27","doi-asserted-by":"crossref","unstructured":"Agirre, Eneko, Carmen Banea, Claire Cardie, Daniel Cer, Mona Diab, Aitor Gonz\u00e1lez-Agirre, Weiwei Guo, Rada Mihalcea, German Rigau, and Janyce Wiebe. (2016) \u201cSemEval-2016 Task 1: Semantic Textual Similarity, Monolingual and Cross-Lingual Evaluation.\u201d Proceedings of SemEval 2016. pp. 497\u2013511.","DOI":"10.18653\/v1\/S16-1081"},{"key":"10.1016\/j.procs.2026.01.005_bib28","doi-asserted-by":"crossref","unstructured":"Yang, Yinfei, Yuan Zhang, Chris Tar, and Jason Baldridge. (2019) \u201cPAWS-X: A Cross-Lingual Adversarial Dataset for Paraphrase Identification.\u201d Proceedings of EMNLP\u2013IJCNLP 2019. pp. 3687\u20133692. arXiv:1908.11828.","DOI":"10.18653\/v1\/D19-1382"}],"container-title":["Procedia Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1877050926000050?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1877050926000050?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T11:21:52Z","timestamp":1777893712000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1877050926000050"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":28,"alternative-id":["S1877050926000050"],"URL":"https:\/\/doi.org\/10.1016\/j.procs.2026.01.005","relation":{},"ISSN":["1877-0509"],"issn-type":[{"value":"1877-0509","type":"print"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"PAN-KK: A Language Resource for Plagiarism Detection in Low-Resource Kazakh","name":"articletitle","label":"Article Title"},{"value":"Procedia Computer Science","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.procs.2026.01.005","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Author(s). Published by Elsevier B.V.","name":"copyright","label":"Copyright"}]}}