{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T03:34:05Z","timestamp":1777952045372,"version":"3.51.4"},"reference-count":30,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T00:00:00Z","timestamp":1769817600000},"content-version":"vor","delay-in-days":30,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Procedia Computer Science"],"published-print":{"date-parts":[[2026]]},"DOI":"10.1016\/j.procs.2026.01.056","type":"journal-article","created":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T19:30:19Z","timestamp":1774035019000},"page":"474-483","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["SciMDIX: A dataset for aspect extraction from multi-domain scientific documents in Kazakh and Russian"],"prefix":"10.1016","volume":"275","author":[{"given":"Nikita","family":"Shvarts","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tatiana","family":"Batura","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nurzhan","family":"Mukazhanov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aigerim","family":"Yerimbetova","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mussa","family":"Turdalyuly","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bakzhan","family":"Sakenov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"issue":"3\u20135","key":"10.1016\/j.procs.2026.01.056_bib1","first-page":"4171","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","volume":"1","author":"Devlin","year":"2019","journal-title":"in: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Minneapolis, MN, USA"},{"key":"10.1016\/j.procs.2026.01.056_bib2","first-page":"3615","article-title":"SciBERT: A pretrained language model for scientific text","author":"Beltagy","year":"2019","journal-title":"in: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), Association for Computational Linguistics"},{"key":"10.1016\/j.procs.2026.01.056_bib3","doi-asserted-by":"crossref","unstructured":"I. Augenstein, M. Das, S. Riedel, L. Vikraman, A. McCallum, Semeval 2017 task 10: Scienceie-extracting keyphrases and relations from scientific publications, arXiv preprint arXiv:1704.02853 (2017).","DOI":"10.18653\/v1\/S17-2091"},{"key":"10.1016\/j.procs.2026.01.056_bib4","doi-asserted-by":"crossref","unstructured":"Y. Luan, L. He, M. Ostendorf, H. Hajishirzi, Multi-task identification of entities, relations, and coreference for scientific knowledge graph construction, in: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, Association for Computational Linguistics, Brussels, Belgium, 31 October\u20134 November 2018, pp. 3219\u20133232. doi: 10.18653\/v1\/D18-1360. URL https:\/\/aclanthology.org\/D18-1360\/","DOI":"10.18653\/v1\/D18-1360"},{"key":"10.1016\/j.procs.2026.01.056_bib5","first-page":"7506","article-title":"SciREX: A challenge dataset for document-level information extraction","author":"Jain","year":"2020","journal-title":"in: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, Association for Computational Linguistics"},{"key":"10.1016\/j.procs.2026.01.056_bib6","first-page":"13083","article-title":"SciER: An entity and relation extraction dataset for datasets","author":"Zhang","year":"2024","journal-title":"methods, and tasks in scientific documents, in: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, Association for Computational Linguistics"},{"key":"10.1016\/j.procs.2026.01.056_bib7","series-title":"Automatic aspect extraction from scientific texts, in: International Conference on Analysis of Images, Social Networks and Texts","first-page":"67","author":"Marshalova","year":"2023"},{"key":"10.1016\/j.procs.2026.01.056_bib8","doi-asserted-by":"crossref","unstructured":"T. Batura, A. Yerimbetova, N. Mukazhanov, N. Shvarts, B. Sakenov, M. Turdalyuly, Information extraction from multi-domain scientific documents: Methods and insights, Applied Sciences 15 (16) (2025). doi: 10.3390\/app15169086.","DOI":"10.3390\/app15169086"},{"issue":"11","key":"10.1016\/j.procs.2026.01.056_bib9","doi-asserted-by":"crossref","first-page":"296","DOI":"10.1007\/s10462-024-10906-z","article-title":"A systematic review of aspect-based sentiment analysis: domains","volume":"57","author":"Hua","year":"2024","journal-title":"methods, and trends, Artificial Intelligence Review"},{"issue":"5","key":"10.1016\/j.procs.2026.01.056_bib10","doi-asserted-by":"crossref","first-page":"1305","DOI":"10.1007\/s10115-022-01675-8","article-title":"Span-based relational graph transformer network for aspect\u2013opinion pair extraction","volume":"64","author":"Li","year":"2022","journal-title":"Knowledge and Information Systems"},{"issue":"140","key":"10.1016\/j.procs.2026.01.056_bib11","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"Journal of machine learning research"},{"key":"10.1016\/j.procs.2026.01.056_bib12","first-page":"7871","article-title":"Bart: Denoising sequence-to-sequence pre-training for natural language generation","author":"Lewis","year":"2020","journal-title":"translation, and comprehension, in: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics"},{"issue":"1","key":"10.1016\/j.procs.2026.01.056_bib13","doi-asserted-by":"crossref","first-page":"14646","DOI":"10.1038\/s41598-024-61886-7","article-title":"Unifying aspect-based sentiment analysis bert and multi-layered graph convolutional networks for comprehensive sentiment dissection","volume":"14","author":"Aziz","year":"2024","journal-title":"Scientific reports"},{"key":"10.1016\/j.procs.2026.01.056_bib14","first-page":"417","article-title":"Kaznerd: Kazakh named entity recognition dataset","author":"Yeshpanov","year":"2022","journal-title":"in: Proceedings of the Thirteenth Language Resources and Evaluation Conference"},{"key":"10.1016\/j.procs.2026.01.056_bib15","doi-asserted-by":"crossref","first-page":"123","DOI":"10.31449\/inf.v47i9.5217","article-title":"Fine-tuning bert for aspect extraction in multi-domain absa","volume":"47","author":"Akram","year":"2023","journal-title":"Informatica"},{"key":"10.1016\/j.procs.2026.01.056_bib16","unstructured":"F. Rizvi, T. Navojith, A. Adhikari, W. Senevirathna, D. Kasthurirathna, L. Abeywardhana, Keyword extraction, and aspect classification in sinhala, english, and code-mixed content, arXiv preprint (2025). doi: 10.48550\/arXiv.2504.10679."},{"key":"10.1016\/j.procs.2026.01.056_bib17","doi-asserted-by":"crossref","unstructured":"J. \u0160m\u00edd, P. P\u0159ib\u00e1\u0148, P. Kr\u00e1l, Few-shot cross-lingual aspect-based sentiment analysis with sequence-to-sequence models, in: Text, Speech, and Dialogue: 28th International Conference, TSD 2025, Erlangen, Germany, August 25\u201328, 2025, Proceedings, Part II, 2025, p. 27\u201338. doi: 10.1007\/978-3-032-02551-7_4.","DOI":"10.1007\/978-3-032-02551-7_4"},{"key":"10.1016\/j.procs.2026.01.056_bib18","series-title":"A comparative analysis of lstm and bert models for named entity recognition in kazakh language: A multi-classification approach, in: International Conference on Modelling and Simulation of Social-Behavioural Phenomena in Creative Societies","first-page":"116","author":"Oralbekova","year":"2024"},{"key":"10.1016\/j.procs.2026.01.056_bib19","doi-asserted-by":"crossref","first-page":"103073","DOI":"10.1016\/j.inffus.2025.103073","article-title":"Cross-lingual aspect-based sentiment analysis: A survey on tasks","volume":"120","author":"\u0160m\u00edd","year":"2025","journal-title":"approaches, and challenges, Information Fusion"},{"key":"10.1016\/j.procs.2026.01.056_bib20","doi-asserted-by":"crossref","first-page":"2483","DOI":"10.18653\/v1\/P18-1231","article-title":"Bilingual sentiment embeddings: Joint projection of sentiment across languages","author":"Barnes","year":"2018","journal-title":"in: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)"},{"key":"10.1016\/j.procs.2026.01.056_bib21","series-title":"Xtreme: A massively multilingual multi-task benchmark for evaluating cross-lingual generalisation, in: International conference on machine learning","first-page":"4411","author":"Hu","year":"2020"},{"key":"10.1016\/j.procs.2026.01.056_bib22","first-page":"51","article-title":"Kfu nlp team at smm4h 2020 tasks: Cross-lingual transfer learning with pretrained language models for drug reactions","author":"Miftahutdinov","year":"2020","journal-title":"in: Proceedings of the fifth social media mining for health applications workshop & shared task"},{"key":"10.1016\/j.procs.2026.01.056_bib23","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"10.1016\/j.procs.2026.01.056_bib24","unstructured":"J. Achiam, S. Adler, S. Agarwal, L. Ahmad, I. Akkaya, F. L. Aleman, D. Almeida, J. Altenschmidt, S. Altman, S. Anadkat, et al., Gpt-4 technical report, arXiv preprint arXiv:2303.08774 (2023)."},{"key":"10.1016\/j.procs.2026.01.056_bib25","unstructured":"H. Touvron, L. Martin, K. Stone, P. Albert, A. Almahairi, Y. Babaei, N. Bashlykov, S. Batra, P. Bhargava, S. Bhosale, et al., Llama 2: Open foundation and fine-tuned chat models, arXiv preprint arXiv:2307.09288 (2023)."},{"issue":"1","key":"10.1016\/j.procs.2026.01.056_bib26","doi-asserted-by":"crossref","first-page":"1418","DOI":"10.1038\/s41467-024-45563-x","article-title":"Structured information extraction from scientific text with large language models","volume":"15","author":"Dagdelen","year":"2024","journal-title":"Nature communications"},{"key":"10.1016\/j.procs.2026.01.056_bib27","first-page":"18056","article-title":"Discrepancy and uncertainty aware denoising knowledge distillation for zero-shot cross-lingual named entity recognition","volume":"38","author":"Ge","year":"2024","journal-title":"in: Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"10.1016\/j.procs.2026.01.056_bib28","unstructured":"G. Comanici, E. Bieber, M. Schaekermann, I. Pasupat, N. Sachdeva, I. Dhillon, M. Blistein, O. Ram, D. Zhang, E. Rosen, et al., Gemini 2.5: Pushing the frontier with advanced reasoning, multimodality, long context, and next generation agentic capabilities, arXiv preprint arXiv:2507.06261 (2025)."},{"key":"10.1016\/j.procs.2026.01.056_bib29","first-page":"8440","article-title":"Unsupervised cross-lingual representation learning at scale","author":"Conneau","year":"2020","journal-title":"in: Proceedings of the 58th annual meeting of the association for computational linguistics"},{"key":"10.1016\/j.procs.2026.01.056_bib30","unstructured":"L. J. Miranda, \u00c1. K\u00e1d\u00e1r, A. Boyd, S. Van Landeghem, A. S\u00f8gaard, M. Honnibal, Multi hash embeddings in spacy, arXiv preprint arXiv:2212.09255 (2022)."}],"container-title":["Procedia Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1877050926000566?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1877050926000566?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T11:20:31Z","timestamp":1777893631000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1877050926000566"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":30,"alternative-id":["S1877050926000566"],"URL":"https:\/\/doi.org\/10.1016\/j.procs.2026.01.056","relation":{},"ISSN":["1877-0509"],"issn-type":[{"value":"1877-0509","type":"print"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"SciMDIX: A dataset for aspect extraction from multi-domain scientific documents in Kazakh and Russian","name":"articletitle","label":"Article Title"},{"value":"Procedia Computer Science","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.procs.2026.01.056","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Author(s). Published by Elsevier B.V.","name":"copyright","label":"Copyright"}]}}