{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T22:10:48Z","timestamp":1780697448740,"version":"3.54.1"},"reference-count":243,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2025,3,15]],"date-time":"2025-03-15T00:00:00Z","timestamp":1741996800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,3,15]],"date-time":"2025-03-15T00:00:00Z","timestamp":1741996800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Intell Rev"],"DOI":"10.1007\/s10462-025-11162-5","type":"journal-article","created":{"date-parts":[[2025,3,15]],"date-time":"2025-03-15T05:06:52Z","timestamp":1742015212000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":175,"title":["BERT applications in natural language processing: a review"],"prefix":"10.1007","volume":"58","author":[{"given":"Nadia Mushtaq","family":"Gardazi","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ali","family":"Daud","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Muhammad Kamran","family":"Malik","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Amal","family":"Bukhari","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tariq","family":"Alsahfi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bader","family":"Alshemaimri","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,3,15]]},"reference":[{"key":"11162_CR1","unstructured":"Adesam Y, Berdi\u010devskis A (2021) Part-of-speech tagging of Swedish texts in the neural era. Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa), 200\u2013209. https:\/\/aclanthology.org\/2021.nodalida-main.20\/"},{"key":"11162_CR2","unstructured":"Adesam Y, Bouma G, Johansson R (2015) Defining the Eukalyptus forest\u2013the Koala treebank of Swedish. Proceedings of the 20th Nordic Conference of Computational Linguistics (NODALIDA 2015), 1\u20139. https:\/\/aclanthology.org\/W15-1804.pdf"},{"key":"11162_CR3","doi-asserted-by":"crossref","unstructured":"Aftan S, Shah H (2023) A survey on BERT and its applications. 2023 20th Learn Technol Conf (L&T) 161:166. https:\/\/ieeexplore.ieee.org\/abstract\/document\/10092289\/","DOI":"10.1109\/LT58159.2023.10092289"},{"key":"11162_CR4","doi-asserted-by":"publisher","unstructured":"Aithal SG, Rao AB, Singh S (2021) Automatic question-answer pairs generation and question similarity mechanism in question answering system. Appl Intell 51(11):8484\u20138497. https:\/\/doi.org\/10.1007\/s10489-021-02348-9","DOI":"10.1007\/s10489-021-02348-9"},{"key":"11162_CR5","unstructured":"Akbik A, Blythe D, Vollgraf R (2018) Contextual string embeddings for sequence labeling. Proceedings of the 27th International Conference on Computational Linguistics, 1638\u20131649. https:\/\/aclanthology.org\/C18-1139\/?utm_campaign=piqcy&utm_medium=email&utm_source=Revuenewsletter"},{"key":"11162_CR6","doi-asserted-by":"crossref","unstructured":"Akhtyamova L (2020) Named entity recognition in Spanish biomedical literature: Short review and BERT model. 2020 26th Conference of Open Innovations Association (FRUCT), 1\u20137. https:\/\/ieeexplore.ieee.org\/abstract\/document\/9087359\/","DOI":"10.23919\/FRUCT48808.2020.9087359"},{"key":"11162_CR7","first-page":"1","volume":"15","author":"T Alotaibi","year":"2024","unstructured":"Alotaibi T, Al-Dossari H (2024) A review of fake news detection techniques for arabic language. Int J Adv Comput Sci Appl 15:1","journal-title":"Int J Adv Comput Sci Appl"},{"key":"11162_CR8","doi-asserted-by":"crossref","unstructured":"Alsentzer E, Murphy JR, Boag W, Weng WH, Jin D, Naumann T, McDermott M (2019) Publicly available clinical BERT embeddings. arXiv preprint arXiv:1904.03323","DOI":"10.18653\/v1\/W19-1909"},{"issue":"8","key":"11162_CR9","doi-asserted-by":"publisher","first-page":"11003","DOI":"10.1007\/s13369-021-05810-5","volume":"48","author":"JA Alzubi","year":"2023","unstructured":"Alzubi JA, Jain R, Singh A, Parwekar P, Gupta M (2023) COBERT: COVID-19 question answering system using BERT. Arab J Sci Eng 48(8):11003\u201311013","journal-title":"Arab J Sci Eng"},{"issue":"5","key":"11162_CR10","doi-asserted-by":"publisher","first-page":"9627","DOI":"10.3233\/JIFS-202140","volume":"40","author":"W Ansar","year":"2021","unstructured":"Ansar W, Goswami S, Chakrabarti A, Chakraborty B (2021) An efficient methodology for aspect-based sentiment analysis using BERT through refined aspect extraction. J Intell Fuzzy Syst 40(5):9627\u20139644","journal-title":"J Intell Fuzzy Syst"},{"issue":"2","key":"11162_CR11","first-page":"345","volume":"49","author":"J Ansell","year":"2023","unstructured":"Ansell J, Smith A, Kumar R (2023) A unified approach combining parameter-efficient adaptation, machine translation, and multi-target training for low-resource languages. J Comput Linguistics 49(2):345\u2013367","journal-title":"J Comput Linguistics"},{"key":"11162_CR12","unstructured":"Antoun W, Baly F, Hajj H (2020) Arabert: Transformer-based model for arabic language understanding. arXiv Preprint arXiv:2003.00104. https:\/\/arxiv.org\/abs\/2003.00104"},{"key":"11162_CR13","unstructured":"Artetxe M, Labaka G, Agirre E (2020) On the structural disparities between linguistically distant languages in cross-lingual models. Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, 1234\u20131245"},{"key":"11162_CR14","doi-asserted-by":"publisher","first-page":"101739","DOI":"10.1016\/j.jksuci.2023.101739","volume":"35","author":"S Bano","year":"2023","unstructured":"Bano S, Khalid S, Tairan NM, Shah H, Khattak H (2023) Summarization of scholarly articles using BERT and BiGRU: deep learning-based extractive approach. J King Saud Univ Comput Inf Sci 35:101739","journal-title":"J King Saud Univ Comput Inf Sci"},{"key":"11162_CR15","doi-asserted-by":"crossref","unstructured":"Batra H, Punn NS, Sonbhadra SK, Agarwal S (2021) Bert-based sentiment analysis: A software engineering perspective. In Database and Expert Systems Applications: 32nd International Conference, DEXA 2021, Virtual Event, September 27\u201330, 2021, Proceedings, Part I 32 (pp. 138\u2013148). Springer International Publishing","DOI":"10.1007\/978-3-030-86472-9_13"},{"key":"11162_CR16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W19-4410","author":"S Bell","year":"2019","unstructured":"Bell S, Yannakoudakis H, Rei M (2019) Context is Key: Grammatical Error Detection with Contextual Word Representations. Proc Fourteenth Workshop Innovative Use NLP Building Educational Appl. https:\/\/doi.org\/10.18653\/v1\/W19-4410","journal-title":"Proc Fourteenth Workshop Innovative Use NLP Building Educational Appl"},{"key":"11162_CR17","doi-asserted-by":"crossref","unstructured":"Beltagy I, Lo K, Cohan A (2019) SciBERT: A pretrained Language model for scientific text. ArXiv:1903.10676. http:\/\/arxiv.org\/abs\/1903.10676","DOI":"10.18653\/v1\/D19-1371"},{"key":"11162_CR18","unstructured":"Benamar A, Bothua M, Grouin C, Vilnat A (2021) Easy-to-use Combination of POS and BERT Model for Domain-Specific and Misspelled Terms. NL4AI@AI*IA"},{"key":"11162_CR19","doi-asserted-by":"publisher","unstructured":"Bender EM, Gebru T, McMillan-Major A, Shmitchell S (2021) On the Dangers of Stochastic Parrots: Can Language Models Be Too Big? Proceedings of the 2021 ACM Conference on Fairness, Accountability, and Transparency, 610\u2013623. https:\/\/doi.org\/10.1145\/3442188.3445922","DOI":"10.1145\/3442188.3445922"},{"key":"11162_CR20","unstructured":"Benikova D, Biemann C, Kisselew M, Pado S (2014) Germeval 2014 named entity recognition shared task: Companion paper. Workshop Proceedings of the 12th Edition of the KONVENS Conference, 104\u2013112. https:\/\/hildok.bsz-bw.de\/files\/283\/03_00.pdf"},{"key":"11162_CR21","doi-asserted-by":"crossref","unstructured":"Bentivogli L, Bisazza A, Cettolo M, Federico M (2016) Neural versus Phrase-Based machine translation quality: A case study. ArXiv:1608.04631. http:\/\/arxiv.org\/abs\/1608.04631","DOI":"10.18653\/v1\/D16-1025"},{"key":"11162_CR22","doi-asserted-by":"crossref","unstructured":"Bikku T, Jarugula J, Kongala L, Tummala ND, Donthiboina NV (2023), June Exploring the effectiveness of BERT for sentiment analysis on large-scale social media data. In 2023 3rd International Conference on Intelligent Technologies (CONIT) (pp. 1\u20134). IEEE","DOI":"10.1109\/CONIT59222.2023.10205600"},{"key":"11162_CR23","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1162\/tacl_a_00051","volume":"5","author":"P Bojanowski","year":"2017","unstructured":"Bojanowski P, Grave E, Joulin A, Mikolov T (2017) Enriching word vectors with subword information. Trans Assoc Comput Linguistics 5:135\u2013146","journal-title":"Trans Assoc Comput Linguistics"},{"key":"11162_CR24","doi-asserted-by":"crossref","unstructured":"Bojar O, Buck C, Federmann C, Haddow B, Koehn P, Leveling J, Monz C, Pecina P, Post M, Saint-Amand H (2014) Findings of the 2014 workshop on statistical machine translation. Proceedings of the Ninth Workshop on Statistical Machine Translation, 12\u201358. https:\/\/aclanthology.org\/W14-3302.pdf","DOI":"10.3115\/v1\/W14-3302"},{"key":"11162_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1155\/2021\/6633213","volume":"2021","author":"N Boudjellal","year":"2021","unstructured":"Boudjellal N, Zhang H, Khan A, Ahmad A, Naseem R, Shang J, Dai L (2021) ABioNER: A BERT-based model for Arabic biomedical named-entity recognition. Complexity 2021:1\u20136","journal-title":"Complexity"},{"issue":"3","key":"11162_CR26","doi-asserted-by":"publisher","first-page":"589","DOI":"10.3390\/electronics13030589","volume":"13","author":"A Branco","year":"2024","unstructured":"Branco A, Parada D, Silva M, Mendon\u00e7a F, Mostafa SS, Morgado-Dias F (2024) Sentiment analysis in Portuguese restaurant reviews: application of transformer models in edge computing. Electronics 13(3):589","journal-title":"Electronics"},{"key":"11162_CR27","unstructured":"Bridges RA, Jones CL, Iannacone MD, Testa KM, Goodall JR (2014) Automatic Labeling for Entity Extraction in Cyber Security (arXiv:1308.4941). http:\/\/arxiv.org\/abs\/1308.4941"},{"key":"11162_CR28","unstructured":"Cai D, Zhao H (2016) Neural Word Segmentation Learning for Chinese (arXiv:1606.04300). arXiv. http:\/\/arxiv.org\/abs\/1606.04300"},{"key":"11162_CR29","doi-asserted-by":"publisher","first-page":"152183","DOI":"10.1109\/ACCESS.2020.3017382","volume":"8","author":"L Cai","year":"2020","unstructured":"Cai L et al (2020) A hybrid BERT model that incorporates label semantics via adjustive attention for multi-label text classification. Ieee Access 8:152183\u2013152192","journal-title":"Ieee Access"},{"key":"11162_CR30","doi-asserted-by":"publisher","first-page":"118290","DOI":"10.1016\/j.eswa.2022.118290","volume":"209","author":"R Catelli","year":"2022","unstructured":"Catelli R, Fujita H, De Pietro G, Esposito M (2022) Deceptive reviews and sentiment Polarity: effective link by exploiting BERT. Expert Syst Appl 209:118290","journal-title":"Expert Syst Appl"},{"issue":"4","key":"11162_CR31","doi-asserted-by":"publisher","first-page":"e17787","DOI":"10.2196\/17787","volume":"8","author":"Y-P Chen","year":"2020","unstructured":"Chen Y-P, Chen Y-Y, Lin J-J, Huang C-H, Lai F (2020) Modified bidirectional encoder representations from transformers extractive summarization model for hospital information systems based on character-level tokens (AlphaBERT): Development and performance evaluation. JMIR Med Inf 8(4):e17787","journal-title":"JMIR Med Inf"},{"issue":"1","key":"11162_CR32","doi-asserted-by":"publisher","first-page":"103135","DOI":"10.1016\/j.ipm.2022.103135","volume":"60","author":"X Chen","year":"2023","unstructured":"Chen X, He B, Hui K, Sun L, Sun Y (2023) Dealing with textual noise for robust and effective BERT re-ranking. Inf Process Manag 60(1):103135","journal-title":"Inf Process Manag"},{"issue":"1","key":"11162_CR33","doi-asserted-by":"publisher","first-page":"223","DOI":"10.33140\/JSNDC.03.01.14","volume":"3","author":"EQ Chinedu","year":"2023","unstructured":"Chinedu EQ, Asogwa EC, Sunday BT, Onyeizu NM, Obulezi JO (2023) Unraveling emotions: contemporary approaches in sentiment analysis. J Sen Net Data Comm 3(1):223\u2013230","journal-title":"J Sen Net Data Comm"},{"key":"11162_CR34","doi-asserted-by":"crossref","unstructured":"Chinnalagu A, Durairaj AK (2022), December Comparative analysis of BERT-base transformers and deep learning sentiment prediction models. In 2022 11th International Conference on System Modeling & Advancement in Research Trends (SMART) (pp. 874\u2013879). IEEE","DOI":"10.1109\/SMART55829.2022.10047651"},{"issue":"1","key":"11162_CR35","doi-asserted-by":"publisher","first-page":"735","DOI":"10.1186\/s12859-019-3321-4","volume":"20","author":"H Cho","year":"2019","unstructured":"Cho H, Lee H (2019) Biomedical named entity recognition using deep neural networks with contextual information. BMC Bioinform 20(1):735. https:\/\/doi.org\/10.1186\/s12859-019-3321-4","journal-title":"BMC Bioinform"},{"key":"11162_CR36","unstructured":"Chu Y, Xu J, Zhou X, Yang Q, Zhang S, Yan Z, Zhou J (2023) Qwen-audio: advancing universal audio Understanding via unified large-scale audio-language models. Preprint arXiv:2311.07919."},{"key":"11162_CR37","unstructured":"Chung J, Gulcehre C, Cho K, Bengio Y (2014) Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555"},{"key":"11162_CR38","unstructured":"Clark K, Luong MT, Le QV, Manning CD (2020) Electra: Pre-training text encoders as discriminators rather than generators. arXiv:2003.10555"},{"key":"11162_CR39","doi-asserted-by":"crossref","unstructured":"Clinchant S, Jung KW, Nikoulina V (2019) On the use of BERT for Neural Machine Translation (arXiv:1909.12744). arXiv. http:\/\/arxiv.org\/abs\/1909.12744","DOI":"10.18653\/v1\/D19-5611"},{"key":"11162_CR40","unstructured":"Collier N, Kim J-D (2004) Introduction to the bio-entity recognition task at JNLPBA. Proceedings of the International Joint Workshop on Natural Language Processing in Biomedicine and Its Applications (NLPBA\/BioNLP), 73\u201378. https:\/\/aclanthology.org\/W04-1213.pdf"},{"key":"11162_CR41","doi-asserted-by":"crossref","unstructured":"Cui Y, Liang M (2024) Automated Scoring of Translations with BERT Models: Chinese and English Language Case Study. Applied Sciences","DOI":"10.3390\/app14051925"},{"key":"11162_CR42","doi-asserted-by":"publisher","first-page":"118841","DOI":"10.1016\/j.eswa.2022.118841","volume":"213","author":"T Dai","year":"2023","unstructured":"Dai T, Zhao J, Li D, Tian S, Zhao X, Pan S (2023) Heterogeneous deep graph convolutional network with citation relational BERT for COVID-19 inline citation recommendation. Expert Syst Appl 213:118841","journal-title":"Expert Syst Appl"},{"key":"11162_CR43","doi-asserted-by":"crossref","unstructured":"Darji H, Mitrovi\u0107 J, Granitzer M (2023) German BERT Model for Legal Named Entity Recognition. International Conference on Agents and Artificial Intelligence","DOI":"10.5220\/0011749400003393"},{"issue":"3","key":"11162_CR44","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/s10462-016-9482-x","volume":"47","author":"A Daud","year":"2017","unstructured":"Daud A, Khan W, Che D (2017) Urdu language processing: a survey. Artif Intell Rev 47(3):279\u2013311. https:\/\/doi.org\/10.1007\/s10462-016-9482-x","journal-title":"Artif Intell Rev"},{"key":"11162_CR45","doi-asserted-by":"publisher","first-page":"103336","DOI":"10.1016\/j.ipm.2023.103336","volume":"60","author":"C de Andrade","year":"2023","unstructured":"de Andrade C, Bel\u00e9m FM, Cunha W, Fran\u00e7a C, Viegas F, Rocha LC, Gon\u00e7alves MA (2023) On the class separability of contextual embeddings representations - or the classifier does not matter when the (text) representation is so good! Inf Process Manag 60:103336","journal-title":"Inf Process Manag"},{"issue":"1","key":"11162_CR46","doi-asserted-by":"publisher","first-page":"3463","DOI":"10.1038\/s41598-023-28579-z","volume":"13","author":"BBM de Paiva","year":"2023","unstructured":"de Paiva BBM, Pereira PD, de Andrade CMV, Gomes VMR, Souza-Silva MVR, Martins KPMP, Marcolino MS (2023) Potential and limitations of machine meta-learning (ensemble) methods for predicting COVID-19 mortality in a large inhospital Brazilian dataset. Sci Rep 13(1):3463","journal-title":"Sci Rep"},{"key":"11162_CR47","unstructured":"de Vries W, van Cranenburgh A, Bisazza A, Caselli T, van Noord G, Nissim M (2019) BERTje: A Dutch BERT model. ArXiv. ArXiv:1912.09582. http:\/\/arxiv.org\/abs\/1912.09582"},{"issue":"7","key":"11162_CR48","first-page":"1708","volume":"12","author":"MD Deepa","year":"2021","unstructured":"Deepa MD (2021) Bidirectional encoder representations from Transformers (BERT) Language model for sentiment analysis task. Turkish J Comput Math Educ (TURCOMAT) 12(7):1708\u20131721","journal-title":"Turkish J Comput Math Educ (TURCOMAT)"},{"issue":"13","key":"11162_CR49","doi-asserted-by":"publisher","first-page":"2910","DOI":"10.3390\/electronics12132910","volume":"12","author":"L Deng","year":"2023","unstructured":"Deng L, Yin T, Li Z, Ge Q (2023) Sentiment analysis of comment data based on BERT-ETextCNN-ELSTM. Electronics 12(13):2910","journal-title":"Electronics"},{"issue":"1","key":"11162_CR50","first-page":"89","volume":"10","author":"M Deode","year":"2023","unstructured":"Deode M, Patel S, Singh T (2023) Creating multilingual sentence BERT models using synthetic corpora for Indian languages. IEEE Trans Nat Lang Process 10(1):89\u2013101","journal-title":"IEEE Trans Nat Lang Process"},{"key":"11162_CR51","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K (2019) BERT: Pre-training of deep bidirectional Transformers for Language Understanding. ArXiv. ArXiv:1810.04805. http:\/\/arxiv.org\/abs\/1810.04805"},{"key":"11162_CR52","doi-asserted-by":"crossref","unstructured":"Djeffal N, Kheddar H, Addou D, Mazari AC, Himeur Y (2023) Automatic Speech Recognition with BERT and CTC Transformers: A Review. 2023 2nd International Conference on Electronics, Energy and Measurement (IC2EM), 1, 1\u20138","DOI":"10.1109\/IC2EM59347.2023.10419784"},{"issue":"12","key":"11162_CR53","doi-asserted-by":"publisher","first-page":"2395","DOI":"10.1080\/13658816.2020.1784425","volume":"34","author":"S Dodge","year":"2020","unstructured":"Dodge S, Gao S, Tomko M, Weibel R (2020) Progress in computational movement analysis \u2013 towards movement data science. Int J Geogr Inf Sci 34(12):2395\u20132400. https:\/\/doi.org\/10.1080\/13658816.2020.1784425","journal-title":"Int J Geogr Inf Sci"},{"key":"11162_CR54","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.jbi.2013.12.006","volume":"47","author":"RI Do\u011fan","year":"2014","unstructured":"Do\u011fan RI, Leaman R, Lu Z (2014) NCBI disease corpus: A resource for disease name recognition and concept normalization. J Biomed Inform 47:1\u201310","journal-title":"J Biomed Inform"},{"key":"11162_CR55","doi-asserted-by":"publisher","unstructured":"Donabauer G, Kruschwitz U, Corney D (2021) Making sense of subtitles: sentence boundary detection and speaker change detection in unpunctuated texts. Companion Proc Web Conf 2021 357\u2013362. https:\/\/doi.org\/10.1145\/3442442.3451894","DOI":"10.1145\/3442442.3451894"},{"key":"11162_CR56","doi-asserted-by":"crossref","unstructured":"Doostmohammadi E, Nassajian M, Rahimi A (2020) Persian Ezafe recognition using Transformers and its role in Part-Of-Speech tagging. ArXiv. ArXiv:2009.09474. http:\/\/arxiv.org\/abs\/2009.09474","DOI":"10.18653\/v1\/2020.findings-emnlp.86"},{"key":"11162_CR57","unstructured":"Du J, Huang Y, Moilanen K (2019) AIG Investments. AI at the FinSBD task: Sentence boundary detection through sequence labelling and BERT fine-tuning. Proceedings of the First Workshop on Financial Technology and Natural Language Processing, 81\u201387. https:\/\/aclanthology.org\/W19-5513.pdf"},{"key":"11162_CR58","doi-asserted-by":"crossref","unstructured":"Duraisamy P, Duraisamy M, Periyanayaki M, Natarajan Y (2023) Predicting Disaster Tweets using Enhanced BERT Model. 2023 7th International Conference on Intelligent Computing and Control Systems (ICICCS), 1745\u20131749","DOI":"10.1109\/ICICCS56967.2023.10142660"},{"key":"11162_CR59","doi-asserted-by":"crossref","unstructured":"Errami M, Ouassil MA, Rachidi R, Cherradi B, Hamida S, Raihani A (2023), May Investigating the Performance of BERT Model for Sentiment Analysis on Moroccan News Comments. In 2023 3rd International Conference on Innovative Research in Applied Science, Engineering and Technology (IRASET) (pp. 1\u20138). IEEE","DOI":"10.1109\/IRASET57153.2023.10152965"},{"key":"11162_CR60","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1162\/tacl_a_00298","volume":"8","author":"A Ettinger","year":"2020","unstructured":"Ettinger A (2020) What BERT is not: lessons from a new suite of psycholinguistic diagnostics for Language models. Trans Assoc Comput Linguistics 8:34\u201348","journal-title":"Trans Assoc Comput Linguistics"},{"key":"11162_CR61","unstructured":"Fang L, Chen Q, Wei C, Lu Z, Wang K (2023) Bioformer: an efficient transformer language model for biomedical text mining"},{"issue":"1","key":"11162_CR62","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1016\/S0306-4573(02)00079-1","volume":"40","author":"S Foo","year":"2004","unstructured":"Foo S, Li H (2004) Chinese word segmentation and its effect on information retrieval. Inf Process Manag 40(1):161\u2013190","journal-title":"Inf Process Manag"},{"key":"#cr-split#-11162_CR63.1","unstructured":"Freitas C, Carvalho P, Gon\u00e7alo Oliveira H, Mota C, Santos D (2010) Second HAREM: Advancing the state of the art of named entity recognition in Portuguese. Quot"},{"key":"#cr-split#-11162_CR63.2","unstructured":"Daniel Tapias (Ed) Proceedings of the International Conference on Language Resources and Evaluation (LREC 2010)(Valletta 17-23 May de 2010) European Language Resources Association. https:\/\/comum.rcaap.pt\/bitstream\/10400.26\/20499\/2\/FreitasetalLREC2010.pdf"},{"key":"#cr-split#-11162_CR63.3","unstructured":"Freitas C, Carvalho P, Gon\u00e7alo Oliveira H, Mota C, Santos D (2010) Second HAREM: Advancing the state of the art of named entity recognition in Portuguese. Quot; In Nicoletta Calzolari; Khalid Choukri; Bente Maegaard; Joseph Mariani; Jan Odijk; Stelios Piperidis; Mike Rosner; Daniel Tapias"},{"key":"#cr-split#-11162_CR63.4","unstructured":"(Ed) Proceedings of the International Conference on Language Resources and Evaluation (LREC 2010)(Valletta 17-23 May de 2010) European Language Resources Association. https:\/\/comum.rcaap.pt\/bitstream\/10400.26\/20499\/2\/FreitasetalLREC2010.pdf"},{"key":"11162_CR64","doi-asserted-by":"crossref","unstructured":"Gao J, Xu H, Shi H, Ren X, Philip LH, Liang X, Li Z (2022), June Autobert-zero: Evolving bert backbone from scratch. In Proceedings of the AAAI Conference on Artificial Intelligence (Vol. 36, No. 10, pp. 10663\u201310671)","DOI":"10.1609\/aaai.v36i10.21311"},{"key":"11162_CR65","unstructured":"Ghojogh B, Ghodsi A, Karray F, Crowley M (2020) Locally linear embedding and its variants: Tutorial and survey. arXiv preprint arXiv:2011.10925"},{"key":"11162_CR66","unstructured":"Gokaslan A, Cohen V, Pavlick E, Tellex S (2019) Openwebtext corpus"},{"key":"11162_CR67","doi-asserted-by":"crossref","unstructured":"Gong C, Tang J, Zhou S, Hao Z, Wang J (2019) Chinese named entity recognition with bert. DEStech Trans Comput Sci Eng\u00a012","DOI":"10.12783\/dtcse\/cisnrc2019\/33299"},{"key":"11162_CR68","doi-asserted-by":"publisher","first-page":"101261","DOI":"10.1016\/j.csl.2021.101261","volume":"71","author":"R Guarasci","year":"2022","unstructured":"Guarasci R, Silvestri S, De Pietro G, Fujita H, Esposito M (2022) BERT syntactic transfer: A computational experiment on Italian, French, and english languages. Comput Speech Lang 71:101261","journal-title":"Comput Speech Lang"},{"key":"11162_CR69","doi-asserted-by":"crossref","unstructured":"Guo Z, Nguyen ML (2020) Document-Level Neural Machine Translation Using BERT as Context Encoder. AACL","DOI":"10.18653\/v1\/2020.aacl-srw.15"},{"key":"11162_CR70","doi-asserted-by":"crossref","unstructured":"Gupta R (2024a) Bidirectional encoders to state-of-the-art: a review of BERT and its transformative impact on natural language processing. \u0418\u043d\u0444\u043e\u0440\u043c\u0430\u0442\u0438\u043a\u0430. \u042d\u043a\u043e\u043d\u043e\u043c\u0438\u043a\u0430. \u0423\u043f\u0440\u0430\u0432\u043b\u0435\u043d\u0438\u0435 - Informatics. Economics. Management","DOI":"10.47813\/2782-5280-2024-3-1-0311-0320"},{"issue":"3","key":"11162_CR71","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1007\/s10462-023-10123-4","volume":"47","author":"S Gupta","year":"2024","unstructured":"Gupta S (2024b) The impact of BERT on natural Language processing: A review and future directions. Artif Intell Rev 47(3):309\u2013325. https:\/\/doi.org\/10.1007\/s10462-023-10123-4","journal-title":"Artif Intell Rev"},{"key":"11162_CR72","doi-asserted-by":"crossref","unstructured":"Gupta P, Gandhi S, Chakravarthi B (2021) Leveraging Transfer learning techniques- BERT, RoBERTa, ALBERT and DistilBERT for Fake Review Detection. Proceedings of the 13th Annual Meeting of the Forum for Information Retrieval Evaluation","DOI":"10.1145\/3503162.3503169"},{"issue":"14","key":"11162_CR73","doi-asserted-by":"publisher","first-page":"i37","DOI":"10.1093\/bioinformatics\/btx228","volume":"33","author":"M Habibi","year":"2017","unstructured":"Habibi M, Weber L, Neves M, Wiegandt DL, Leser U (2017) Deep learning with word embeddings improves biomedical named entity recognition. Bioinformatics 33(14):i37\u2013i48","journal-title":"Bioinformatics"},{"key":"11162_CR74","doi-asserted-by":"crossref","unstructured":"Harte J, Zorgdrager W, Louridas P, Katsifodimos A, Jannach D, Fragkoulis M (2023), September Leveraging large language models for sequential recommendation. In Proceedings of the 17th ACM Conference on Recommender Systems (pp. 1096\u20131102)","DOI":"10.1145\/3604915.3610639"},{"key":"11162_CR75","doi-asserted-by":"crossref","unstructured":"Hayashibe Y, Mitsuzawa K (2020) Sentence Boundary Detection on Line Breaks in Japanese. Proceedings of the Sixth Workshop on Noisy User-Generated Text (W-NUT 2020), 71\u201375. https:\/\/aclanthology.org\/2020.wnut-1.10\/","DOI":"10.18653\/v1\/2020.wnut-1.10"},{"key":"11162_CR76","unstructured":"He P, Liu X, Gao J, Chen W (2021) DeBERTa: Decoding-enhanced BERT with Disentangled Attention (arXiv:2006.03654). arXiv. http:\/\/arxiv.org\/abs\/2006.03654"},{"key":"11162_CR77","unstructured":"He C, Zhu X, Le Y, Liu Y, Yin J (2024) SEBERTNets: Sequence Enhanced BERT Networks for Event Entity Extraction Tasks Oriented to the Finance Field. arXiv preprint arXiv:2401.11408"},{"key":"11162_CR78","doi-asserted-by":"crossref","unstructured":"Hebbar S, N BAR, Supriya MS (2023) M., G, N.V., & L, S. Named Entity Recognition Using BERT Model for Kannada Language. 2023 International Conference on Recent Advances in Information Technology for Sustainable Development (ICRAIS), 212\u2013216","DOI":"10.1109\/ICRAIS59684.2023.10367119"},{"key":"11162_CR79","unstructured":"Hiraoka T, Okazaki N (2024) Knowledge of Pretrained Language Models on Surface Information of Tokens. arXiv preprint arXiv:2402.09808"},{"key":"11162_CR80","unstructured":"Hoang M, Bihorac OA, Rouces J (2019) Aspect-based sentiment analysis using bert. In Proceedings of the 22nd nordic conference on computational linguistics (pp. 187\u2013196)"},{"key":"11162_CR81","doi-asserted-by":"crossref","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780. http:\/\/arxiv.org\/abs\/1909.00100","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"11162_CR82","doi-asserted-by":"crossref","unstructured":"Hu J, Hu R, Wang Z, Li D, Wu J, Ren L, Wang M (2023), October Collaborative Fraud Detection: How Collaboration Impacts Fraud Detection. In Proceedings of the 31st ACM International Conference on Multimedia (pp. 8891\u20138899)","DOI":"10.1145\/3581783.3613780"},{"key":"11162_CR83","unstructured":"Huang K, Altosaar J, Ranganath R (2020) ClinicalBERT: Modeling Clinical Notes and Predicting Hospital Readmission (arXiv:1904.05342). arXiv. http:\/\/arxiv.org\/abs\/1904.05342"},{"key":"11162_CR84","doi-asserted-by":"crossref","unstructured":"Iandola FN, Shaw AE, Krishna R, Keutzer KW (2020) SqueezeBERT: What can computer vision teach NLP about efficient neural networks? (arXiv:2006.11316). arXiv. http:\/\/arxiv.org\/abs\/2006.11316","DOI":"10.18653\/v1\/2020.sustainlp-1.17"},{"key":"11162_CR85","doi-asserted-by":"crossref","unstructured":"Imamura K, Sumita E (2019) Recycling a pre-trained BERT encoder for neural machine translation. Proceedings of the 3rd Workshop on Neural Generation and Translation, 23\u201331. https:\/\/aclanthology.org\/D19-5603\/","DOI":"10.18653\/v1\/D19-5603"},{"key":"11162_CR86","doi-asserted-by":"crossref","unstructured":"Jacovi A, Goldberg Y (2020) Towards Faithfully Interpretable NLP Systems: How should we define and evaluate faithfulness? (arXiv:2004.03685). arXiv. http:\/\/arxiv.org\/abs\/2004.03685","DOI":"10.18653\/v1\/2020.acl-main.386"},{"key":"11162_CR87","doi-asserted-by":"crossref","unstructured":"Jeong SW, Kim CG, Whangbo TK (2023) Question Answering System for Healthcare Information based on BERT and GPT. 2023 Joint International Conference on Digital Arts, Media and Technology with ECTI Northern Section Conference on Electrical, Electronics, Computer and Telecommunications Engineering (ECTI DAMT & NCON), 348\u2013352","DOI":"10.1109\/ECTIDAMTNCON57770.2023.10139365"},{"issue":"1","key":"11162_CR88","doi-asserted-by":"publisher","first-page":"5663","DOI":"10.1038\/s41598-023-31612-w","volume":"13","author":"K Jha","year":"2023","unstructured":"Jha K, Karmakar S, Saha S (2023) Graph-BERT and Language model-based framework for protein\u2013protein interaction identification. Sci Rep 13(1):5663","journal-title":"Sci Rep"},{"key":"11162_CR89","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1162\/tacl_a_00300","volume":"8","author":"M Joshi","year":"2020","unstructured":"Joshi M, Chen D, Liu Y, Weld DS, Zettlemoyer L, Levy O (2020) Spanbert: improving pre-training by representing and predicting spans. Trans Association Comput Linguistics 8:64\u201377","journal-title":"Trans Association Comput Linguistics"},{"key":"11162_CR90","doi-asserted-by":"crossref","unstructured":"Joshy A, Sundar S (2022), December Analyzing the performance of sentiment analysis using Bert, Distilbert, and Roberta. In 2022 IEEE international power and renewable energy conference (IPRECON) (pp. 1\u20136). IEEE","DOI":"10.1109\/IPRECON55716.2022.10059542"},{"key":"11162_CR91","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1016\/j.cogsys.2019.12.005","volume":"61","author":"RK Kaliyar","year":"2020","unstructured":"Kaliyar RK, Goswami A, Narang P, Sinha S (2020) FNDNet\u2013a deep convolutional neural network for fake news detection. Cogn Syst Res 61:32\u201344","journal-title":"Cogn Syst Res"},{"key":"11162_CR92","unstructured":"Karimi A, Rossi L, Prati A (2020) Improving bert performance for aspect-based sentiment analysis. arXiv preprint arXiv:2010.11731"},{"issue":"3","key":"11162_CR93","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1007\/s10579-018-9439-6","volume":"53","author":"W Khan","year":"2019","unstructured":"Khan W, Daud A, Nasir JA, Amjad T, Arafat S, Aljohani N, Alotaibi FS (2019) Urdu part of speech tagging using conditional random fields. Lang Resour Evaluation 53(3):331\u2013362. https:\/\/doi.org\/10.1007\/s10579-018-9439-6","journal-title":"Lang Resour Evaluation"},{"key":"11162_CR94","doi-asserted-by":"publisher","first-page":"100026","DOI":"10.1016\/j.nlp.2023.100026","volume":"4","author":"W Khan","year":"2023","unstructured":"Khan W, Daud A, Khan K, Muhammad S, Haq R (2023) Exploring the frontiers of deep learning and natural Language processing: A comprehensive overview of key challenges and emerging trends. Nat Lang Process J 4:100026. https:\/\/doi.org\/10.1016\/j.nlp.2023.100026","journal-title":"Nat Lang Process J"},{"key":"11162_CR95","doi-asserted-by":"crossref","unstructured":"Kim K-M, Heo M-O, Choi S-H, Zhang B-T (2017) DeepStory: Video Story QA by Deep Embedded Memory Networks (arXiv:1707.00836). arXiv. http:\/\/arxiv.org\/abs\/1707.00836","DOI":"10.24963\/ijcai.2017\/280"},{"key":"11162_CR96","doi-asserted-by":"crossref","unstructured":"Kora R, Mohammed A (2023) A Comprehensive Review on Transformers Models For Text Classification. 2023 International Mobile, Intelligent, and Ubiquitous Computing Conference (MIUCC), 1\u20137","DOI":"10.1109\/MIUCC58832.2023.10278387"},{"key":"11162_CR97","unstructured":"Koroteev MV (2021) BERT: A Review of Applications in Natural Language Processing and Understanding (arXiv:2103.11943). arXiv. http:\/\/arxiv.org\/abs\/2103.11943"},{"key":"11162_CR98","doi-asserted-by":"crossref","unstructured":"Koshkin R, Sudoh K, Nakamura S (2024) TransLLaMa: LLM-based Simultaneous Translation System. ArXiv, abs\/2402.04636","DOI":"10.18653\/v1\/2024.findings-emnlp.27"},{"key":"11162_CR99","doi-asserted-by":"crossref","unstructured":"Kovaleva O, Romanov A, Rogers A, Rumshisky A (2019) Revealing the Dark Secrets of BERT (arXiv:1908.08593). arXiv. http:\/\/arxiv.org\/abs\/1908.08593","DOI":"10.18653\/v1\/D19-1445"},{"key":"11162_CR100","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1758-2946-7-S1-S2","volume":"7","author":"M Krallinger","year":"2015","unstructured":"Krallinger M, Rabal O, Leitner F, Vazquez M, Salgado D, Lu Z, Leaman R, Lu Y, Ji D, Lowe DM, Sayle RA, Batista-Navarro RT, Rak R, Huber T, Rockt\u00e4schel T, Matos S, Campos D, Tang B, Xu H, Valencia A (2015) The CHEMDNER corpus of chemicals and drugs and its annotation principles. J Cheminform 7:1. https:\/\/doi.org\/10.1186\/1758-2946-7-S1-S2","journal-title":"J Cheminform"},{"key":"11162_CR101","unstructured":"Labusch K, Zu S, Kulturbesitz B, Neudecker C, Zellh\u00f6fer D (2019) BERT for Named Entity Recognition in Contemporary and Historical German"},{"key":"11162_CR102","doi-asserted-by":"crossref","unstructured":"Lakshmidevi N, Swain SK, Vamsikrishna M (2023) September A Hybrid Enhancing Aspect-Based Sentiment Analysis with BERT for Aspect Extraction and Diverse ML Classifiers. In 2023 International Conference on Network, Multimedia and Information Technology (NMITCON) (pp. 01\u201308). IEEE","DOI":"10.1109\/NMITCON58196.2023.10275957"},{"key":"11162_CR103","unstructured":"Lample G, Conneau A (2019) Cross-lingual language model pretraining. arXiv preprint arXiv:1901.07291"},{"key":"11162_CR104","unstructured":"Lan Z, Chen M, Goodman S, Gimpel K, Sharma P, Soricut R (2020) ALBERT: A Lite BERT for Self-supervised Learning of Language Representations (arXiv:1909.11942). arXiv. http:\/\/arxiv.org\/abs\/1909.11942"},{"key":"11162_CR105","unstructured":"Le H, Vial L, Frej J, Segonne V, Coavoux M, Lecouteux B, Allauzen A, Crabb\u00e9 B, Besacier L, Schwab D (2020) FlauBERT: Unsupervised Language Model Pre-training for French (arXiv:1912.05372). arXiv. http:\/\/arxiv.org\/abs\/1912.05372"},{"key":"11162_CR106","doi-asserted-by":"crossref","unstructured":"Lee JS, Hsiang J (2019) Patentbert: Patent classification with fine-tuning a pre-trained bert model. arXiv preprint arXiv:1906.02124","DOI":"10.1016\/j.wpi.2020.101965"},{"issue":"4","key":"11162_CR107","doi-asserted-by":"publisher","first-page":"1234","DOI":"10.1093\/bioinformatics\/btz682","volume":"36","author":"J Lee","year":"2020","unstructured":"Lee J, Yoon W, Kim S, Kim D, Kim S, So CH, Kang J (2020) BioBERT: A pre-trained biomedical Language representation model for biomedical text mining. Bioinformatics 36(4):1234\u20131240","journal-title":"Bioinformatics"},{"key":"11162_CR108","doi-asserted-by":"crossref","unstructured":"Lei J, Yu L, Bansal M, Berg TL (2019) TVQA: Localized, Compositional Video Question Answering (arXiv:1809.01696). arXiv. http:\/\/arxiv.org\/abs\/1809.01696","DOI":"10.18653\/v1\/D18-1167"},{"key":"11162_CR109","doi-asserted-by":"crossref","unstructured":"Li X, Chen L (2023) Fake Review Detection Using Deep Neural Networks with Multimodal Feature Fusion Method. 2023 IEEE 29th International Conference on Parallel and Distributed Systems (ICPADS), 2869\u20132872","DOI":"10.1109\/ICPADS60453.2023.00411"},{"key":"11162_CR110","unstructured":"Li J, Zhang Y (2024) The Death of Feature Engineering? BERT with Linguistic Features on SQuAD 2.0. ArXiv, abs\/2404.03184"},{"key":"11162_CR111","doi-asserted-by":"crossref","unstructured":"Li J, Sun Y, Johnson RJ, Sciaky D, Wei C-H, Leaman R, Davis AP, Mattingly CJ, Wiegers TC, Lu Z (2016) BioCreative V CDR task corpus: A resource for chemical disease relation extraction. Database, 2016. https:\/\/academic.oup.com\/database\/article\/doi\/10.1093\/database\/baw068\/2630414?ref=https%3A%2F%2Fgithubhelp.com&login=true","DOI":"10.1093\/database\/baw068"},{"key":"11162_CR112","unstructured":"Li Y, Anastasopoulos A, Black AW (2020a) Towards Minimal Supervision BERT-based Grammar Error Correction (arXiv:2001.03521). arXiv. http:\/\/arxiv.org\/abs\/2001.03521"},{"key":"11162_CR113","unstructured":"Li Y, Anastasopoulos A, Black AW (2020b) Towards Minimal Supervision BERT-based Grammar Error Correction (arXiv:2001.03521). arXiv. http:\/\/arxiv.org\/abs\/2001.03521"},{"key":"11162_CR114","doi-asserted-by":"crossref","unstructured":"Li L, Ma R, Guo Q, Xue X, Qiu X (2020c) Bert-attack: Adversarial attack against bert using bert. arXiv preprint arXiv:2004.09984","DOI":"10.18653\/v1\/2020.emnlp-main.500"},{"key":"11162_CR115","doi-asserted-by":"crossref","unstructured":"Li Y, Wang S, Lin C, Guerin F, Barrault L (2023) FrameBERT: Conceptual Metaphor Detection with Frame Embedding Learning. ArXiv, abs\/2302.04834","DOI":"10.18653\/v1\/2023.eacl-main.114"},{"key":"11162_CR116","unstructured":"Liang W, Liang Y (2024) DrBERT: Unveiling the Potential of Masked Language Modeling Decoder in BERT pretraining. arXiv preprint arXiv:2401.15861"},{"key":"11162_CR117","doi-asserted-by":"crossref","unstructured":"Liang M, Shi Y (2023) Named Entity Recognition Method Based on BERT-whitening and Dynamic Fusion Model. 2023 5th International Conference on Natural Language Processing (ICNLP), 191\u2013197","DOI":"10.1109\/ICNLP58431.2023.00041"},{"key":"11162_CR118","doi-asserted-by":"publisher","unstructured":"Licari V (2022) ITALIAN-LEGAL-BERT: Pre-training on Italian civil law corpora. J Comput Law 35(2):211\u2013225. https:\/\/doi.org\/10.1093\/jcl\/ztac024","DOI":"10.1093\/jcl\/ztac024"},{"key":"11162_CR119","doi-asserted-by":"crossref","unstructured":"Lim K, Park J (2020) Part-of-speech tagging using multiview learning. IEEE Access 8:195184\u2013195196","DOI":"10.1109\/ACCESS.2020.3033979"},{"key":"11162_CR120","doi-asserted-by":"publisher","first-page":"105127","DOI":"10.1016\/j.envint.2019.105127","volume":"132","author":"K Liu","year":"2019","unstructured":"Liu K, Wang X, Wei N, Song Z, Li D (2019a) Accurate quantification and transport Estimation of suspended atmospheric microplastics in megacities: implications for human health. Environ Int 132:105127","journal-title":"Environ Int"},{"key":"11162_CR121","unstructured":"Liu Y, Ott M, Goyal N, Du J, Joshi M, Chen D, Levy O, Lewis M, Zettlemoyer L, Stoyanov V (2019b) RoBERTa: A Robustly Optimized BERT Pretraining Approach (arXiv:1907.11692). arXiv. http:\/\/arxiv.org\/abs\/1907.11692"},{"key":"11162_CR122","doi-asserted-by":"crossref","unstructured":"Liu Z, Huang D, Huang K, Li Z, Zhao J (2021) Finbert: A pre-trained financial language representation model for financial text mining. In Proceedings of the twenty-ninth international conference on international joint conferences on artificial intelligence (pp. 4513\u20134519)","DOI":"10.24963\/ijcai.2020\/622"},{"key":"11162_CR123","doi-asserted-by":"crossref","unstructured":"Liu W, Lin S, Gao B, Huang K, Liu W, Huang Z, Feng J, Chen X, Huang F (2022) BERT-POS: Sentiment Analysis of MOOC Reviews Based on BERT with Part-of-Speech Information. International Conference on Artificial Intelligence in Education","DOI":"10.1007\/978-3-031-11647-6_72"},{"key":"11162_CR124","doi-asserted-by":"crossref","unstructured":"Lu J, Zhan X, Liu G, Zhan X, Deng X (2023a) BSTC: A Fake Review Detection Model Based on a Pre-Trained Language Model and Convolutional Neural Network. Electronics","DOI":"10.3390\/electronics12102165"},{"key":"11162_CR125","doi-asserted-by":"crossref","unstructured":"Lu X, Liu W, Jiang S, Liu C (2023b), March Multilingual BERT cross-lingual transferability with pre-trained representations on Tangut: A survey. In 2023 5th International Conference on Natural Language Processing (ICNLP) (pp. 229\u2013234). IEEE","DOI":"10.1109\/ICNLP58431.2023.00048"},{"key":"11162_CR126","doi-asserted-by":"crossref","unstructured":"Ma B, Chen L (2023) Named entity recognition in medical field based on BERT model. Other Conferences","DOI":"10.1117\/12.3011763"},{"issue":"3","key":"11162_CR127","doi-asserted-by":"publisher","first-page":"1380","DOI":"10.1109\/TNNLS.2021.3105284","volume":"34","author":"J Ma","year":"2021","unstructured":"Ma J, Liu J, Lin Q, Wu B, Wang Y, You Y (2021) Multitask learning for visual question answering. IEEE Trans Neural Networks Learn Syst 34(3):1380\u20131394","journal-title":"IEEE Trans Neural Networks Learn Syst"},{"key":"11162_CR128","doi-asserted-by":"crossref","unstructured":"Ma Z, Yan K, Wang H (2023) BERT-based Question Answering using Knowledge Graph Embeddings in Nuclear Power Domain. 2023 26th International Conference on Computer Supported Cooperative Work in Design (CSCWD), 267\u2013272","DOI":"10.1109\/CSCWD57460.2023.10152692"},{"issue":"1","key":"11162_CR129","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3129290","volume":"17","author":"MK Malik","year":"2018","unstructured":"Malik MK (2018) Urdu named entity recognition and classification system using artificial neural network. ACM Trans Asian Low-Resource Lang Inform Process 17(1):1\u201313. https:\/\/doi.org\/10.1145\/3129290","journal-title":"ACM Trans Asian Low-Resource Lang Inform Process"},{"key":"11162_CR130","first-page":"10","volume":"7","author":"MK Malik","year":"2016","unstructured":"Malik MK, Sarwar SM (2016) Named entity recognition system for postpositional languages: Urdu as a case study. Int J Adv Comput Sci Appl 7:10","journal-title":"Int J Adv Comput Sci Appl"},{"key":"11162_CR131","unstructured":"Malmsten M, B\u00f6rjeson L, Haffenden C (2020) Playing with Words at the National Library of Sweden\u2014Making a Swedish BERT (arXiv:2007.01658). arXiv. http:\/\/arxiv.org\/abs\/2007.01658"},{"key":"11162_CR132","doi-asserted-by":"publisher","unstructured":"Martin L, Muller B, Su\u00e1rez PJO, Dupont Y, Romary L, de la Clergerie \u00c9V, Seddah D, Sagot B (2020) CamemBERT: A Tasty French Language Model. Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, 7203\u20137219. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.645","DOI":"10.18653\/v1\/2020.acl-main.645"},{"key":"11162_CR133","doi-asserted-by":"crossref","unstructured":"Mehta H, Kumar Bharti S, Doshi N (2024) Comparative Analysis of Part of Speech(POS) Tagger for Gujarati Language using Deep Learning and Pre-Trained LLM. 2024 3rd International Conference for Innovation in Technology (INOCON), 1\u20133","DOI":"10.1109\/INOCON60754.2024.10511678"},{"key":"11162_CR134","doi-asserted-by":"crossref","unstructured":"Mewada A, Dewang RK, Goldar P, Maurya SK (2023) SentiBERT: A Novel Approach for Fake Review Detection Incorporating Sentiment Features with Contextual Features. Proceedings of the 2023 Fifteenth International Conference on Contemporary Computing","DOI":"10.1145\/3607947.3607991"},{"key":"11162_CR135","unstructured":"Mikolov T, Chen K, Corrado G, Dean J (2013) Efficient Estimation of Word Representations in Vector Space (arXiv:1301.3781). arXiv. http:\/\/arxiv.org\/abs\/1301.3781"},{"key":"11162_CR136","unstructured":"Ming NW, Wang Z, Liu C, Goh RSM, Luo T (2022) Ma-bert: Towards matrix arithmetic-only bert inference by eliminating complex non-linear functions. In The Eleventh International Conference on Learning Representations"},{"key":"11162_CR137","unstructured":"Mir AQ, Khan FY, Chishti MA (2023) Online Fake Review Detection Using Supervised Machine Learning And BERT Model. ArXiv, abs\/2301.03225"},{"key":"11162_CR138","first-page":"52","volume":"36","author":"A Mohtashami","year":"2024","unstructured":"Mohtashami A, Jaggi M (2024) Random-access infinite context length for transformers. Adv Neural Inf Process Syst 36:52","journal-title":"Adv Neural Inf Process Syst"},{"key":"11162_CR139","unstructured":"Moon T, Awasthy P, Ni J, Florian R (2019) Towards lingua franca named entity recognition with bert. arXiv preprint arXiv:1912.01389"},{"key":"11162_CR140","doi-asserted-by":"crossref","unstructured":"Mozannar H, Hajal KE, Maamary E, Hajj H (2019) Neural Arabic question answering. arXiv preprint arXiv:1906.05394","DOI":"10.18653\/v1\/W19-4612"},{"key":"11162_CR141","unstructured":"Muffo M, Bertino E (2023) Bertino: An italian distilbert model. arXiv preprint arXiv:2303.18121"},{"issue":"1","key":"11162_CR142","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1075\/li.30.1.03nad","volume":"30","author":"D Nadeau","year":"2007","unstructured":"Nadeau D, Sekine S (2007) A survey of named entity recognition and classification. Lingvisticae Investigationes 30(1):3\u201326. https:\/\/doi.org\/10.1075\/li.30.1.03nad","journal-title":"Lingvisticae Investigationes"},{"key":"11162_CR143","unstructured":"Nagel S (2016) Cc-news. URL: Http:\/\/Web.Archive.Org\/Save\/CommoncrawlOrg\/2016\/10\/Newsdatasetavailable."},{"key":"11162_CR144","doi-asserted-by":"crossref","unstructured":"Napoles C, Sakaguchi K, Tetreault J (2017) JFLEG: A Fluency Corpus and Benchmark for Grammatical Error Correction (arXiv:1702.04066). arXiv. http:\/\/arxiv.org\/abs\/1702.04066","DOI":"10.18653\/v1\/E17-2037"},{"key":"11162_CR145","doi-asserted-by":"crossref","unstructured":"Naseem U, Khushi M, Reddy V, Rajendran S, Razzak I, Kim J (2021) Bioalbert: A simple and effective pre-trained language model for biomedical named entity recognition. 2021 International Joint Conference on Neural Networks (IJCNN), 1\u20137. https:\/\/ieeexplore.ieee.org\/abstract\/document\/9533884\/","DOI":"10.1109\/IJCNN52387.2021.9533884"},{"key":"11162_CR146","doi-asserted-by":"publisher","first-page":"e205","DOI":"10.3389\/frai.2022.813967","volume":"5","author":"V Nedumpozhimana","year":"2022","unstructured":"Nedumpozhimana V, Klubicka F, Kelleher JD (2022) Shapley Idioms: Analysing BERT Sentence Embeddings for General Idiom Token Identification. Front Artif Intell 5:e205","journal-title":"Front Artif Intell"},{"issue":"9945","key":"11162_CR147","doi-asserted-by":"publisher","first-page":"766","DOI":"10.1016\/S0140-6736(14)60460-8","volume":"384","author":"M Ng","year":"2014","unstructured":"Ng M, Fleming T, Robinson M, Thomson B, Graetz N, Margono C, Mullany EC, Biryukov S, Abbafati C, Abera SF (2014) Global, regional, and National prevalence of overweight and obesity in children and adults during 1980\u20132013: A systematic analysis for the global burden of disease study 2013. Lancet 384(9945):766\u2013781","journal-title":"Lancet"},{"key":"11162_CR148","doi-asserted-by":"publisher","first-page":"254","DOI":"10.1016\/j.cor.2017.07.004","volume":"98","author":"T Nguyen","year":"2018","unstructured":"Nguyen T, Li Z, Spiegler V, Ieromonachou P, Lin Y (2018) Big data analytics in supply chain management: A state-of-the-art literature review. Comput Oper Res 98:254\u2013264","journal-title":"Comput Oper Res"},{"key":"11162_CR149","unstructured":"Nivre J (2014) Universal dependencies for swedish. Proceedings of the Swedish Language Technology Conference (SLTC), 5"},{"key":"11162_CR150","unstructured":"Nivre J, De Marneffe M-C, Ginter F, Goldberg Y, Hajic J, Manning CD, McDonald R, Petrov S, Pyysalo S, Silveira N (2016) Universal dependencies v1: A multilingual treebank collection. Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC\u201916), 1659\u20131666. https:\/\/aclanthology.org\/L16-1262\/"},{"key":"11162_CR151","unstructured":"Nogueira R, Cho K (2020) Passage Re-ranking with BERT (arXiv:1901.04085). arXiv. http:\/\/arxiv.org\/abs\/1901.04085"},{"key":"11162_CR152","unstructured":"Nozza D, Bianchi F, Hovy D (2020) What the [MASK]? Making Sense of Language-Specific BERT Models. ArXiv, abs\/2003.02912"},{"key":"11162_CR153","doi-asserted-by":"crossref","unstructured":"Otieno DO, Namin AS, Jones KS (2023) The Application of the BERT Transformer Model for Phishing Email Classification. 2023 IEEE 47th Annual Computers, Software, and Applications Conference (COMPSAC), 1303\u20131310","DOI":"10.1109\/COMPSAC57700.2023.00198"},{"key":"11162_CR154","doi-asserted-by":"crossref","unstructured":"Pappagari R, Zelasko P, Villalba J, Carmiel Y, Dehak N (2019) December). Hierarchical Transformers for long document classification. 2019 IEEE automatic speech recognition and Understanding workshop (ASRU). IEEE, pp 838\u2013844","DOI":"10.1109\/ASRU46091.2019.9003958"},{"key":"11162_CR155","doi-asserted-by":"crossref","unstructured":"Peng N, Dredze M (2017) Improving Named Entity Recognition for Chinese Social Media with Word Segmentation Representation Learning (arXiv:1603.00786). arXiv. http:\/\/arxiv.org\/abs\/1603.00786","DOI":"10.18653\/v1\/P16-2025"},{"key":"11162_CR156","doi-asserted-by":"crossref","unstructured":"Peng Z, Zhao Y (2023) Triple-Compressed BERT for Efficient Implementation on NLP Tasks. 2023 3rd International Conference on Electronic Information Engineering and Computer Science (EIECS), 1162\u20131165.T","DOI":"10.1109\/EIECS59936.2023.10435469"},{"key":"11162_CR157","doi-asserted-by":"publisher","unstructured":"Pennington J, Socher R, Manning C (2014) Glove: Global Vectors for Word Representation. Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), 1532\u20131543. https:\/\/doi.org\/10.3115\/v1\/D14-1162","DOI":"10.3115\/v1\/D14-1162"},{"key":"11162_CR158","doi-asserted-by":"crossref","unstructured":"Peters ME, Ruder S, Smith NA (2019) To Tune or Not to Tune? Adapting Pretrained Representations to Diverse Tasks (arXiv:1903.05987). arXiv. http:\/\/arxiv.org\/abs\/1903.05987","DOI":"10.18653\/v1\/W19-4302"},{"key":"11162_CR159","doi-asserted-by":"crossref","unstructured":"Pfeiffer J, Houlsby N, Gurevych I (2020) MAD-X: An adapter-based framework for efficient cross-lingual transfer. Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing, 6543\u20136551","DOI":"10.18653\/v1\/2020.emnlp-main.617"},{"key":"11162_CR160","unstructured":"Poostchi H, Borzeshi EZ, Piccardi M (2018) BiLSTM-CRF for Persian named-entity recognition ArmanPersoNERCorpus: The first entity-annotated Persian dataset. Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018). https:\/\/aclanthology.org\/L18-1701.pdf"},{"key":"11162_CR161","unstructured":"Pourkamali N, Sharifi SE (2024) Machine Translation with Large Language Models: Prompt Engineering for Persian, English, and Russian Directions. ArXiv, abs\/2401.08429"},{"key":"11162_CR162","doi-asserted-by":"crossref","unstructured":"Pudasaini S, Shakya S (2023) Question Answering on Biomedical Research Papers using Transfer Learning on BERT-Base Models. 2023 7th International Conference on I-SMAC (IoT in Social, Mobile, Analytics and Cloud) (I-SMAC), 496\u2013501","DOI":"10.1109\/I-SMAC58438.2023.10290240"},{"key":"11162_CR163","doi-asserted-by":"publisher","first-page":"2088","DOI":"10.12694\/scpe.v25i3.2770","volume":"25","author":"Y Qing","year":"2024","unstructured":"Qing Y (2024) Design and application of automatic english translation grammar error detection system based on BERT machine vision. Scalable Comput Pract Exp 25:2088\u20132102","journal-title":"Scalable Comput Pract Exp"},{"issue":"10","key":"11162_CR164","doi-asserted-by":"publisher","first-page":"1872","DOI":"10.1007\/s11431-020-1647-3","volume":"63","author":"X Qiu","year":"2020","unstructured":"Qiu X, Sun T, Xu Y, Shao Y, Dai N, Huang X (2020) Pre-trained models for natural Language processing: A survey. Sci China Technological Sci 63(10):1872\u20131897. https:\/\/doi.org\/10.1007\/s11431-020-1647-3","journal-title":"Sci China Technological Sci"},{"key":"11162_CR165","doi-asserted-by":"publisher","unstructured":"Qu C, Yang L, Qiu M, Croft WB, Zhang Y, Iyyer M (2019) BERT with History Answer Embedding for Conversational Question Answering. Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval, 1133\u20131136. https:\/\/doi.org\/10.1145\/3331184.3331341","DOI":"10.1145\/3331184.3331341"},{"issue":"140","key":"11162_CR166","first-page":"1","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel C, Shazeer N, Roberts A, Lee K, Narang S, Matena M, Liu PJ (2020) Exploring the limits of transfer learning with a unified text-to-text transformer. J Mach Learn Res 21(140):1\u201367","journal-title":"J Mach Learn Res"},{"key":"11162_CR167","first-page":"96","volume":"8","author":"V Ramaraj","year":"2024","unstructured":"Ramaraj V, Appa Swamy MV, Prince EE, Kumar C (2024) Improving the BERT model for long text sequences in question answering domain. Int J Adv Appl Sci 8:96","journal-title":"Int J Adv Appl Sci"},{"key":"11162_CR168","doi-asserted-by":"crossref","unstructured":"Refaeli D, H\u00e1jek P (2021) Detecting Fake Online Reviews using Fine-tuned BERT. Proceedings of the 2021 5th International Conference on E-Business and Internet","DOI":"10.1145\/3497701.3497714"},{"key":"11162_CR169","unstructured":"Rehbein I, Ruppenhofer J, Schmidt T (2020) Improving sentence boundary detection for spoken language transcripts. Proceedings of the Twelfth Language Resources and Evaluation Conference, 7102\u20137111. https:\/\/aclanthology.org\/2020.lrec-1.878\/"},{"key":"11162_CR170","doi-asserted-by":"crossref","unstructured":"Rei M (2017) Semi-supervised Multitask Learning for Sequence Labeling (arXiv:1704.07156). arXiv. http:\/\/arxiv.org\/abs\/1704.07156","DOI":"10.18653\/v1\/P17-1194"},{"key":"11162_CR171","doi-asserted-by":"crossref","unstructured":"Roy K, Hasan, Fuhad KM, Mohammed N, Hasan RABBYA, Nahar N, J., Rahman F (2020) Bangla Part of Speech Tagging Using Contextual Embeddings and Oversampling Techniques","DOI":"10.1007\/978-3-030-63128-4_50"},{"key":"11162_CR172","doi-asserted-by":"crossref","unstructured":"Sahoo A, Chanda R, Das N, Sadhukhan B (2023), August Comparative Analysis of BERT Models for Sentiment Analysis on Twitter Data. In 2023 9th International Conference on Smart Computing and Communications (ICSCC) (pp. 658\u2013663). IEEE","DOI":"10.1109\/ICSCC59169.2023.10335061"},{"key":"11162_CR173","doi-asserted-by":"crossref","unstructured":"Saidi R, Jarray F, Mansour M (2021) A BERT Based Approach for Arabic POS Tagging. International Work-Conference on Artificial and Natural Neural Networks","DOI":"10.1007\/978-3-030-85030-2_26"},{"key":"11162_CR174","unstructured":"Sang EFTK, De Meulder F (2003) Introduction to the CoNLL-2003 Shared Task: Language-Independent Named Entity Recognition (arXiv:cs\/0306050). arXiv. http:\/\/arxiv.org\/abs\/cs\/0306050"},{"key":"11162_CR175","unstructured":"Sanh V, Debut L, Chaumond J, Wolf T (2020) DistilBERT, a distilled version of BERT: Smaller, faster, cheaper and lighter (arXiv:1910.01108). arXiv. http:\/\/arxiv.org\/abs\/1910.01108"},{"issue":"10","key":"11162_CR176","doi-asserted-by":"publisher","first-page":"e140","DOI":"10.1371\/journal.pcbi.0020140","volume":"2","author":"FC Santos","year":"2006","unstructured":"Santos FC, Pacheco JM, Lenaerts T (2006) Cooperation prevails when individuals adjust their social ties. PLoS Comput Biol 2(10):e140","journal-title":"PLoS Comput Biol"},{"key":"11162_CR177","unstructured":"Sarkar S, Babar MF, Hassan MM, Hasan M, Karmaker S (2023) Exploring Challenges of Deploying BERT-based NLP Models in Resource-Constrained Embedded Devices. ArXiv, abs\/2304.11520"},{"key":"11162_CR178","unstructured":"Schwartz D, Toneva M, Wehbe L (2019) Inducing brain-relevant bias in natural language processing models. Advances in Neural Information Processing Systems, 32. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2019\/hash\/2b8501af7b64d1aaae7dd832805f0709-Abstract.html"},{"key":"11162_CR179","unstructured":"Shahshahani MS, Mohseni M, Shakery A, Faili H (2018) PEYMA: A Tagged Corpus for Persian Named Entities (arXiv:1801.09936). arXiv. http:\/\/arxiv.org\/abs\/1801.09936"},{"issue":"2","key":"11162_CR180","doi-asserted-by":"publisher","first-page":"59","DOI":"10.3390\/a17020059","volume":"17","author":"MK Shaik Vadla","year":"2024","unstructured":"Shaik Vadla MK, Suresh MA, Viswanathan VK (2024) Enhancing product design through AI-driven sentiment analysis of Amazon reviews using BERT. Algorithms 17(2):59","journal-title":"Algorithms"},{"key":"11162_CR181","doi-asserted-by":"crossref","unstructured":"Sharma R, Chen F, Fard F, Lo D (2022) May An exploratory study on code attention in BERT. In Proceedings of the 30th IEEE\/ACM International Conference on Program Comprehension (pp. 437\u2013448)","DOI":"10.1145\/3524610.3527921"},{"key":"11162_CR182","doi-asserted-by":"crossref","unstructured":"Sharma KV, Singh K, Sharma K, Gupta J (2023) Question summation and sentence similarity using BERT for key information extraction. Int J Res Appl Sci Eng Technol","DOI":"10.22214\/ijraset.2023.50087"},{"key":"11162_CR183","doi-asserted-by":"crossref","unstructured":"Shavarani HS, Sarkar A (2021) Better Neural Machine Translation by Extracting Linguistic Information from BERT. Conference of the European Chapter of the Association for Computational Linguistics","DOI":"10.18653\/v1\/2021.eacl-main.241"},{"key":"11162_CR184","unstructured":"Shih CF, Tseng YH, Yang CW, Chen PE, Chou HY, Tan LH, Hsieh SK (2021), October What confuses BERT? Linguistic Evaluation of Sentiment Analysis on Telecom Customer Opinion. In Proceedings of the 33rd Conference on Computational Linguistics and Speech Processing (ROCLING 2021) (pp. 271\u2013279)"},{"key":"11162_CR185","unstructured":"Shimanaka H, Kajiwara T, Komachi M (2019) Machine Translation Evaluation with BERT Regressor (arXiv:1907.12679). arXiv. http:\/\/arxiv.org\/abs\/1907.12679"},{"key":"11162_CR186","doi-asserted-by":"publisher","unstructured":"Smith L, Tanabe LK, Ando RJN, Kuo C-J, Chung I-F, Hsu C-N, Lin Y-S, Klinger R, Friedrich CM, Ganchev K, Torii M, Liu H, Haddow B, Struble CA, Povinelli RJ, Vlachos A, Baumgartner WA, Hunter L, Carpenter B, Wilbur WJ (2008) Overview of BioCreative II gene mention recognition. Genome Biology, 9(S2), S2. https:\/\/doi.org\/10.1186\/gb-2008-9-s2-s2","DOI":"10.1186\/gb-2008-9-s2-s2"},{"key":"11162_CR187","doi-asserted-by":"crossref","unstructured":"Smith A, Bohnet B, de Lhoneux M, Nivre J, Shao Y, Stymne S (2018) 82 Treebanks, 34 Models: Universal Dependency Parsing with Multi-Treebank Models (arXiv:1809.02237). arXiv. http:\/\/arxiv.org\/abs\/1809.02237","DOI":"10.18653\/v1\/K18-2011"},{"key":"11162_CR188","first-page":"987","volume":"73","author":"G Snaebjarnarson","year":"2023","unstructured":"Snaebjarnarson G, J\u00f3nsson H, Bj\u00f6rnsson H (2023) Leveraging closely related languages for improving NLP tasks in Faroese. J Artif Intell Res 73:987\u20131003","journal-title":"J Artif Intell Res"},{"key":"11162_CR189","unstructured":"Souza F, Nogueira R, Lotufo R (2020) Portuguese Named Entity Recognition using BERT-CRF (arXiv:1909.10649). arXiv. http:\/\/arxiv.org\/abs\/1909.10649"},{"key":"11162_CR190","doi-asserted-by":"crossref","unstructured":"Srivastava S, Paul B, Gupta D (2023) Study of word embeddings for enhanced cyber security named entity recognition. Procedia Computer Science","DOI":"10.1016\/j.procs.2023.01.027"},{"key":"11162_CR191","doi-asserted-by":"crossref","unstructured":"Strubell E, Ganesh A, McCallum A (2019) Energy and Policy Considerations for Deep Learning in NLP (arXiv:1906.02243). arXiv. http:\/\/arxiv.org\/abs\/1906.02243","DOI":"10.18653\/v1\/P19-1355"},{"key":"11162_CR192","doi-asserted-by":"crossref","unstructured":"Sun S, Cheng Y, Gan Z, Liu J (2019) Patient knowledge distillation for bert model compression. arXiv preprint arXiv:1908.09355","DOI":"10.18653\/v1\/D19-1441"},{"key":"11162_CR193","doi-asserted-by":"crossref","unstructured":"Sun Z, Yu H, Song X, Liu R, Yang Y, Zhou D (2020) MobileBERT: A Compact Task-Agnostic BERT for Resource-Limited Devices (arXiv:2004.02984). arXiv. http:\/\/arxiv.org\/abs\/2004.02984","DOI":"10.18653\/v1\/2020.acl-main.195"},{"key":"11162_CR194","doi-asserted-by":"publisher","first-page":"103799","DOI":"10.1016\/j.jbi.2021.103799","volume":"118","author":"C Sun","year":"2021","unstructured":"Sun C, Yang Z, Wang L, Zhang Y, Lin H, Wang J (2021) Biomedical named entity recognition using BERT in the machine reading comprehension framework. J Biomed Inform 118:103799","journal-title":"J Biomed Inform"},{"issue":"1","key":"11162_CR195","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1007\/s42452-019-1765-9","volume":"2","author":"C Sur","year":"2020","unstructured":"Sur C (2020) RBN: enhancement in Language attribute prediction using global representation of natural Language transfer learning technology like Google BERT. SN Appl Sci 2(1):22","journal-title":"SN Appl Sci"},{"key":"11162_CR196","unstructured":"Taher E, Hoseini SA, Shamsfard M (2020) Beheshti-NER: Persian Named Entity Recognition Using BERT (arXiv:2003.08875). arXiv. http:\/\/arxiv.org\/abs\/2003.08875"},{"key":"11162_CR197","doi-asserted-by":"crossref","unstructured":"Tamburini F (2020) How BERTology Changed the State-of-the-Art also for Italian NLP. Proceedings of the Seventh Italian Conference on Computational Linguistics CLiC-it 2020","DOI":"10.4000\/books.aaccademia.8920"},{"key":"11162_CR198","doi-asserted-by":"crossref","unstructured":"Tanaka H, Shinnou H, Cao R, Bai J, Ma W (2019) Document Classification by Word Embeddings of BERT. International Conference of the Pacific Association for Computaitonal Linguistics","DOI":"10.1007\/978-981-15-6168-9_13"},{"key":"11162_CR199","doi-asserted-by":"crossref","unstructured":"Tikayat Ray A, Pinon-Fischer OJ, Mavris DN, White RT, Cole BF (2023) aeroBERT-NER: Named-Entity Recognition for Aerospace Requirements Engineering using BERT. AIAA SCITECH 2023 Forum","DOI":"10.2514\/6.2023-2583"},{"key":"11162_CR200","unstructured":"Trinh TH, Le QV (2019) A Simple Method for Commonsense Reasoning (arXiv:1806.02847). arXiv. http:\/\/arxiv.org\/abs\/1806.02847"},{"key":"11162_CR201","unstructured":"Tripty Z, Nafis M, Chowdhury A, Hossain J, Ahsan S, Das A, Hoque MM (2024), March CUETSentimentSillies@ DravidianLangTech-EACL2024: Transformer-based Approach for Sentiment Analysis in Tamil and Tulu Code-Mixed Texts. In Proceedings of the Fourth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages (pp. 234\u2013239)"},{"key":"11162_CR202","doi-asserted-by":"crossref","unstructured":"Tsai H, Riesa J, Johnson M, Arivazhagan N, Li X, Archer A (2019) Small and Practical BERT Models for Sequence Labeling (arXiv:1909.00100).","DOI":"10.18653\/v1\/D19-1374"},{"key":"11162_CR203","doi-asserted-by":"crossref","unstructured":"Tuli S, Dedhia B, Tuli S, Jha NK (2023) FlexiBERT: are current transformer architectures too homogeneous and rigid? J Artif Intell Res 77:39\u201370. http:\/\/www2.lingfil.uu.se\/SLTC2014\/abstracts\/sltc2014_submission_7.pdf","DOI":"10.1613\/jair.1.13942"},{"key":"11162_CR204","unstructured":"Turton J, Vinson DP, Smith R (2020) Deriving Contextualised Semantic Features from BERT (and Other Transformer Model) Embeddings. ArXiv, abs\/2012.15353"},{"key":"11162_CR205","doi-asserted-by":"publisher","unstructured":"Ul\u010dar M, Robnik-\u0160ikonja M (2020) FinEst BERT and CroSloEngual BERT: Less Is More in Multilingual Models. In P. Sojka, I. Kope\u010dek, K. Pala, & A. Hor\u00e1k (Eds.), Text, Speech, and Dialogue (Vol. 12284, pp. 104\u2013111). Springer International Publishing. https:\/\/doi.org\/10.1007\/978-3-030-58323-1_11","DOI":"10.1007\/978-3-030-58323-1_11"},{"key":"11162_CR206","doi-asserted-by":"crossref","unstructured":"Van Aken B, Winter B, L\u00f6ser A, Gers FA (2019), November How does bert answer questions? a layer-wise analysis of transformer representations. In Proceedings of the 28th ACM international conference on information and knowledge management (pp. 1823\u20131832)","DOI":"10.1145\/3357384.3358028"},{"key":"11162_CR207","doi-asserted-by":"crossref","unstructured":"Van Noord G, Bouma G, Van Eynde F, De Kok D, Van der Linde J, Schuurman I, Sang ETK, Vandeghinste V (2013) Large scale syntactic annotation of written Dutch: Lassy. Essential Speech and Language Technology for Dutch: Results by the STEVIN Programme, 147\u2013164","DOI":"10.1007\/978-3-642-30910-6_9"},{"key":"11162_CR208","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \\Lukasz, Polosukhin I (2017) Attention is all you need. Advances in Neural Information Processing Systems, 30. https:\/\/proceedings.neurips.cc\/paper\/7181-attention-is-all"},{"key":"11162_CR209","unstructured":"Verma N, Elbayad M (2024) Merging text transformer models from different initializations. arXiv preprint arXiv:2403.00986"},{"key":"11162_CR210","doi-asserted-by":"crossref","unstructured":"Vernikos G, Popescu-Belis A (2024) Don\u2019t Rank, Combine! Combining Machine Translation Hypotheses Using Quality Estimation. ArXiv, abs\/2401.06688","DOI":"10.18653\/v1\/2024.acl-long.653"},{"issue":"5","key":"11162_CR211","doi-asserted-by":"publisher","first-page":"537","DOI":"10.1080\/01434632.2020.1731144","volume":"41","author":"Y Wang","year":"2020","unstructured":"Wang Y (2020) Extending multilingual BERT to low-resource languages. J Multiling Multicultural Dev 41(5):537\u2013551. https:\/\/doi.org\/10.1080\/01434632.2020.1731144","journal-title":"J Multiling Multicultural Dev"},{"key":"11162_CR212","doi-asserted-by":"crossref","unstructured":"Wang P, Gu J (2023) Named entity recognition of electronic medical records based on BERT-BiLSTM-Biaffine Model. J Phys: Conf Ser 2560","DOI":"10.1088\/1742-6596\/2560\/1\/012044"},{"key":"11162_CR213","unstructured":"Wang A, Pruksachatkun Y, Nangia N, Singh A, Michael J, Hill F, Levy O, Bowman S (2019) Superglue: A stickier benchmark for general-purpose language understanding systems. Advances in Neural Information Processing Systems, 32. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2019\/hash\/4496bf24afe7fab6f046bf4923da8de6-Abstract.html"},{"key":"11162_CR214","unstructured":"Wang H, Kurosawa M, Katsumata S, Komachi M (2020) Chinese Grammatical Correction Using BERT-based Pre-trained Model (arXiv:2011.02093). arXiv. http:\/\/arxiv.org\/abs\/2011.02093"},{"key":"11162_CR215","doi-asserted-by":"publisher","first-page":"1318","DOI":"10.1109\/TASLP.2021.3065201","volume":"29","author":"Y Wang","year":"2021","unstructured":"Wang Y, Cui L, Zhang Y (2021) Improving Skip-Gram embeddings using BERT. IEEE\/ACM Trans Audio Speech Lang Process 29:1318\u20131328","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"11162_CR216","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1016\/j.eng.2022.04.024","volume":"25","author":"H Wang","year":"2023","unstructured":"Wang H, Li J, Wu H, Hovy E, Sun Y (2023) Pre-Trained Language models and their applications. Engineering 25:51\u201365. https:\/\/doi.org\/10.1016\/j.eng.2022.04.024","journal-title":"Engineering"},{"key":"11162_CR217","doi-asserted-by":"crossref","unstructured":"Weng M, Zhang W (2023) Named Entity Recognition Based on BERT-BiLSTM-SPAN in Low Resource Scenarios. 2023 15th International Conference on Computer Research and Development (ICCRD), 32\u201337","DOI":"10.1109\/ICCRD56364.2023.10080054"},{"key":"11162_CR218","doi-asserted-by":"crossref","unstructured":"Weng R, Yu H, Huang S, Cheng S, Luo W (2020) Acquiring knowledge from pre-trained model to neural machine translation. Proceedings of the AAAI Conference on Artificial Intelligence, 34(05), 9266\u20139273. https:\/\/ojs.aaai.org\/index.php\/AAAI\/article\/view\/6465","DOI":"10.1609\/aaai.v34i05.6465"},{"key":"11162_CR219","unstructured":"Wu Y, Dredze M (2020) Analyzing the performance decline of multilingual BERT on low-resource languages. Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, 1120\u20131130"},{"key":"11162_CR220","unstructured":"Wu Y, Schuster M, Chen Z, Le QV, Norouzi M, Macherey W, Krikun M, Cao Y, Gao Q, Macherey K, Klingner J, Shah A, Johnson M, Liu X, Kaiser \u0141, Gouws S, Kato Y, Kudo T, Kazawa H, Dean J (2016) Google\u2019s Neural Machine Translation System: Bridging the Gap between Human and Machine Translation (arXiv:1609.08144). arXiv. http:\/\/arxiv.org\/abs\/1609.08144"},{"key":"11162_CR221","doi-asserted-by":"crossref","unstructured":"Wu Z, Chen Y, Kao B, Liu Q (2020) Perturbed masking: Parameter-free probing for analyzing and interpreting BERT. arXiv preprint arXiv:2004.14786","DOI":"10.18653\/v1\/2020.acl-main.383"},{"key":"11162_CR222","unstructured":"Wu M, Vu T, Qu L, Foster G, Haffari G (2024) Adapting Large Language Models for Document-Level Machine Translation. ArXiv, abs\/2401.06468"},{"issue":"5","key":"11162_CR223","first-page":"2045","volume":"32","author":"Y Xia","year":"2021","unstructured":"Xia Y, Li X, Zhou Z (2021) MetaXL: A meta-learning framework for transforming representations in low-resource languages. IEEE Trans Neural Networks Learn Syst 32(5):2045\u20132058","journal-title":"IEEE Trans Neural Networks Learn Syst"},{"key":"11162_CR224","unstructured":"Xu J, Deng Y, Guo Y, Ney H (2007) Domain dependent statistical machine translation. Proceedings of Machine Translation Summit XI: Papers. https:\/\/aclanthology.org\/2007.mtsummit-papers.68.pdf"},{"key":"11162_CR225","unstructured":"Yang Z, Dai Z, Yang Y, Carbonell J, Salakhutdinov RR, Le QV (2019) Xlnet: Generalized autoregressive pretraining for language understanding. Advances in Neural Information Processing Systems, 32. https:\/\/proceedings.neurips.cc\/paper\/2019\/hash\/dc6a7e655d7e5840e66733e9ee67cc69-Abstract.html"},{"key":"11162_CR226","doi-asserted-by":"crossref","unstructured":"Yang Z, Garcia N, Chu C, Otani M, Nakashima Y, Takemura H (2020) Bert representations for video question answering. Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, 1556\u20131565. http:\/\/openaccess.thecvf.com\/content_WACV_2020\/html\/Yang_BERT_representations_for_Video_Question_Answering_WACV_2020_paper.html","DOI":"10.1109\/WACV45572.2020.9093596"},{"key":"11162_CR227","doi-asserted-by":"crossref","unstructured":"Yang B, Luo X, Sun K, Luo MY (2023) August Recent progress on text summarisation based on bert and gpt. In International Conference on Knowledge Science, Engineering and Management (pp. 225\u2013241). Cham: Springer Nature Switzerland","DOI":"10.1007\/978-3-031-40292-0_19"},{"key":"11162_CR228","unstructured":"Yannakoudakis H, Briscoe T, Medlock B (2011) A new dataset and method for automatically grading ESOL texts. Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies, 180\u2013189. https:\/\/aclanthology.org\/P11-1019.pdf"},{"key":"11162_CR229","first-page":"195","volume":"14","author":"T Yeshambel","year":"2023","unstructured":"Yeshambel T, Mothe J, Assabie Y (2023) Learned text representation for amharic information retrieval and natural Language processing. Inf 14:195","journal-title":"Inf"},{"key":"11162_CR230","doi-asserted-by":"crossref","unstructured":"Yin H, Liu X, Wu Y, Arini HM, Mohawesh R (2023), October A BERT-Based Semantic Enhanced Model for COVID-19 Fake News Detection. In Asia-Pacific Web (APWeb) and Web-Age Information Management (WAIM) Joint International Conference on Web and Big Data (pp. 1\u201315). Singapore: Springer Nature Singapore","DOI":"10.1007\/978-981-97-2303-4_1"},{"key":"11162_CR231","doi-asserted-by":"crossref","unstructured":"Yu W, Wu L, Deng Y, Mahindru R, Zeng Q, Guven S, Jiang M (2020) A technical question answering system with transfer learning. Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, 92\u201399. https:\/\/aclanthology.org\/2020.emnlp-demos.13\/","DOI":"10.18653\/v1\/2020.emnlp-demos.13"},{"key":"11162_CR232","doi-asserted-by":"crossref","unstructured":"Zabeen S, Hasan A, Islam MF, Hossain MS, Rasel AA (2023) Robust Fake Review Detection Using Uncertainty-Aware LSTM and BERT. 2023 IEEE 15th International Conference on Computational Intelligence and Communication Networks (CICN), 786\u2013791","DOI":"10.1109\/CICN59264.2023.10402342"},{"key":"11162_CR233","unstructured":"Zeman D, Hajic J, Popel M, Potthast M, Straka M, Ginter F, Nivre J, Petrov S (2018) CoNLL 2018 shared task: Multilingual parsing from raw text to universal dependencies. Proceedings of the CoNLL 2018 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies, 1\u201321. https:\/\/aclanthology.org\/K18-2001\/"},{"key":"11162_CR234","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1016\/j.neucom.2021.07.002","volume":"460","author":"Z Zhang","year":"2021","unstructured":"Zhang Z, Wu S, Jiang D, Chen G (2021) BERT-JAM: maximizing the utilization of BERT for neural machine translation. Neurocomputing 460:84\u201394","journal-title":"Neurocomputing"},{"key":"11162_CR235","doi-asserted-by":"crossref","unstructured":"Zhang X, Malkov Y, Florez O, Park S, McWilliams B, Han J, El-Kishky A (2023), August Twhin-bert: A socially-enriched pre-trained language model for multilingual tweet representations at twitter. In Proceedings of the 29th ACM SIGKDD conference on knowledge discovery and data mining (pp. 5597\u20135607)","DOI":"10.1145\/3580305.3599921"},{"key":"11162_CR236","doi-asserted-by":"crossref","unstructured":"Zhou S, Liu J, Zhong X, Zhao W (2021) Named entity recognition using BERT with whole world masking in cybersecurity domain. 2021 IEEE 6th International Conference on Big Data Analytics (ICBDA), 316\u2013320. https:\/\/ieeexplore.ieee.org\/abstract\/document\/9403180\/","DOI":"10.1109\/ICBDA51983.2021.9403180"},{"key":"11162_CR237","doi-asserted-by":"publisher","first-page":"33039","DOI":"10.1021\/acsomega.3c05114","volume":"8","author":"X Zhou","year":"2023","unstructured":"Zhou X, Zhang S, Agarwal M, Akroyd J, Mosbach S, Kraft M (2023) Marie and BERT\u2014A knowledge graph embedding based question answering system for chemistry. ACS Omega 8:33039\u201333057","journal-title":"ACS Omega"},{"key":"11162_CR238","doi-asserted-by":"crossref","unstructured":"Zhu Y, Kiros R, Zemel R, Salakhutdinov R, Urtasun R, Torralba A, Fidler S (2015) Aligning books and movies: Towards story-like visual explanations by watching movies and reading books. Proceedings of the IEEE International Conference on Computer Vision, 19\u201327. https:\/\/www.cv-foundation.org\/openaccess\/content_iccv_2015\/html\/Zhu_Aligning_Books_and_ICCV_2015_paper.html","DOI":"10.1109\/ICCV.2015.11"},{"key":"11162_CR239","unstructured":"Zhu J, Xia Y, Wu L, He D, Qin T, Zhou W, Li H, Liu T-Y (2020) Incorporating BERT into Neural Machine Translation (arXiv:2002.06823). arXiv. http:\/\/arxiv.org\/abs\/2002.06823"},{"issue":"2","key":"11162_CR240","doi-asserted-by":"publisher","first-page":"2173","DOI":"10.11591\/ijai.v13.i2.pp2173-2184","volume":"13","author":"I Zyout","year":"2024","unstructured":"Zyout I, Zyout MA (2024) Sentiment analysis of student feedback using attention-based RNN and transformer embedding. Int J Artif Intell 13(2):2173\u20132184","journal-title":"Int J Artif Intell"}],"container-title":["Artificial Intelligence Review"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-025-11162-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10462-025-11162-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-025-11162-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,17]],"date-time":"2025-04-17T20:07:25Z","timestamp":1744920445000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10462-025-11162-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,15]]},"references-count":243,"journal-issue":{"issue":"6","published-online":{"date-parts":[[2025,6]]}},"alternative-id":["11162"],"URL":"https:\/\/doi.org\/10.1007\/s10462-025-11162-5","relation":{},"ISSN":["1573-7462"],"issn-type":[{"value":"1573-7462","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3,15]]},"assertion":[{"value":"19 February 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 March 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"166"}}