{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T13:13:16Z","timestamp":1778332396094,"version":"3.51.4"},"reference-count":32,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,3,29]],"date-time":"2026-03-29T00:00:00Z","timestamp":1774742400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["clinicalkey.com","clinicalkey.com.au","clinicalkey.es","clinicalkey.fr","clinicalkey.jp","elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Computers in Biology and Medicine"],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1016\/j.compbiomed.2026.111648","type":"journal-article","created":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T09:34:24Z","timestamp":1775122464000},"page":"111648","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Constructing a corpus of hematologic pathology notes for the fine-tuning of BERT models for named entity recognition"],"prefix":"10.1016","volume":"208","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-3812-5853","authenticated-orcid":false,"given":"Desiree","family":"Jaschke","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7269-3573","authenticated-orcid":false,"given":"Celine-Fabienne","family":"Bergmann","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Max","family":"Blumenstock","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexander","family":"Brobeil","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sascha","family":"Dietrich","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tobias","family":"Dittrich","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael","family":"Herweg","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Petra","family":"Knaup","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9559-4646","authenticated-orcid":false,"given":"Christoph U.","family":"Lehmann","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3887-9416","authenticated-orcid":false,"given":"Nora","family":"Liebers","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gunhild","family":"Mechtersheimer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sarah","family":"Richter","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cleo-Aron","family":"Weis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2716-5425","authenticated-orcid":false,"given":"Matthias","family":"Ganzinger","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.compbiomed.2026.111648_bib1","first-page":"144","article-title":"Building gold standard corpora for medical natural language processing tasks","author":"Deleger","year":"2012","journal-title":"AMIA Annu Symp Proc"},{"key":"10.1016\/j.compbiomed.2026.111648_bib2","article-title":"Updates in immunohistochemistry for hematopoietic and lymphoid neoplasms","author":"Kavus","year":"2023","journal-title":"Arch. Pathol. Lab Med."},{"issue":"10","key":"10.1016\/j.compbiomed.2026.111648_bib3","doi-asserted-by":"crossref","first-page":"4555","DOI":"10.4049\/jimmunol.1502033","article-title":"CD nomenclature 2015: human leukocyte differentiation antigen workshops as a driving force in immunology","volume":"195","author":"Engel","year":"2015","journal-title":"J. Immunol."},{"issue":"3","key":"10.1016\/j.compbiomed.2026.111648_bib4","doi-asserted-by":"crossref","first-page":"1566","DOI":"10.3892\/mmr.2014.2914","article-title":"Ki67 is a promising molecular target in the diagnosis of cancer","volume":"11","author":"Li","year":"2014","journal-title":"Mol. Med. Rep."},{"key":"10.1016\/j.compbiomed.2026.111648_bib5","doi-asserted-by":"crossref","DOI":"10.1016\/j.jpi.2022.100003","article-title":"Automatic classification of cancer pathology reports: a systematic review","volume":"13","author":"Santos","year":"2022","journal-title":"J Pathol Inform"},{"issue":"1","key":"10.1016\/j.compbiomed.2026.111648_bib6","doi-asserted-by":"crossref","first-page":"29","DOI":"10.1186\/s12911-018-0609-7","article-title":"Automated extraction of biomarker information from pathology reports","volume":"18","author":"Lee","year":"2018","journal-title":"BMC Med Inform Decis Mak"},{"issue":"1","key":"10.1016\/j.compbiomed.2026.111648_bib7","doi-asserted-by":"crossref","DOI":"10.1038\/s41598-021-03204-z","article-title":"Electronic case report forms generation from pathology reports by ARGO, automatic record generator for onco-hematology","volume":"11","author":"Zaccaria","year":"2021","journal-title":"Sci. Rep."},{"key":"10.1016\/j.compbiomed.2026.111648_bib8","doi-asserted-by":"crossref","DOI":"10.1016\/j.jbi.2023.104293","article-title":"Detecting evidence of invasive fungal infections in cytology and histopathology reports enriched with concept-level annotations","volume":"139","author":"Rozova","year":"2023","journal-title":"J Biomed Inform"},{"key":"10.1016\/j.compbiomed.2026.111648_bib9","series-title":"CHIFIR: Cytology and Histopathology Invasive Fungal Infection Reports","author":"Rozova","year":"2024"},{"issue":"1","key":"10.1016\/j.compbiomed.2026.111648_bib10","doi-asserted-by":"crossref","first-page":"69","DOI":"10.1186\/s12911-021-01395-z","article-title":"A clinical trials corpus annotated with UMLS entities to enhance the access to evidence-based medicine","volume":"21","author":"Campillos-Llanos","year":"2021","journal-title":"BMC Med Inform Decis Mak"},{"issue":"1","key":"10.1016\/j.compbiomed.2026.111648_bib11","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1038\/s41597-023-02128-9","article-title":"A distributable German clinical corpus containing cardiovascular clinical routine doctor's letters","volume":"10","author":"Richter-Pechanski","year":"2023","journal-title":"Sci. Data"},{"key":"10.1016\/j.compbiomed.2026.111648_bib12","doi-asserted-by":"crossref","DOI":"10.1016\/j.ijmedinf.2020.104351","article-title":"A comprehensive study of mobility functioning information in clinical notes: entity hierarchy, corpus annotation, and sequence labeling","volume":"147","author":"Thieu","year":"2021","journal-title":"Int J Med Inform"},{"key":"10.1016\/j.compbiomed.2026.111648_bib13","series-title":"Applications in Medicine and Manufacturing","first-page":"137","article-title":"German medical natural language processing-a data-centric survey","author":"Zesch","year":"2022"},{"key":"10.1016\/j.compbiomed.2026.111648_bib14","series-title":"BERT: Pre-Training of Deep Bidirectional Transformers for Language Understanding","author":"Devlin","year":"2018"},{"key":"10.1016\/j.compbiomed.2026.111648_bib15","doi-asserted-by":"crossref","DOI":"10.1038\/sdata.2016.35","article-title":"MIMIC-III, a freely accessible critical care database","volume":"3","author":"Johnson","year":"2016","journal-title":"Sci. Data"},{"key":"10.1016\/j.compbiomed.2026.111648_bib16","series-title":"MIMIC-III Clinical Database","author":"Johnson","year":"2023"},{"key":"10.1016\/j.compbiomed.2026.111648_bib17","series-title":"Natural Language Annotation for Machine Learning: a Guide to corpus-building for Applications","author":"Pustejovsky","year":"2012"},{"key":"10.1016\/j.compbiomed.2026.111648_bib18","series-title":"Handbook of Linguistic Annotation","first-page":"21","article-title":"Designing annotation schemes: from theory to model","author":"Pustejovsky","year":"2017"},{"key":"10.1016\/j.compbiomed.2026.111648_bib19","doi-asserted-by":"crossref","DOI":"10.1093\/bioinformatics\/btab880","article-title":"MedTator: a serverless annotation tool for corpus development","author":"He","year":"2022","journal-title":"Bioinformatics"},{"key":"10.1016\/j.compbiomed.2026.111648_bib20","series-title":"Introduction to Transformers for NLP: with the Hugging Face Library and Models to Solve Problems","author":"Jain","year":"2022"},{"key":"10.1016\/j.compbiomed.2026.111648_bib21","doi-asserted-by":"crossref","unstructured":"Sang, Erik F. Tjong Kim, Meulder F de. Introduction to the CoNLL-2003 shared task: language-independent named entity recognition. Proceedings of CoNLL-2003..","DOI":"10.3115\/1119176.1119195"},{"key":"10.1016\/j.compbiomed.2026.111648_bib22","series-title":"Natural Language Processing Using Very Large Corpora","first-page":"157","article-title":"Text chunking using transformation-based learning","author":"Ramshaw","year":"1999"},{"key":"10.1016\/j.compbiomed.2026.111648_bib23","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.121598","article-title":"medBERT.de: a comprehensive German BERT model for the medical domain","volume":"237","author":"Bressem","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.compbiomed.2026.111648_bib24","doi-asserted-by":"crossref","unstructured":"Wolf T, Debut L, Sanh V, Chaumond J, Delangue C, Moi A et al. Transformers: state-of-the-art natural language processing. In: Liu Q, Schlangen D, editors. Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations. Stroudsburg, PA, USA: Association for Computational Linguistics, p. 38\u201345..","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"10.1016\/j.compbiomed.2026.111648_bib25","series-title":"German's next Language Model","author":"Chan","year":"2020"},{"issue":"4","key":"10.1016\/j.compbiomed.2026.111648_bib26","doi-asserted-by":"crossref","first-page":"ooac087","DOI":"10.1093\/jamiaopen\/ooac087","article-title":"Critical assessment of transformer-based AI models for German clinical notes","volume":"5","author":"Lentzen","year":"2022","journal-title":"JAMIA Open"},{"key":"10.1016\/j.compbiomed.2026.111648_bib27","series-title":"Llama: Open and Efficient Foundation Language Models","author":"Touvron","year":"2023"},{"key":"10.1016\/j.compbiomed.2026.111648_bib28","unstructured":"Grattafiori A, Dubey A, Jauhri A, Pandey A, Kadian A, Al-Dahle A et al. The llama 3 herd of models: http:\/\/arxiv.org\/pdf\/2407.21783v3.."},{"issue":"Suppl 1","key":"10.1016\/j.compbiomed.2026.111648_bib29","doi-asserted-by":"crossref","first-page":"S2","DOI":"10.1186\/1472-6947-8-S1-S2","article-title":"Forty years of SNOMED: a literature review","volume":"8","author":"Cornet","year":"2008","journal-title":"BMC Med Inform Decis Mak"},{"key":"10.1016\/j.compbiomed.2026.111648_bib30","series-title":"WHO Classification of Tumours of Haematopoietic and Lymphoid Tissues","year":"2017"},{"key":"10.1016\/j.compbiomed.2026.111648_bib31","series-title":"Large Language Models for Generative Information Extraction","author":"Xu","year":"2023"},{"issue":"1","key":"10.1016\/j.compbiomed.2026.111648_bib32","doi-asserted-by":"crossref","first-page":"1418","DOI":"10.1038\/s41467-024-45563-x","article-title":"Structured information extraction from scientific text with large language models","volume":"15","author":"Dagdelen","year":"2024","journal-title":"Nat. Commun."}],"container-title":["Computers in Biology and Medicine"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0010482526002118?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0010482526002118?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T13:05:41Z","timestamp":1778331941000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0010482526002118"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5]]},"references-count":32,"alternative-id":["S0010482526002118"],"URL":"https:\/\/doi.org\/10.1016\/j.compbiomed.2026.111648","relation":{},"ISSN":["0010-4825"],"issn-type":[{"value":"0010-4825","type":"print"}],"subject":[],"published":{"date-parts":[[2026,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Constructing a corpus of hematologic pathology notes for the fine-tuning of BERT models for named entity recognition","name":"articletitle","label":"Article Title"},{"value":"Computers in Biology and Medicine","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.compbiomed.2026.111648","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Authors. Published by Elsevier Ltd.","name":"copyright","label":"Copyright"}],"article-number":"111648"}}