{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T20:57:06Z","timestamp":1758056226065,"version":"3.44.0"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032051752","type":"print"},{"value":"9783032051769","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T00:00:00Z","timestamp":1757894400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T00:00:00Z","timestamp":1757894400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-05176-9_26","type":"book-chapter","created":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T13:41:09Z","timestamp":1757943669000},"page":"335-347","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["LLM\u2013Based Framework for\u00a0Synthetic Data Generation in\u00a0Portuguese Clinical 
NER"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-7216-189X","authenticated-orcid":false,"given":"Lu\u00eds","family":"Henriques","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2854-2891","authenticated-orcid":false,"given":"Nuno","family":"Guimar\u00e3es","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5475-1382","authenticated-orcid":false,"given":"Al\u00edpio","family":"Jorge","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,15]]},"reference":[{"key":"26_CR1","doi-asserted-by":"publisher","unstructured":"Begoli, E., Brown, K., Srinivas, S., Tamang, S.: SynthNotes: a generator framework for high-volume, high-fidelity synthetic mental health notes. In: 2018 IEEE International Conference on Big Data (Big Data), Seattle, WA, USA, pp. 951\u2013958 (2018). https:\/\/doi.org\/10.1109\/BigData.2018.8621981","DOI":"10.1109\/BigData.2018.8621981"},{"key":"26_CR2","doi-asserted-by":"crossref","unstructured":"Bodenreider, O.: The unified medical language system (UMLS): integrating biomedical terminology. Nucl. Acids Res. 32(Database issue), D267\u201370 (2004)","DOI":"10.1093\/nar\/gkh061"},{"key":"26_CR3","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1007\/978-3-319-99722-3_9","volume-title":"Computational Processing of the Portuguese Language","author":"PV Quinta de Castro","year":"2018","unstructured":"Quinta de Castro, P.V., F\u00e9lix Felipe da Silva, N., da Silva Soares, A.: Portuguese named entity recognition using LSTM-CRF. In: Villavicencio, A., et al. (eds.) PROPOR 2018. LNCS (LNAI), vol. 11122, pp. 83\u201392. Springer, Cham (2018). 
https:\/\/doi.org\/10.1007\/978-3-319-99722-3_9"},{"key":"26_CR4","series-title":"IFIP Advances in Information and Communication Technology","doi-asserted-by":"publisher","first-page":"210","DOI":"10.1007\/978-3-642-35142-6_14","volume-title":"Shaping the Future of ICT Research. Methods and Approaches","author":"K Crowston","year":"2012","unstructured":"Crowston, K.: Amazon mechanical turk: a research tool for organizations and information systems scholars. In: Bhattacherjee, A., Fitzgerald, B. (eds.) IS &O 2012. IAICT, vol. 389, pp. 210\u2013221. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-35142-6_14"},{"key":"26_CR5","doi-asserted-by":"publisher","unstructured":"Dai, H., et al.: ChatAug: Leveraging ChatGPT for Text Data Augmentation. arXiv abs\/2302.13007 (2023). https:\/\/doi.org\/10.48550\/arXiv.2302.13007","DOI":"10.48550\/arXiv.2302.13007"},{"key":"26_CR6","unstructured":"DeepSeek-AI: Deepseek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning (2025). https:\/\/arxiv.org\/abs\/2501.12948"},{"key":"26_CR7","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Burstein, J., Doran, C., Solorio, T. (eds.) Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), Minneapolis, Minnesota, pp. 4171\u20134186. Association for Computational Linguistics (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423. https:\/\/aclanthology.org\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"26_CR8","doi-asserted-by":"crossref","unstructured":"Gilardi, F., Alizadeh, M., Kubli, M.: ChatGPT outperforms crowd workers for text-annotation tasks. Proc. Natl. Acad. Sci. 
120(30), e2305016120 (2023)","DOI":"10.1073\/pnas.2305016120"},{"issue":"1","key":"26_CR9","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1109\/TCBB.2019.2948985","volume":"18","author":"J Guan","year":"2021","unstructured":"Guan, J., Li, R., Yu, S., Zhang, X.: A method for generating synthetic electronic medical record text. IEEE\/ACM Trans. Comput. Biol. Bioinf. 18(1), 173\u2013182 (2021). https:\/\/doi.org\/10.1109\/TCBB.2019.2948985","journal-title":"IEEE\/ACM Trans. Comput. Biol. Bioinf."},{"key":"26_CR10","doi-asserted-by":"crossref","unstructured":"He, X., et al.: AnnoLLM: making large language models to be better crowdsourced annotators. In: Yang, Y., Davani, A., Sil, A., Kumar, A. (eds.) Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track), Mexico City, Mexico, pp. 165\u2013190. Association for Computational Linguistics (2024). https:\/\/aclanthology.org\/2024.naacl-industry.15","DOI":"10.18653\/v1\/2024.naacl-industry.15"},{"key":"26_CR11","unstructured":"Huang, K., Altosaar, J., Ranganath, R.: ClinicalBERT: Modeling Clinical Notes and Predicting Hospital Readmission. arXiv abs\/1904.05342 (2019)"},{"key":"26_CR12","doi-asserted-by":"publisher","unstructured":"Kumar, V., Choudhary, A., Cho, E.: Data augmentation using pre-trained transformer models. In: Campbell, W.M., et al. (eds.) Proceedings of the 2nd Workshop on Life-long Learning for Spoken Language Systems, Suzhou, China, pp. 18\u201326. Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.lifelongnlp-1.3. 
https:\/\/aclanthology.org\/2020.lifelongnlp-1.3\/","DOI":"10.18653\/v1\/2020.lifelongnlp-1.3"},{"key":"26_CR13","doi-asserted-by":"publisher","first-page":"1234","DOI":"10.1093\/bioinformatics\/btz682","volume":"36","author":"J Lee","year":"2019","unstructured":"Lee, J., et al.: BioBERT: a pre-trained biomedical language representation model for biomedical text mining. Bioinformatics 36, 1234\u20131240 (2019). https:\/\/doi.org\/10.1093\/bioinformatics\/btz682","journal-title":"Bioinformatics"},{"key":"26_CR14","doi-asserted-by":"publisher","unstructured":"Libbi, C.A., Trienes, J., Trieschnigg, D., Seifert, C.: Generating synthetic training data for supervised de-identification of electronic health records. Future Internet 13(5) (2021). https:\/\/doi.org\/10.3390\/fi13050136. https:\/\/www.mdpi.com\/1999-5903\/13\/5\/136","DOI":"10.3390\/fi13050136"},{"key":"26_CR15","unstructured":"Lin, C.Y.: ROUGE: a package for automatic evaluation of summaries. In: Text Summarization Branches Out, Barcelona, Spain, pp. 74\u201381. Association for Computational Linguistics (2004). https:\/\/aclanthology.org\/W04-1013"},{"key":"26_CR16","doi-asserted-by":"publisher","unstructured":"Liu, X., Hersch, G.L., Khalil, I., Devarakonda, M.: Clinical trial information extraction with BERT. In: 2021 IEEE 9th International Conference on Healthcare Informatics (ICHI), Victoria, BC, Canada, pp. 505\u2013506 (2021). https:\/\/doi.org\/10.1109\/ICHI52183.2021.00092","DOI":"10.1109\/ICHI52183.2021.00092"},{"key":"26_CR17","doi-asserted-by":"publisher","unstructured":"Lopes, F., Teixeira, C., Gon\u00e7alo\u00a0Oliveira, H.: Contributions to clinical named entity recognition in Portuguese. In: Demner-Fushman, D., Cohen, K.B., Ananiadou, S., Tsujii, J. (eds.) Proceedings of the 18th BioNLP Workshop and Shared Task, Florence, Italy, pp. 223\u2013233. Association for Computational Linguistics (2019). https:\/\/doi.org\/10.18653\/v1\/W19-5024. 
https:\/\/aclanthology.org\/W19-5024","DOI":"10.18653\/v1\/W19-5024"},{"issue":"4","key":"26_CR18","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1007\/s10916-020-1542-8","volume":"44","author":"F Lopes","year":"2020","unstructured":"Lopes, F., Teixeira, C., Gon\u00e7alo Oliveira, H.: Comparing different methods for named entity recognition in Portuguese neurology text. J. Med. Syst. 44(4), 77 (2020)","journal-title":"J. Med. Syst."},{"key":"26_CR19","unstructured":"van\u00a0der Maaten, L., Hinton, G.: Visualizing data using t-SNE. J. Mach. Learn. Res. 9(86), 2579\u20132605 (2008). http:\/\/jmlr.org\/papers\/v9\/vandermaaten08a.html"},{"key":"26_CR20","doi-asserted-by":"crossref","unstructured":"M\u00f8ller, A.G., Pera, A., Dalsgaard, J., Aiello, L.: The parrot dilemma: human-labeled vs. LLM-augmented data in classification tasks. In: Graham, Y., Purver, M. (eds.) Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 2: Short Papers), St. Julian\u2019s, Malta, pp. 179\u2013192. Association for Computational Linguistics (2024). https:\/\/aclanthology.org\/2024.eacl-short.17","DOI":"10.18653\/v1\/2024.eacl-short.17"},{"key":"26_CR21","doi-asserted-by":"crossref","unstructured":"Nunes, M., Bon\u00e9, J., Ferreira, J., Chaves, P., Elvas, L.: MediAlbertina: an European Portuguese medical language model. CBM 182 (2024). https:\/\/doi.org\/10.1016\/j.compbiomed.2024.109233","DOI":"10.1016\/j.compbiomed.2024.109233"},{"key":"26_CR22","doi-asserted-by":"crossref","unstructured":"Oliveira, L.E.S.E., et al.: SemClinBr - a multi-institutional and multi-specialty semantically annotated corpus for Portuguese clinical NLP tasks. J. Biomed. Semant. 13(1), 13 (2022)","DOI":"10.1186\/s13326-022-00269-1"},{"key":"26_CR23","unstructured":"OpenAI: GPT-4 Technical Report (2024). https:\/\/arxiv.org\/abs\/2303.08774"},{"key":"26_CR24","unstructured":"OpenAI: Openai O1 System Card (2024). 
https:\/\/arxiv.org\/abs\/2412.16720"},{"key":"26_CR25","doi-asserted-by":"publisher","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.J.: BLEU: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting on Association for Computational Linguistics, ACL 2002, pp. 311\u2013318. Association for Computational Linguistics, USA (2002). https:\/\/doi.org\/10.3115\/1073083.1073135","DOI":"10.3115\/1073083.1073135"},{"key":"26_CR26","doi-asserted-by":"publisher","unstructured":"Peng, Y., Yan, S., Lu, Z.: Transfer learning in biomedical natural language processing: an evaluation of BERT and ELMo on ten benchmarking datasets. In: Demner-Fushman, D., Cohen, K.B., Ananiadou, S., Tsujii, J. (eds.) Proceedings of the 18th BioNLP Workshop and Shared Task, Florence, Italy, pp. 58\u201365. Association for Computational Linguistics (2019). https:\/\/doi.org\/10.18653\/v1\/W19-5006. https:\/\/aclanthology.org\/W19-5006","DOI":"10.18653\/v1\/W19-5006"},{"key":"26_CR27","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks (2019). https:\/\/arxiv.org\/abs\/1908.10084","DOI":"10.18653\/v1\/D19-1410"},{"key":"26_CR28","doi-asserted-by":"publisher","unstructured":"Rodrigues, J., et al.: Advancing neural encoding of Portuguese with transformer Albertina PT-*, pp. 441\u2013453. Springer (2023). https:\/\/doi.org\/10.1007\/978-3-031-49008-8_35","DOI":"10.1007\/978-3-031-49008-8_35"},{"key":"26_CR29","doi-asserted-by":"publisher","unstructured":"dos Santos, C., Guimar\u00e3es, V.: Boosting named entity recognition with neural character embeddings. In: Duan, X., Banchs, R.E., Zhang, M., Li, H., Kumaran, A. (eds.) Proceedings of the Fifth Named Entity Workshop, Beijing, China, pp. 25\u201333. Association for Computational Linguistics (2015). https:\/\/doi.org\/10.18653\/v1\/W15-3904. 
https:\/\/aclanthology.org\/W15-3904\/","DOI":"10.18653\/v1\/W15-3904"},{"key":"26_CR30","doi-asserted-by":"publisher","unstructured":"Santos, J., Consoli, B., dos Santos, C., Terra, J., Collonini, S., Vieira, R.: Assessing the impact of contextual embeddings for Portuguese named entity recognition. In: 2019 8th Brazilian Conference on Intelligent Systems (BRACIS), pp. 437\u2013442 (2019). https:\/\/doi.org\/10.1109\/BRACIS.2019.00083","DOI":"10.1109\/BRACIS.2019.00083"},{"key":"26_CR31","doi-asserted-by":"publisher","unstructured":"Schneider, E.T.R., et al.: BioBERTpt - a Portuguese neural language model for clinical named entity recognition. In: Rumshisky, A., Roberts, K., Bethard, S., Naumann, T. (eds.) Proceedings of the 3rd Clinical Natural Language Processing Workshop, pp. 65\u201372. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.clinicalnlp-1.7. https:\/\/aclanthology.org\/2020.clinicalnlp-1.7","DOI":"10.18653\/v1\/2020.clinicalnlp-1.7"},{"key":"26_CR32","unstructured":"Segura-Bedmar, I., Mart\u00ednez, P., Herrero-Zazo, M.: SemEval-2013 task 9: extraction of drug-drug interactions from biomedical texts (DDIExtraction 2013). In: Manandhar, S., Yuret, D. (eds.) Second Joint Conference on Lexical and Computational Semantics (*SEM), Volume 2: Proceedings of the Seventh International Workshop on Semantic Evaluation (SemEval 2013), Atlanta, Georgia, USA, pp. 341\u2013350. Association for Computational Linguistics (2013). https:\/\/aclanthology.org\/S13-2056"},{"key":"26_CR33","doi-asserted-by":"publisher","unstructured":"Shokri, R., Stronati, M., Song, C., Shmatikov, V.: Membership inference attacks against machine learning models. In: 2017 IEEE Symposium on Security and Privacy (SP), Los Alamitos, CA, USA, pp. 3\u201318. IEEE Computer Society (2017). https:\/\/doi.org\/10.1109\/SP.2017.41. 
https:\/\/doi.ieeecomputersociety.org\/10.1109\/SP.2017.41","DOI":"10.1109\/SP.2017.41"},{"key":"26_CR34","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1007\/978-3-030-61377-8_28","volume-title":"Intelligent Systems","author":"F Souza","year":"2020","unstructured":"Souza, F., Nogueira, R., Lotufo, R.: BERTimbau: pretrained BERT models for Brazilian Portuguese. In: Cerri, R., Prati, R.C. (eds.) BRACIS 2020. LNCS (LNAI), vol. 12319, pp. 403\u2013417. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-61377-8_28"},{"key":"26_CR35","doi-asserted-by":"publisher","unstructured":"Tang, R., Han, X., Jiang, X., Hu, X.: Does Synthetic Data Generation of LLMs Help Clinical Text Mining? arXiv abs\/2303.04360 (2023). https:\/\/doi.org\/10.48550\/arXiv.2303.04360","DOI":"10.48550\/arXiv.2303.04360"},{"key":"26_CR36","unstructured":"Qwen Team: QwQ-32B: Embracing the Power of Reinforcement Learning (2025). https:\/\/qwenlm.github.io\/blog\/qwq-32b\/"},{"key":"26_CR37","doi-asserted-by":"publisher","unstructured":"Ubani, S., Polat, S., Nielsen, R.D.: ZeroShotDataAug: Generating and Augmenting Training Data with ChatGPT. arXiv abs\/2304.14334 (2023). https:\/\/doi.org\/10.48550\/arXiv.2304.14334","DOI":"10.48550\/arXiv.2304.14334"},{"key":"26_CR38","doi-asserted-by":"publisher","unstructured":"Wei, J., Zou, K.: EDA: easy data augmentation techniques for boosting performance on text classification tasks. In: Inui, K., Jiang, J., Ng, V., Wan, X. (eds.) Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), Hong Kong, China, pp. 6382\u20136388. Association for Computational Linguistics (2019). https:\/\/doi.org\/10.18653\/v1\/D19-1670. 
https:\/\/aclanthology.org\/D19-1670\/","DOI":"10.18653\/v1\/D19-1670"}],"container-title":["Lecture Notes in Computer Science","Progress in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-05176-9_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T13:41:19Z","timestamp":1757943679000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-05176-9_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,15]]},"ISBN":["9783032051752","9783032051769"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-05176-9_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,15]]},"assertion":[{"value":"15 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"EPIA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"EPIA Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Faro","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference 
Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"epia2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/epia2025.ualg.pt\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}