{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,20]],"date-time":"2026-06-20T01:21:17Z","timestamp":1781918477536,"version":"3.54.5"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031703805","type":"print"},{"value":"9783031703812","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70381-2_14","type":"book-chapter","created":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T10:01:54Z","timestamp":1725184914000},"page":"215-230","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":21,"title":["MedSyn: LLM-Based Synthetic Medical Text Generation Framework"],"prefix":"10.1007","author":[{"given":"Gleb","family":"Kumichev","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Pavel","family":"Blinov","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yulia","family":"Kuzkina","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Vasily","family":"Goncharov","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Galina","family":"Zubkova","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nikolai","family":"Zenovkin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Aleksei","family":"Goncharov","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Andrey","family":"Savchenko","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,8,22]]},"reference":[{"key":"14_CR1","doi-asserted-by":"crossref","unstructured":"Abdollahi, M., Gao, X., Mei, Y., Ghosh, S., Li, J., Narag, M.: Substituting clinical features using synthetic medical phrases: Medical text data augmentation techniques. Artifi. Intell. Med 120, 102167 (2021)","DOI":"10.1016\/j.artmed.2021.102167"},{"key":"14_CR2","unstructured":"Achiam, J., et\u00a0al.: GPT-4 technical report. arXiv preprint arXiv:2303.08774 (2023)"},{"key":"14_CR3","doi-asserted-by":"crossref","unstructured":"Azaria, A., Mitchell, T.: The internal state of an llm knows when its lying. arXiv preprint arXiv:2304.13734 (2023)","DOI":"10.18653\/v1\/2023.findings-emnlp.68"},{"key":"14_CR4","doi-asserted-by":"crossref","unstructured":"Benoit, J.R.: ChatGPT for clinical vignette generation, revision, and evaluation. MedRxiv, pp. 2023\u201302 (2023)","DOI":"10.1101\/2023.02.04.23285478"},{"key":"14_CR5","doi-asserted-by":"publisher","unstructured":"Blinov, P., Reshetnikova, A., Nesterov, A., Zubkova, G., Kokh, V.: RuMedBench: a Russian medical language understanding benchmark. In: Artificial Intelligence in Medicine, pp. 383\u2013392. Springer International Publishing (2022). https:\/\/doi.org\/10.1007\/978-3-031-09342-5_38","DOI":"10.1007\/978-3-031-09342-5_38"},{"key":"14_CR6","doi-asserted-by":"crossref","unstructured":"Bodenreider, O.: The unified medical language system (umls): integrating biomedical terminology. Nucleic Acids Res. 32(suppl_1), D267\u2013D270 (2004)","DOI":"10.1093\/nar\/gkh061"},{"key":"14_CR7","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"1","key":"14_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1472-6947-10-59","volume":"10","author":"AL Buczak","year":"2010","unstructured":"Buczak, A.L., Babin, S., Moniz, L.: Data-driven approach for creating synthetic electronic medical records. BMC Med. Inform. Decis. Mak. 10(1), 1\u201328 (2010)","journal-title":"BMC Med. Inform. Decis. Mak."},{"issue":"1","key":"14_CR9","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1038\/s41597-023-01960-3","volume":"10","author":"P Chandak","year":"2023","unstructured":"Chandak, P., Huang, K., Zitnik, M.: Building a knowledge graph to enable precision medicine. Sci. Data 10(1), 67 (2023)","journal-title":"Sci. Data"},{"key":"14_CR10","unstructured":"Choi, E., Biswal, S., Malin, B., Duke, J., Stewart, W.F., Sun, J.: Generating multi-label discrete patient records using generative adversarial networks. In: Proceedings of the 2nd Machine Learning for Healthcare Conference, pp. 286\u2013305. PMLR (2017)"},{"key":"14_CR11","unstructured":"Cui, H., et\u00a0al.: A survey on knowledge graphs for healthcare: Resources, applications, and promises. arXiv preprint arXiv:2306.04802 (2023)"},{"key":"14_CR12","doi-asserted-by":"crossref","unstructured":"Gao, Y., et al.: Leveraging a medical knowledge graph into large language models for diagnosis prediction. arXiv preprint arXiv:2308.14321 (2023)","DOI":"10.2196\/preprints.58670"},{"key":"14_CR13","doi-asserted-by":"publisher","DOI":"10.1016\/j.bdr.2020.100174","volume":"23","author":"F Gong","year":"2021","unstructured":"Gong, F., Wang, M., Wang, H., Wang, S., Liu, M.: SMR: medical knowledge graph embedding for safe medicine recommendation. Big Data Res. 23, 100174 (2021)","journal-title":"Big Data Res."},{"issue":"3","key":"14_CR14","doi-asserted-by":"publisher","first-page":"827","DOI":"10.1093\/ije\/dyv098","volume":"44","author":"E Herrett","year":"2015","unstructured":"Herrett, E., et al.: Data resource profile: clinical practice research datalink (CPRD). Int. J. Epidemiol. 44(3), 827\u2013836 (2015)","journal-title":"Int. J. Epidemiol."},{"key":"14_CR15","doi-asserted-by":"crossref","unstructured":"Hiebel, N., Ferret, O., Fort, K., N\u00e9v\u00e9ol, A.: Can synthetic text help clinical named entity recognition? a study of electronic health records in French. In: Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics (EACL), pp. 2320\u20132338. ACL, Dubrovnik, Croatia (May 2023)","DOI":"10.18653\/v1\/2023.eacl-main.170"},{"key":"14_CR16","unstructured":"Hu, E.J., et al.: Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"14_CR17","unstructured":"Huang, K., Altosaar, J., Ranganath, R.: ClinicalBERT: Modeling clinical notes and predicting hospital readmission. arXiv preprint arXiv:1904.05342 (2019)"},{"issue":"1","key":"14_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41597-022-01899-x","volume":"10","author":"AE Johnson","year":"2023","unstructured":"Johnson, A.E., et al.: MIMIC-IV, a freely accessible electronic health record dataset. Sci. Data 10(1), 1 (2023)","journal-title":"Sci. Data"},{"key":"14_CR19","doi-asserted-by":"crossref","unstructured":"Johnson, A.E., et al.: MIMIC-III, a freely accessible critical care database. Sci. Data 3(1), 1\u20139 (2016)","DOI":"10.1038\/sdata.2016.35"},{"key":"14_CR20","unstructured":"Kuratov, Y., Arkhipov, M.: Adaptation of deep bidirectional multilingual transformers for Russian language. arXiv preprint arXiv:1905.07213 (2019)"},{"key":"14_CR21","unstructured":"Lewis, P., et al.: Retrieval-augmented generation for knowledge-intensive NLP tasks: Adv. Neural. Inf. Process. Syst. 33, 9459\u20139474 (2020)"},{"key":"14_CR22","doi-asserted-by":"publisher","unstructured":"Li, J., et al.: Are synthetic clinical notes useful for real natural language processing tasks: A case study on clinical entity recognition. J. Am. Med. Inform. Associat. 28 (2021). https:\/\/doi.org\/10.1093\/jamia\/ocab112","DOI":"10.1093\/jamia\/ocab112"},{"key":"14_CR23","doi-asserted-by":"crossref","unstructured":"Luo, R., et al.: BioGPT: generative pre-trained transformer for biomedical text generation and mining. Briefings Bioinform. 23(6) (2022)","DOI":"10.1093\/bib\/bbac409"},{"key":"14_CR24","unstructured":"Nguyen, T.T., et al.: Mimic-iv-icd: a new benchmark for extreme multilabel classification. arXiv preprint arXiv:2304.13998 (2023)"},{"key":"14_CR25","doi-asserted-by":"crossref","unstructured":"Pampari, A., Raghavan, P., Liang, J., Peng, J.: emrQA: a large corpus for question answering on electronic medical records. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pp. 2357\u20132368 (2018)","DOI":"10.18653\/v1\/D18-1258"},{"issue":"1","key":"14_CR26","doi-asserted-by":"publisher","first-page":"210","DOI":"10.1038\/s41746-023-00958-w","volume":"6","author":"C Peng","year":"2023","unstructured":"Peng, C., et al.: A study of generative large language model for medical research and healthcare. NPJ Digital Med. 6(1), 210 (2023)","journal-title":"NPJ Digital Med."},{"key":"14_CR27","doi-asserted-by":"crossref","unstructured":"Ray, P.P.: ChatGPT: a comprehensive review on background, applications, key challenges, bias, ethics, limitations and future scope. Internet of Things Cyber-Phys. Syst. (2023)","DOI":"10.1016\/j.iotcps.2023.04.003"},{"key":"14_CR28","unstructured":"Reiter, J.P., Drechsler, J.: Releasing multiply-imputed synthetic data generated in two stages to protect confidentiality. Statistica Sinica, 405\u2013421 (2010)"},{"key":"14_CR29","doi-asserted-by":"crossref","unstructured":"Remy, F., Demuynck, K., Demeester, T.: BioLORD-2023: Semantic textual representations fusing llm and clinical knowledge graph insights. arXiv preprint arXiv:2311.16075 (2023)","DOI":"10.1093\/jamia\/ocae029"},{"key":"14_CR30","doi-asserted-by":"crossref","unstructured":"Romanov, A., Shivade, C.: Lessons from natural language inference in the clinical domain. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pp. 1586\u20131596 (2018)","DOI":"10.18653\/v1\/D18-1187"},{"issue":"4","key":"14_CR31","doi-asserted-by":"publisher","first-page":"1768","DOI":"10.1177\/1460458218799470","volume":"25","author":"S Santiso","year":"2019","unstructured":"Santiso, S., Casillas, A., P\u00e9rez, A.: The class imbalance problem detecting adverse drug reactions in electronic health records. Health Informatics J. 25(4), 1768\u20131778 (2019)","journal-title":"Health Informatics J."},{"key":"14_CR32","doi-asserted-by":"crossref","unstructured":"Shaib, C., Li, M.L., Joseph, S., Marshall, I.J., Li, J.J., Wallace, B.C.: Summarizing, simplifying, and synthesizing medical evidence using GPT-3 (with varying success). arXiv preprint arXiv:2305.06299 (2023)","DOI":"10.18653\/v1\/2023.acl-short.119"},{"key":"14_CR33","doi-asserted-by":"crossref","unstructured":"Sharma, S., Santra, B., Jana, A., Tokala, S., Ganguly, N., Goyal, P.: Incorporating domain knowledge into medical NLI using knowledge graphs. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/D19-1631"},{"issue":"7972","key":"14_CR34","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1038\/s41586-023-06291-2","volume":"620","author":"K Singhal","year":"2023","unstructured":"Singhal, K., et al.: Large language models encode clinical knowledge. Nature 620(7972), 172\u2013180 (2023)","journal-title":"Nature"},{"key":"14_CR35","doi-asserted-by":"publisher","unstructured":"Starovoytova, E., et al.: RuMedPrimeData (2021). https:\/\/doi.org\/10.5281\/zenodo.5765873","DOI":"10.5281\/zenodo.5765873"},{"key":"14_CR36","unstructured":"Tang, R., Han, X., Jiang, X., Hu, X.: Does synthetic data generation of LLMs help clinical text mining? arXiv preprint arXiv:2303.04360 (2023)"},{"key":"14_CR37","unstructured":"Touvron, H., et\u00a0al.: LLaMA: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)"},{"key":"14_CR38","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12911-020-01245-4","volume":"20","author":"Y Wang","year":"2020","unstructured":"Wang, Y., Wei, Y., Yang, H., Li, J., Zhou, Y., Wu, Q.: Utilizing imbalanced electronic health records to predict acute kidney injury by ensemble learning and time series model. BMC Med. Inform. Decis. Mak. 20, 1\u201313 (2020)","journal-title":"BMC Med. Inform. Decis. Mak."},{"key":"14_CR39","unstructured":"Wu, C., Zhang, X., Zhang, Y., Wang, Y., Xie, W.: PMC-LLaMA: Further finetuning llama on medical papers. arXiv preprint arXiv:2304.14454 (2023)"},{"key":"14_CR40","doi-asserted-by":"publisher","unstructured":"Wu, X., Duan, J., Pan, Y., Li, M.: Medical knowledge graph: data sources, construction, reasoning, and applications. Big Data Mining Analy. 6(2), 201\u2013217 (2023). https:\/\/doi.org\/10.26599\/BDMA.2022.9020021","DOI":"10.26599\/BDMA.2022.9020021"},{"key":"14_CR41","doi-asserted-by":"publisher","unstructured":"Xie, Q., Schenck, E.J., Yang, H.S., Chen, Y., Peng, Y., Wang, F.: Faithful AI in medicine: A systematic review with large language models and beyond. medRxiv (2023). https:\/\/doi.org\/10.1101\/2023.04.18.23288752","DOI":"10.1101\/2023.04.18.23288752"},{"key":"14_CR42","unstructured":"Xiong, H., et al.: DoctorGLM: Fine-tuning your chinese doctor is not a herculean task. arXiv preprint arXiv:2304.01097 (2023)"},{"key":"14_CR43","doi-asserted-by":"crossref","unstructured":"Xu, X., et al.: Predictive modeling of clinical events with mutual enhancement between longitudinal patient records and medical knowledge graph. In: Proceedings of IEEE International Conference on Data Mining (ICDM), pp. 777\u2013786 (2021)","DOI":"10.1109\/ICDM51629.2021.00089"},{"key":"14_CR44","unstructured":"Yalunin, A., Nesterov, A., Umerenkov, D.: RuBioRoBERTa: a pre-trained biomedical language model for Russian language biomedical text mining. arXiv preprint arXiv:2204.03951 (2022)"},{"key":"14_CR45","doi-asserted-by":"crossref","unstructured":"Zhang, H., et\u00a0al.: HuatuoGPT, towards taming language model to be a doctor. arXiv preprint arXiv:2305.15075 (2023)","DOI":"10.18653\/v1\/2023.findings-emnlp.725"},{"key":"14_CR46","unstructured":"Zhang, T., Kishore, V., Wu, F., Weinberger, K.Q., Artzi, Y.: BERTScore: Evaluating text generation with BERT. arXiv preprint arXiv:1904.09675 (2020)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Applied Data Science Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70381-2_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T10:10:09Z","timestamp":1725185409000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70381-2_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031703805","9783031703812"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70381-2_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"22 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vilnius","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lithuania","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2024.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}