{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T09:39:48Z","timestamp":1771580388272,"version":"3.50.1"},"reference-count":93,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T00:00:00Z","timestamp":1750636800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T00:00:00Z","timestamp":1750636800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"the Non-Profit Central Research Institute Fund of Chinese Academy of Medical Sciences","award":["NO. 2023-RC320-01"],"award-info":[{"award-number":["NO. 2023-RC320-01"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1007\/s10489-025-06634-8","type":"journal-article","created":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T04:38:51Z","timestamp":1750653531000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Textual similarity calculation techniques in the medical field: a retrospective review"],"prefix":"10.1007","volume":"55","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8890-2161","authenticated-orcid":false,"given":"Hongzhen","family":"Cui","sequence":"first","affiliation":[]},{"given":"Shichao","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Haoming","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Xiaoyue","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Longhao","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Meihua","family":"Piao","sequence":"additional","affiliation":[]},{"given":"Yunfeng","family":"Peng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,23]]},"reference":[{"issue":"3","key":"6634_CR1","doi-asserted-by":"crossref","first-page":"457","DOI":"10.1093\/jamia\/ocz200","volume":"27","author":"S Wu","year":"2020","unstructured":"Wu S, Roberts K, Datta S et al (2020) Deep learning in clinical natural Language processing: a methodical review[J]. J Am Med Inform Assoc 27(3):457\u2013470","journal-title":"J Am Med Inform Assoc"},{"issue":"4","key":"6634_CR2","doi-asserted-by":"crossref","first-page":"432","DOI":"10.1177\/1460458216678443","volume":"24","author":"YM Kim","year":"2018","unstructured":"Kim YM, Delen D (2018) Medical informatics research trend analysis: a text mining approach[J]. Health Inf J 24(4):432\u2013452","journal-title":"Health Inf J"},{"key":"6634_CR3","doi-asserted-by":"crossref","first-page":"e45948","DOI":"10.2196\/45948","volume":"25","author":"M Wolfien","year":"2023","unstructured":"Wolfien M, Ahmadi N, Fitzer K et al (2023) Ten topics to get started in medical informatics Research[J]. J Med Internet Res 25:e45948","journal-title":"J Med Internet Res"},{"key":"6634_CR4","doi-asserted-by":"crossref","unstructured":"Xiao W, Jing L, Xu Y et al (2021) Different data mining approaches based medical text data[J]. J Healthcare Eng 2021","DOI":"10.1155\/2021\/1285167"},{"issue":"1","key":"6634_CR5","doi-asserted-by":"crossref","first-page":"105","DOI":"10.1093\/bib\/bbv118","volume":"18","author":"K Shameer","year":"2017","unstructured":"Shameer K, Badgeley MA, Miotto R et al (2017) Translational bioinformatics in the era of real-time biomedical, health care and wellness data streams[J]. Brief Bioinform 18(1):105\u2013124","journal-title":"Brief Bioinform"},{"key":"6634_CR6","doi-asserted-by":"crossref","first-page":"68","DOI":"10.1016\/j.ijmedinf.2017.12.003","volume":"112","author":"JT Wu","year":"2018","unstructured":"Wu JT, Dernoncourt F, Gehrmann S et al (2018) Behind the scenes: A medical natural Language processing project[J]. Int J Med Informatics 112:68\u201373","journal-title":"Int J Med Informatics"},{"key":"6634_CR7","doi-asserted-by":"crossref","unstructured":"Chen N, Ren J (2023) An EHR\u00a0data quality evaluation approach based on medical knowledge and text matching[J]. IRBM 44(5): 100782.\u00a0Elsevier BV","DOI":"10.1016\/j.irbm.2023.100782"},{"key":"6634_CR8","doi-asserted-by":"crossref","unstructured":"Jiang K, Jin G, Zhang Z et al (2024) Incorporating external knowledge for text matching model[J]. Comput Speech Language 87: 101638. Elsevier BV","DOI":"10.1016\/j.csl.2024.101638"},{"issue":"6","key":"6634_CR9","doi-asserted-by":"crossref","first-page":"1236","DOI":"10.1093\/bib\/bbx044","volume":"19","author":"R Miotto","year":"2018","unstructured":"Miotto R, Wang F, Wang S et al (2018) Deep learning for healthcare: review, opportunities and challenges[J]. Brief Bioinform 19(6):1236\u20131246","journal-title":"Brief Bioinform"},{"issue":"4","key":"6634_CR10","first-page":"1","volume":"52","author":"H Kaur","year":"2019","unstructured":"Kaur H, Pannu HS, Malhi AK (2019) A systematic review on imbalanced data challenges in machine learning: applications and solutions[J]. ACM Comput Surv (CSUR) 52(4):1\u201336","journal-title":"ACM Comput Surv (CSUR)"},{"key":"6634_CR11","doi-asserted-by":"crossref","first-page":"103957","DOI":"10.1016\/j.jbi.2021.103957","volume":"125","author":"K De Angeli","year":"2022","unstructured":"De Angeli K, Gao S, Danciu I et al (2022) Class imbalance in out-of-distribution datasets: improving the robustness of the TextCNN for the classification of rare cancer types[J]. J Biomed Inform 125:103957","journal-title":"J Biomed Inform"},{"issue":"01","key":"6634_CR12","doi-asserted-by":"crossref","first-page":"016","DOI":"10.1055\/s-0039-1677908","volume":"28","author":"F Wang","year":"2019","unstructured":"Wang F, Preininger A (2019) AI in health: state of the art, challenges, and future directions[J]. Yearb Med Inform 28(01):016\u2013026","journal-title":"Yearb Med Inform"},{"key":"6634_CR13","doi-asserted-by":"crossref","first-page":"104458","DOI":"10.1016\/j.jbi.2023.104458","volume":"144","author":"Y Ge","year":"2023","unstructured":"Ge Y, Guo Y, Das S, Al-Garadi MA, Sarker A (2023) Few-shot learning for medical text: A review of advances, trends, and opportunities. J Biomed Inform 144:104458","journal-title":"J Biomed Inform"},{"key":"6634_CR14","doi-asserted-by":"crossref","first-page":"113942","DOI":"10.1109\/ACCESS.2019.2935223","volume":"7","author":"G Wu","year":"2019","unstructured":"Wu G, Tang G, Wang Z et al (2019) An attention-based BiLSTM-CRF model for Chinese clinic named entity recognition[J]. Ieee Access 7:113942\u2013113949","journal-title":"Ieee Access"},{"issue":"4","key":"6634_CR15","doi-asserted-by":"crossref","first-page":"1234","DOI":"10.1093\/bioinformatics\/btz682","volume":"36","author":"J Lee","year":"2020","unstructured":"Lee J, Yoon W, Kim S et al (2020) BioBERT: a pre-trained biomedical Language representation model for biomedical text mining[J]. Bioinformatics 36(4):1234\u20131240","journal-title":"Bioinformatics"},{"key":"6634_CR16","doi-asserted-by":"crossref","unstructured":"Luo J, Xiong W, Du J et al (2021) Traditional Chinese medicine text similarity calculation model based on the bidirectional temporal siamese network[J]. Evid Based Complementary Altern Med 2021","DOI":"10.1155\/2021\/2337924"},{"issue":"4","key":"6634_CR17","doi-asserted-by":"crossref","first-page":"381","DOI":"10.1145\/356827.356830","volume":"12","author":"PAV Hall","year":"1980","unstructured":"Hall PAV, Dowling GR (1980) Approximate string matching[J]. ACM Comput Surv (CSUR) 12(4):381\u2013402","journal-title":"ACM Comput Surv (CSUR)"},{"issue":"13","key":"6634_CR18","first-page":"13","volume":"68","author":"WH Gomaa","year":"2013","unstructured":"Gomaa WH, Fahmy AA (2013) A survey of text similarity approaches[J]. Int J Comput Appl 68(13):13\u201318","journal-title":"Int J Comput Appl"},{"key":"6634_CR19","unstructured":"Alberto B, Paolo R, Eneko A, Gorka L (2010) Plagiarism Detection across Distant Language Pairs, In Proceedings of the 23rd International Conference on Computational Linguistics, pages 37\u201345"},{"key":"6634_CR20","doi-asserted-by":"crossref","unstructured":"So\u011fanc\u0131o\u011flu G, \u00d6zt\u00fcrk H, \u00d6zg\u00fcr A (2017) BIOSSES: a semantic sentence similarity estimation system for the biomedical domain[J]. Bioinformatics 33(14):i49\u2013i58","DOI":"10.1093\/bioinformatics\/btx238"},{"key":"6634_CR21","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1016\/j.jbi.2018.04.009","volume":"82","author":"RA Gabriel","year":"2018","unstructured":"Gabriel RA, Kuo TT, McAuley J et al (2018) Identifying and characterizing highly similar notes in big clinical note datasets[J]. J Biomed Inform 82:63\u201369","journal-title":"J Biomed Inform"},{"key":"6634_CR22","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1016\/j.ijmedinf.2019.02.003","volume":"124","author":"X Zeng","year":"2019","unstructured":"Zeng X, Jia Z, He Z et al (2019) Measure clinical drug\u2013drug similarity using electronic medical records[J]. Int J Med Informatics 124:97\u2013103","journal-title":"Int J Med Informatics"},{"issue":"Suppl 1","key":"6634_CR23","doi-asserted-by":"crossref","first-page":"17","DOI":"10.1186\/s13326-019-0216-2","volume":"10","author":"H Tissot","year":"2019","unstructured":"Tissot H, Dobson R (2019) Combining string and phonetic similarity matching to identify misspelt names of drugs in medical records written in Portuguese[J]. J Biomedical Semant 10(Suppl 1):17","journal-title":"J Biomedical Semant"},{"key":"6634_CR24","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s12911-019-1002-x","volume":"20","author":"Q Chen","year":"2020","unstructured":"Chen Q, Du J, Kim S et al (2020) Deep learning with sentence embeddings pre-trained on biomedical corpora improves the performance of finding similar sentences in electronic medical records[J]. BMC Med Inf Decis Mak 20:1\u201310","journal-title":"BMC Med Inf Decis Mak"},{"issue":"8","key":"6634_CR25","doi-asserted-by":"crossref","first-page":"717","DOI":"10.1016\/j.compbiomed.2004.06.002","volume":"35","author":"JF Wang","year":"2005","unstructured":"Wang JF, Li ZR, Cai CZ et al (2005) Assessment of approximate string matching in a biomedical text retrieval problem[J]. Comput Biol Med 35(8):717\u2013724","journal-title":"Comput Biol Med"},{"key":"6634_CR26","doi-asserted-by":"crossref","first-page":"147892","DOI":"10.1109\/ACCESS.2019.2946622","volume":"7","author":"M Cui","year":"2019","unstructured":"Cui M, Bai R, Lu Z et al (2019) Regular expression based medical text classification using constructive heuristic approach[J]. IEEE Access 7:147892\u2013147904","journal-title":"IEEE Access"},{"key":"6634_CR27","doi-asserted-by":"crossref","unstructured":"Liu J, Bai R, Lu Z et al (2020) Data-driven regular expressions evolution for medical text classification using genetic programming[C]\/\/2020 IEEE Congress on evolutionary computation (CEC). IEEE,: 1\u20138","DOI":"10.1109\/CEC48606.2020.9185500"},{"key":"6634_CR28","doi-asserted-by":"crossref","unstructured":"Tu C, Cui M (2020) Learning regular expressions for interpretable medical text classification using a pool-based simulated annealing approach[C]\/\/2020 IEEE Congress on evolutionary computation (CEC). IEEE,: 1\u20137","DOI":"10.1109\/CEC48606.2020.9185650"},{"key":"6634_CR29","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s12911-016-0389-x","volume":"17","author":"Y Zhu","year":"2017","unstructured":"Zhu Y, Yan E, Wang F (2017) Semantic relatedness and similarity of biomedical terms: examining the effects of recency, size, and section of biomedical publications on the performance of word2vec[J]. BMC Med Inf Decis Mak 17:1\u20138","journal-title":"BMC Med Inf Decis Mak"},{"key":"6634_CR30","doi-asserted-by":"crossref","first-page":"103182","DOI":"10.1016\/j.jbi.2019.103182","volume":"94","author":"J Park","year":"2019","unstructured":"Park J, Kim K, Hwang W et al (2019) Concept embedding to measure semantic relatedness for biomedical information ontologies[J]. J Biomed Inform 94:103182","journal-title":"J Biomed Inform"},{"issue":"1","key":"6634_CR31","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1145\/951787.951766","volume":"10","author":"G Salton","year":"1973","unstructured":"Salton G, Yu CT (1973) On the construction of effective vocabularies for information retrieval[J]. Acm Sigplan Notices 10(1):48\u201360","journal-title":"Acm Sigplan Notices"},{"key":"6634_CR32","doi-asserted-by":"crossref","unstructured":"Wang J, Xu W, Yan W et al (2019) Text similarity calculation method based on hybrid model of LDA and TF-IDF[C]\/\/Proceedings of the 2019 3rd International Conference on Computer Science and Artificial Intelligence.: 1\u20138","DOI":"10.1145\/3374587.3374590"},{"key":"6634_CR33","doi-asserted-by":"crossref","first-page":"56","DOI":"10.1016\/j.knosys.2017.03.029","volume":"126","author":"J Zhang","year":"2017","unstructured":"Zhang J, Xu W, Guo J et al (2017) A Temporal model in electronic health record search[J]. Knowl Based Syst 126:56\u201367","journal-title":"Knowl Based Syst"},{"key":"6634_CR34","doi-asserted-by":"crossref","unstructured":"Islamaj R, Wilbur WJ, Xie N et al (2019) PubMed text similarity model and its application to curation efforts in the conserved domain database[J]. Database 2019: baz064","DOI":"10.1093\/database\/baz064"},{"issue":"3","key":"6634_CR35","doi-asserted-by":"crossref","first-page":"e18029","DOI":"10.1371\/journal.pone.0018029","volume":"6","author":"KW Boyack","year":"2011","unstructured":"Boyack KW, Newman D, Duhon RJ et al (2011) Clustering more than two million biomedical publications: comparing the accuracies of nine text-based similarity approaches[J]. PLoS ONE 6(3):e18029","journal-title":"PLoS ONE"},{"key":"6634_CR36","doi-asserted-by":"crossref","unstructured":"Wan Q, Xu X, Han J (2024) A dimensionality reduction method for large-scale group decision-making using TF-IDF feature similarity and information loss entropy[J]. Appl Soft Comput 150: 111039. Elsevier BV","DOI":"10.1016\/j.asoc.2023.111039"},{"key":"6634_CR37","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.artmed.2018.08.006","volume":"93","author":"S Henry","year":"2019","unstructured":"Henry S, McQuilkin A, McInnes BT (2019) Association measures for estimating semantic similarity and relatedness between biomedical concepts[J]. Artif Intell Med 93:1\u201310","journal-title":"Artif Intell Med"},{"key":"6634_CR38","doi-asserted-by":"publisher","unstructured":"Wang N, Huang Y, Liu H et al (2021) Study on the semi-supervised learning-based patient similarity from heterogeneous electronic medical records. BMC Med Inf Decis Mak 21(Suppl 2):58. https:\/\/doi.org\/10.1186\/s12911-021-01432-x","DOI":"10.1186\/s12911-021-01432-x"},{"key":"6634_CR39","doi-asserted-by":"crossref","unstructured":"Memarzadeh H, Ghadiri N, Samwald M et al (2022) A study into patient similarity through representation learning from medical records[J]. Knowl Inf Syst 64(12):3293\u20133324","DOI":"10.1007\/s10115-022-01740-2"},{"key":"6634_CR40","doi-asserted-by":"crossref","unstructured":"Satti FA, Hussain M, Ali SI et al (2023) A semantic sequence similarity based approach for extracting medical entities from clinical conversations[J]. Inf Process Manag 60(2): 103213. Elsevier BV","DOI":"10.1016\/j.ipm.2022.103213"},{"key":"6634_CR41","doi-asserted-by":"publisher","unstructured":"Blagec K, Xu H, Agibetov A et al (2019) Neural sentence embedding models for semantic similarity Estimation in the biomedical domain. BMC Bioinformatics 20:178. https:\/\/doi.org\/10.1186\/s12859-019-2789-2","DOI":"10.1186\/s12859-019-2789-2"},{"key":"6634_CR42","doi-asserted-by":"crossref","unstructured":"Hubel DH, Wiesel TN (1968) Receptive fields and functional architecture of monkey striate cortex[J]. J Physiol 195(1):215\u2013243","DOI":"10.1113\/jphysiol.1968.sp008455"},{"issue":"4","key":"6634_CR43","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1007\/BF00344251","volume":"36","author":"K Fukushima","year":"1980","unstructured":"Fukushima K, Neocognitron (1980) A self-organizing neural network model for a mechanism of pattern recognition unaffected by shift in position[J]. Biol Cybern 36(4):193\u2013202","journal-title":"Biol Cybern"},{"key":"6634_CR44","unstructured":"LeCun Y, Boser B, Denker J et al (1989) Handwritten digit recognition with a back-propagation network[J]. Adv Neural Inf Process Syst 2"},{"key":"6634_CR45","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks[J]. Adv Neural Inf Process Syst 25"},{"key":"6634_CR46","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition[J]. arXiv preprint arXiv:1409.1556"},{"key":"6634_CR47","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S et al (2016) Deep residual learning for image recognition[C]\/\/Proceedings of the IEEE conference on computer vision and pattern recognition.: 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"6634_CR48","doi-asserted-by":"crossref","unstructured":"Liao S, Wang J, Yu R et al (2017) CNN for situations Understanding based on sentiment analysis of Twitter data[J]. Procedia Comput Sci 111:376\u2013381","DOI":"10.1016\/j.procs.2017.06.037"},{"key":"6634_CR49","doi-asserted-by":"crossref","unstructured":"Guo J, Yue B, Xu G et al (2017) An enhanced convolutional neural network model for answer selection[C]\/\/Proceedings of the 26th international conference on world wide web companion.: 789\u2013790","DOI":"10.1145\/3041021.3054216"},{"key":"6634_CR50","doi-asserted-by":"publisher","unstructured":"Kim Y (2014) Convolutional neural networks for sentence Classification[J]. Eprint Arxiv. https:\/\/doi.org\/10.3115\/v1\/D14-1181","DOI":"10.3115\/v1\/D14-1181"},{"key":"6634_CR51","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s12911-018-0723-6","volume":"19","author":"T Zheng","year":"2019","unstructured":"Zheng T, Gao Y, Wang F et al (2019) Detection of medical text semantic similarity based on convolutional neural network[J]. BMC Med Inf Decis Mak 19:1\u201311","journal-title":"BMC Med Inf Decis Mak"},{"key":"6634_CR52","doi-asserted-by":"crossref","unstructured":"Grover A, Leskovec J (2016) node2vec: Scalable feature learning for networks[C]\/\/Proceedings of the 22nd ACM SIGKDD international conference on Knowledge discovery and data mining.: 855\u2013864","DOI":"10.1145\/2939672.2939754"},{"key":"6634_CR53","doi-asserted-by":"crossref","unstructured":"Ribeiro MT, Singh S, Guestrin C (2016) Why should i trust you? Explaining the predictions of any classifier[C]\/\/Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining.: 1135\u20131144","DOI":"10.1145\/2939672.2939778"},{"key":"6634_CR54","unstructured":"Wu Y, Jiang M, Xu J, Zhi D, Xu H (2018) Clinical named entity recognition using deep learning models. AMIA Annu Symp Proc 2017:1812\u20131819"},{"key":"6634_CR55","doi-asserted-by":"publisher","unstructured":"Xiong CZ, Su M (2019) IARNN-Based semantic-containing double-level embedding Bi-LSTM for question-and-answer matching. Comput Intell Neurosci 2019:6074840. https:\/\/doi.org\/10.1155\/2019\/6074840","DOI":"10.1155\/2019\/6074840"},{"key":"6634_CR56","doi-asserted-by":"publisher","unstructured":"Prabhakar SK, Won D-O (2021) Medical Text Classification Using Hybrid Deep Learning Models with Multihead Attention[J]. Comput Intell Neurosci 2021(16): 16.\u00a0https:\/\/doi.org\/10.1155\/2021\/9425655.\u00a0Article ID 9425655","DOI":"10.1155\/2021\/9425655"},{"key":"6634_CR57","unstructured":"Hongda AN, Zhengguang LI, Di WU, Wei ZHENG (2021) Attentive Siamese neural network for biomedical textual Similarity[J]. J Dalian Jiaotong Univ 42(02):109\u2013113"},{"key":"6634_CR58","doi-asserted-by":"crossref","unstructured":"Li Z, Ren J (2020) Fine-tuning ERNIE for chest abnormal imaging signs extraction[J]. J Biomed Inform 108:103492","DOI":"10.1016\/j.jbi.2020.103492"},{"key":"6634_CR59","doi-asserted-by":"crossref","unstructured":"Mutinda FW, Yada S, Wakamiya S et al (2021) Semantic textual similarity in Japanese clinical domain texts using BERT[J]. Methods Inf Med 60(S 01):e56\u2013e64","DOI":"10.1055\/s-0041-1731390"},{"key":"6634_CR60","unstructured":"Alsentzer E, Murphy J, Boag W, Weng W, Jindi D, Naumann T et al Publicly available clinical BERT embeddings. arXiv. Jun. URL: https:\/\/arxiv.org\/abs\/1904.03323. [accessed 2021-04-19]"},{"key":"6634_CR61","doi-asserted-by":"crossref","unstructured":"Liu S, Vuli\u0107 I, Korhonen A, Collier N et al (2021) Self-alignment pretraining for biomedical entity representations[C]\/\/Findings of the Association for Computational Linguistics: EMNLP. 2021: 4221\u20134233","DOI":"10.18653\/v1\/2021.naacl-main.334"},{"key":"6634_CR62","doi-asserted-by":"crossref","unstructured":"Kades K, Sellner J, Koehler G et al (2021) Adapting bidirectional encoder representations from transformers (BERT) to assess clinical semantic textual similarity: algorithm development and validation study[J]. JMIR Med Inform 9(2):e22795","DOI":"10.2196\/22795"},{"key":"6634_CR63","doi-asserted-by":"crossref","unstructured":"Hu J, Zhu Y, Wu L et al (2025) Text semantic matching algorithm based on the introduction of external knowledge under contrastive learning[J]. Int J Mach Learn Cybernet 16:741\u2013753","DOI":"10.1007\/s13042-024-02285-2"},{"key":"6634_CR64","unstructured":"Jiang M, Wu Y, Shah A et al (2014) Extracting and standardizing medication information in clinical text\u2013the MedEx-UIMA system[J]. AMIA Summits Transl Sci Proc 2014:37"},{"key":"6634_CR65","unstructured":"Lan Z et al (2019) ALBERT: A lite BERT for Self-supervised learning of Language representations. ArXiv Abs\/1909. 11942:pag"},{"key":"6634_CR66","doi-asserted-by":"crossref","unstructured":"Li J, Zhang X, Zhou X (2021) ALBERT-Based self-ensemble model with semisupervised learning and data augmentation for clinical semantic textual similarity calculation: algorithm validation study. JMIR Med Inf 9(1):e23086","DOI":"10.2196\/23086"},{"key":"6634_CR67","doi-asserted-by":"crossref","unstructured":"Bartolini I, Moscato V, Postiglione M et al (2023) Data augmentation via context similarity: An application to biomedical Named Entity Recognition[J]. Inform Syst 119: 102291.\u00a0Elsevier BV","DOI":"10.1016\/j.is.2023.102291"},{"key":"6634_CR68","unstructured":"Yang Z, Dai Z, Yang Y et al (2019) Xlnet: generalized autoregressive pretraining for Language understanding[J]. Adv Neural Inf Process Syst 32"},{"key":"6634_CR69","unstructured":"Liu Y, Ott M, Goyal N et al (2019) Roberta: A robustly optimized Bert pretraining approach[J]. arXiv preprint arXiv:1907.11692"},{"key":"6634_CR70","unstructured":"Yang X, Yu Z, Guo Y et al (2021) Clinical relation extraction using transformer-based models[J]. arXiv preprint arXiv:2107.08957"},{"key":"6634_CR71","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1007\/s40264-018-0762-z","volume":"42","author":"A Jagannatha","year":"2019","unstructured":"Jagannatha A, Liu F, Liu W et al (2019) Overview of the first natural Language processing challenge for extracting medication, indication, and adverse drug events from electronic health record notes (MADE 1.0) [J]. Drug Saf 42:99\u2013111","journal-title":"Drug Saf"},{"issue":"1","key":"6634_CR72","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1093\/jamia\/ocz166","volume":"27","author":"S Henry","year":"2020","unstructured":"Henry S, Buchan K, Filannino M et al (2020) 2018 n2c2 shared task on adverse drug events and medication extraction in electronic health records[J]. J Am Med Inform Assoc 27(1):3\u201312","journal-title":"J Am Med Inform Assoc"},{"key":"6634_CR73","doi-asserted-by":"crossref","unstructured":"Wang Y, Afzal N, Fu S et al (2020) MedSTS: a resource for clinical semantic textual similarity[J]. Lang Resour Evaluation 54:57\u201372","DOI":"10.1007\/s10579-018-9431-1"},{"key":"6634_CR74","doi-asserted-by":"crossref","unstructured":"Li M, Bi X, Wang L et al (2022) Text similarity measurement method and application of online medical community based on density peak clustering[J]. J Organizational End User Comput (JOEUC) 34(2):1\u201325","DOI":"10.4018\/JOEUC.302893"},{"key":"6634_CR75","doi-asserted-by":"crossref","unstructured":"Zhang J, Chang D (2019) Semi-supervised patient similarity clustering algorithm based on electronic medical records[J]. IEEE Access 7:90705\u201390714","DOI":"10.1109\/ACCESS.2019.2923333"},{"key":"6634_CR76","doi-asserted-by":"crossref","unstructured":"Bacco L, Dell\u2019Orletta F, Lai H et al (2023) A text style transfer system for reducing the physician\u2013patient expertise gap: An analysis with automatic and human evaluations[J]. Expert Syst Appl 233: 120874.\u00a0Elsevier BV","DOI":"10.1016\/j.eswa.2023.120874"},{"key":"6634_CR77","doi-asserted-by":"crossref","first-page":"105002","DOI":"10.1109\/ACCESS.2021.3099021","volume":"9","author":"Z Li","year":"2021","unstructured":"Li Z, Chen H, Chen H (2021) Biomedical text similarity evaluation using attention mechanism and Siamese neural network[J]. IEEE Access 9:105002\u2013105011","journal-title":"IEEE Access"},{"issue":"1","key":"6634_CR78","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1016\/j.jbi.2011.10.005","volume":"45","author":"D S\u00e1nchez","year":"2012","unstructured":"S\u00e1nchez D, Sol\u00e9-Ribalta A, Batet M et al (2012) Enabling semantic similarity Estimation across multiple ontologies: an evaluation in the biomedical domain[J]. J Biomed Inform 45(1):141\u2013155","journal-title":"J Biomed Inform"},{"issue":"2","key":"6634_CR79","doi-asserted-by":"crossref","first-page":"e12596","DOI":"10.2196\/12596","volume":"7","author":"A Arbabi","year":"2019","unstructured":"Arbabi A, Adams DR, Fidler S et al (2019) Identifying clinical terms in medical text using ontology-guided machine learning[J]. JMIR Med Inf 7(2):e12596","journal-title":"JMIR Med Inf"},{"issue":"339","key":"6634_CR80","doi-asserted-by":"crossref","first-page":"578","DOI":"10.1080\/01621459.1972.10481251","volume":"67","author":"JH Zar","year":"1972","unstructured":"Zar JH (1972) Significance testing of the spearman rank correlation coefficient[J]. J Am Stat Assoc 67(339):578\u2013580","journal-title":"J Am Stat Assoc"},{"key":"6634_CR81","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s12911-015-0129-7","volume":"15","author":"LWC Chan","year":"2015","unstructured":"Chan LWC, Liu Y, Chan T et al (2015) PubMed-supported clinical term weighting approach for improving inter-patient similarity measure in diagnosis prediction[J]. BMC Med Inf Decis Mak 15:1\u20138","journal-title":"BMC Med Inf Decis Mak"},{"issue":"09","key":"6634_CR82","doi-asserted-by":"crossref","first-page":"1844","DOI":"10.1093\/jamia\/ocae029","volume":"31","author":"F Remy","year":"2024","unstructured":"Remy F, Demuynck K, Demeester T et al (2024) BioLORD-2023: semantic textual representations fusing large Language models and clinical knowledge graph insights[J]. J Am Med Inform Assoc 31(09):1844\u20131855","journal-title":"J Am Med Inform Assoc"},{"key":"6634_CR83","doi-asserted-by":"crossref","first-page":"2431","DOI":"10.3390\/electronics13132431","volume":"13","author":"L Min","year":"2024","unstructured":"Min L, Fan Z, Dou F, Sun J, Luo C, Lv Q et al (2024) Adaption BERT for medical information processing with ChatGPT and contrastive Learning[J]. Electronics 13:2431","journal-title":"Electronics"},{"key":"6634_CR84","doi-asserted-by":"crossref","first-page":"88","DOI":"10.1007\/s10489-024-05934-9","volume":"55","author":"X Zhang","year":"2025","unstructured":"Zhang X, Zhao G, Ren Y et al (2025) Data augmented large Language models for medical record generation[J]. Appl Intell 55:88","journal-title":"Appl Intell"},{"issue":"11","key":"6634_CR85","doi-asserted-by":"crossref","first-page":"e23101","DOI":"10.2196\/23101","volume":"9","author":"D Chang","year":"2021","unstructured":"Chang D, Lin E, Brandt C et al (2021) Incorporating domain knowledge into Language models by using graph convolutional networks for assessing semantic textual similarity: model development and performance comparison[J]. JMIR Med Inf 9(11):e23101","journal-title":"JMIR Med Inf"},{"key":"6634_CR86","unstructured":"Marelli M, Menini S, Baroni M, Bentivogli L, Bernardi R, Zamparelli R et al (2014) A SICK cure for the evaluation of compositional distributional semantic models[C]\/\/Proceedings of the Ninth International Conference on Language Resources and Evaluation,: 216\u2013233"},{"issue":"1","key":"6634_CR87","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1111\/stan.12111","volume":"72","author":"A Ly","year":"2018","unstructured":"Ly A, Marsman M, Wagenmakers EJ (2018) Analytic posteriors for Pearson\u2019s correlation coefficient[J]. Stat Neerl 72(1):4\u201313","journal-title":"Stat Neerl"},{"issue":"5","key":"6634_CR88","doi-asserted-by":"crossref","first-page":"e0127428","DOI":"10.1371\/journal.pone.0127428","volume":"10","author":"J Lee","year":"2015","unstructured":"Lee J, Maslove DM, Dubin JA (2015) Personalized mortality prediction driven by electronic medical data and a patient similarity metric. PLoS ONE 10(5):e0127428","journal-title":"PLoS ONE"},{"key":"6634_CR89","doi-asserted-by":"crossref","unstructured":"Chen Q, Du J, Kim S et al (2019) Evaluation of five sentence similarity models on electronic medical records[C]\/\/Proceedings of the 10th ACM International Conference on Bioinformatics, Computational Biology and Health Informatics.: 533\u2013533","DOI":"10.1145\/3307339.3343239"},{"key":"6634_CR90","unstructured":"Wang L, Xu W (2018) Neural network models for paraphrase identification, semantic textual similarity, natural Language inference, and question answering[J]. arXiv preprint arXiv:1806.04330"},{"issue":"11","key":"6634_CR91","doi-asserted-by":"crossref","first-page":"e19735","DOI":"10.2196\/19735","volume":"8","author":"X Yang","year":"2020","unstructured":"Yang X, He, Zhang et al (2020) Measurement of semantic textual similarity in clinical texts: comparison of transformer-based models[J]. JMIR Med Inf 8(11):e19735","journal-title":"JMIR Med Inf"},{"issue":"S1","key":"6634_CR92","first-page":"32","volume":"49","author":"YU Jiaqi","year":"2022","unstructured":"Jiaqi YU, Xiaodong KANG, Chengcheng BAI (2022) New text retrieval model of Chinese electronic medical Records[J]. Comput Sci 49(S1):32\u201338","journal-title":"Comput Sci"},{"key":"6634_CR93","doi-asserted-by":"crossref","unstructured":"Sparck Jones K (1972) A statistical interpretation of term specificity and its application in retrieval[J]. J Doc 28(1):11\u201321","DOI":"10.1108\/eb026526"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-06634-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-025-06634-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-06634-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T13:39:08Z","timestamp":1758289148000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-025-06634-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,23]]},"references-count":93,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2025,7]]}},"alternative-id":["6634"],"URL":"https:\/\/doi.org\/10.1007\/s10489-025-06634-8","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6,23]]},"assertion":[{"value":"9 May 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 June 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"This article does not contain any studies with human participants or animals performed by any of the authors.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"Informed consent was obtained from all individual participants included in the study.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed consent"}},{"value":"The authors declare that there is no conflict of interest regarding the publication of this paper.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"814"}}