{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T10:08:44Z","timestamp":1775815724034,"version":"3.50.1"},"reference-count":74,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T00:00:00Z","timestamp":1760054400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T00:00:00Z","timestamp":1760054400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software Eng"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s10664-025-10701-6","type":"journal-article","created":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T12:34:14Z","timestamp":1760099654000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["LogEval: A comprehensive benchmark suite for LLMs in log analysis"],"prefix":"10.1007","volume":"30","author":[{"given":"Tianyu","family":"Cui","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shiyu","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ziang","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tong","family":"Xiao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chenyu","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shimin","family":"Tao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yilun","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0330-0028","authenticated-orcid":false,"given":"Shenglin","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Duoming","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Changchang","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuzhe","family":"Cai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weibin","family":"Meng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yongqian","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dan","family":"Pei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,10]]},"reference":[{"key":"10701_CR1","doi-asserted-by":"publisher","unstructured":"Cito J, Leitner P, Fritz T, Gall HC (2015) The making of cloud applications: An empirical study on software development for the cloud. In: Proceedings of the 2015 10th joint meeting on foundations of software engineering. ESEC\/FSE 2015, Association for Computing Machinery New York NY USA, pp 393\u2013403. https:\/\/doi.org\/10.1145\/2786805.2786826","DOI":"10.1145\/2786805.2786826"},{"key":"10701_CR2","doi-asserted-by":"publisher","unstructured":"Li Y, Jiang ZMJ, Li H, Hassan AE, He C, Huang R, Zeng Z, Wang M, Chen P (2020) Predicting node failures in an ultra-large-scale cloud computing platform: An aiops solution. ACM Trans Softw Eng Methodol 29 (2). https:\/\/doi.org\/10.1145\/3385187","DOI":"10.1145\/3385187"},{"key":"10701_CR3","doi-asserted-by":"publisher","unstructured":"Zhang X, Xu Y, Qin S, He S, Qiao B, Li Z, Zhang H, Li X, Dang Y, Lin Q, Chintalapati M, Rajmohan S, Zhang D.: Onion: identifying incident-indicating logs for cloud systems. In: Proceedings of the 29th ACM joint meeting on european software engineering conference and symposium on the foundations of software engineering. ESEC\/FSE 2021, Association for Computing Machinery New York NY USA, pp 1253\u20131263. https:\/\/doi.org\/10.1145\/3468264.3473919","DOI":"10.1145\/3468264.3473919"},{"key":"10701_CR4","doi-asserted-by":"publisher","unstructured":"Nedelkoski S, Bogatinovski J, Acker A, Cardoso J, Kao O (2020) Self-attentive classification-based anomaly detection in unstructured logs. In: 2020 IEEE international conference on data mining (ICDM), pp 1196\u20131201. https:\/\/doi.org\/10.1109\/ICDM50108.2020.00148","DOI":"10.1109\/ICDM50108.2020.00148"},{"key":"10701_CR5","doi-asserted-by":"publisher","unstructured":"Wang J, Chu G, Wang J, Sun H, Qi Q, Wang Y, Qi J, Liao J (2024) Logexpert: Log-based recommended resolutions generation using large language model, pp 42\u201346. https:\/\/doi.org\/10.1145\/3639476.3639773","DOI":"10.1145\/3639476.3639773"},{"key":"10701_CR6","doi-asserted-by":"publisher","unstructured":"Zhong A, Mo D, Liu G, Liu J, Lu Q, Zhou Q, Wu J, Li Q, Wen Q (2024) Logparser-llm: Advancing efficient log parsing with large language models. In: Proceedings of the 30th ACM SIGKDD conference on knowledge discovery and data mining. KDD \u201924, Association for Computing Machinery, New York NY USA, pp 4559\u20134570. https:\/\/doi.org\/10.1145\/3637528.3671810","DOI":"10.1145\/3637528.3671810"},{"key":"10701_CR7","doi-asserted-by":"publisher","unstructured":"Locke S, Li H, Chen THP, Shang W, Liu W (2022) Logassist: Assisting log analysis through log summarization. IEEE Trans Softw Eng 48(9):3227\u20133241. https:\/\/doi.org\/10.1109\/TSE.2021.3083715","DOI":"10.1109\/TSE.2021.3083715"},{"key":"10701_CR8","doi-asserted-by":"publisher","unstructured":"Ma L, Yang W, Xu B, Jiang S, Fei B, Liang J, Zhou M, Xiao Y (2024) Knowlog: Knowledge enhanced pre-trained language model for log understanding. In: ICSE, pp 32\u201313213. https:\/\/doi.org\/10.1145\/3597503.3623304","DOI":"10.1145\/3597503.3623304"},{"key":"10701_CR9","doi-asserted-by":"crossref","unstructured":"Lin Q, Zhang H, Lou JG, Zhang Y, Chen X (2016) Log clustering based problem identification for online service systems. In: 2016 IEEE\/ACM 38th international conference on software engineering companion (ICSE-C), pp 102\u2013111","DOI":"10.1145\/2889160.2889232"},{"issue":"8","key":"10701_CR10","doi-asserted-by":"publisher","first-page":"861","DOI":"10.1016\/j.patrec.2005.10.010","volume":"27","author":"T Fawcett","year":"2006","unstructured":"Fawcett T (2006) An introduction to roc analysis. Patt Recognit Lett 27(8):861\u2013874","journal-title":"Patt Recognit Lett"},{"key":"10701_CR11","doi-asserted-by":"publisher","unstructured":"Meng W, Liu Y, Zaiter F, Zhang S, Chen Y, Zhang Y, Zhu Y, Wang E, Zhang R, Tao S, Yang D, Zhou R, Pei D (2020) Logparse: Making log parsing adaptive through word classification. In: 2020 29th international conference on computer communications and networks (ICCCN), pp 1\u20139. https:\/\/doi.org\/10.1109\/ICCCN49398.2020.9209681","DOI":"10.1109\/ICCCN49398.2020.9209681"},{"issue":"6","key":"10701_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3460345","volume":"54","author":"S He","year":"2021","unstructured":"He S, He P, Chen Z, Yang T, Su Y, Lyu MR (2021) A survey on automated log analysis for reliability engineering. ACM Comput Surv 54(6):1\u201337","journal-title":"ACM Comput Surv"},{"key":"10701_CR13","doi-asserted-by":"publisher","unstructured":"Liu Y, Zhang X, He S, Zhang H, Li L, Kang Y, Xu Y, Ma M, Lin, Q, Dang Y, Rajmohan S, Zhang D.: Uniparser: A unified log parser for heterogeneous log data. In: Proceedings of the ACM Web Conference 2022. WWW \u201922, pp 1893\u20131901. Association for Computing Machinery New York NY USA (2022). https:\/\/doi.org\/10.1145\/3485447.3511993","DOI":"10.1145\/3485447.3511993"},{"key":"10701_CR14","doi-asserted-by":"publisher","unstructured":"Cousti\u00e9 O, Mothe J, Teste O, Baril X (2020) Meting: A robust log parser based on frequent n-gram mining, pp 84\u201388. https:\/\/doi.org\/10.1109\/ICWS49710.2020.00018","DOI":"10.1109\/ICWS49710.2020.00018"},{"key":"10701_CR15","doi-asserted-by":"publisher","unstructured":"Le VH, Zhang H (2023) Log parsing with prompt-based few-shot learning. In: 2023 IEEE\/ACM 45th international conference on software engineering (ICSE) pp. 2438\u20132449. https:\/\/doi.org\/10.1109\/ICSE48619.2023.00204","DOI":"10.1109\/ICSE48619.2023.00204"},{"key":"10701_CR16","doi-asserted-by":"publisher","unstructured":"Xiao T, Quan Z, Wang ZJ, Zhao K, Liao X (2020) Lpv: A log parser based on vectorization for offline and online log parsing. In: 2020 IEEE international conference on data mining (ICDM), pp 1346\u20131351. https:\/\/doi.org\/10.1109\/ICDM50108.2020.00175","DOI":"10.1109\/ICDM50108.2020.00175"},{"key":"10701_CR17","doi-asserted-by":"publisher","unstructured":"Zhu J, He S, Liu J, He P, Xie Q, Zheng Z, Lyu MR (2019) Tools and benchmarks for automated log parsing. In: 2019 IEEE\/ACM 41st international conference on software engineering: Software engineering in practice (ICSE-SEIP), pp 121\u2013130. https:\/\/doi.org\/10.1109\/ICSE-SEIP.2019.00021","DOI":"10.1109\/ICSE-SEIP.2019.00021"},{"key":"10701_CR18","doi-asserted-by":"publisher","unstructured":"Wang X, Zhang X, Li L, He S, Zhang H, Liu Y, Zheng L, Kang Y, Lin Q, Dang Y, Rajmohan S, Zhang D (2022) Spine: a scalable log parser with feedback guidance. In: Proceedings of the 30th ACM joint european software engineering conference and symposium on the foundations of software engineering. ESEC\/FSE 2022, Association for Computing Machinery New York NY USA, pp 1198\u20131208. https:\/\/doi.org\/10.1145\/3540250.3549176","DOI":"10.1145\/3540250.3549176"},{"key":"10701_CR19","doi-asserted-by":"publisher","unstructured":"Du M, Li F, Zheng G, Srikumar V (2017) Deeplog: Anomaly detection and diagnosis from system logs through deep learning. In: Proceedings of the 2017 ACM SIGSAC conference on computer and communications security. CCS \u201917, Association for Computing Machinery New York NY USA, pp. 1285\u20131298. https:\/\/doi.org\/10.1145\/3133956.3134015","DOI":"10.1145\/3133956.3134015"},{"issue":"3","key":"10701_CR20","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1007\/s10922-024-09831-x","volume":"32","author":"E Karlsen","year":"2024","unstructured":"Karlsen E, Luo X, Zincir-Heywood N, Heywood M (2024) Benchmarking large language models for log analysis security and interpretation. J Netw Syst Manag 32(3):59","journal-title":"J Netw Syst Manag"},{"key":"10701_CR21","doi-asserted-by":"publisher","unstructured":"Guo H, Yuan S, Wu X (2021) Logbert: Log anomaly detection via bert. In: 2021 international joint conference on neural networks (IJCNN), pp 1\u20138. https:\/\/doi.org\/10.1109\/IJCNN52387.2021.9534113","DOI":"10.1109\/IJCNN52387.2021.9534113"},{"key":"10701_CR22","doi-asserted-by":"publisher","unstructured":"Le VH, Zhang H (2022) Log-based anomaly detection with deep learning: how far are we? In: Proceedings of the 44th international conference on software engineering. ICSE \u201922, Association for Computing Machinery, New York NY USA, pp 1356\u20131367. https:\/\/doi.org\/10.1145\/3510003.3510155","DOI":"10.1145\/3510003.3510155"},{"key":"10701_CR23","doi-asserted-by":"publisher","unstructured":"Zhao N, Wang H, Li Z, Peng X, Wang G, Pan Z, Wu Y, Feng Z, Wen X, Zhang W, Sui K, Pei D (2021) An empirical investigation of practical log anomaly detection for online service systems. ESEC\/FSE 2021. Association for Computing Machinery New York NY USA. https:\/\/doi.org\/10.1145\/3468264.3473933","DOI":"10.1145\/3468264.3473933"},{"key":"10701_CR24","doi-asserted-by":"publisher","unstructured":"Zhang X, Xu Y, Lin Q, Qiao B, Zhang H, Dang Y, Xie C, Yang X, Cheng Q, Li Z, Chen J, He X, Yao R, Lou JG, Chintalapati M, Shen F, Zhang D (2019) Robust log-based anomaly detection on unstable log data. In: Proceedings of the 2019 27th ACM joint meeting on european software engineering conference and symposium on the foundations of software engineering. ESEC\/FSE 2019, Association for Computing Machinery New York NY USA, pp 807\u2013817. https:\/\/doi.org\/10.1145\/3338906.3338931","DOI":"10.1145\/3338906.3338931"},{"key":"10701_CR25","doi-asserted-by":"publisher","unstructured":"Du Q, Zhao L, Xu J, Han Y, Zhang S (2021) Log-based anomaly detection with multi-head scaled dot-product attention mechanism, pp 335\u2013347. https:\/\/doi.org\/10.1007\/978-3-030-86472-9_31","DOI":"10.1007\/978-3-030-86472-9_31"},{"key":"10701_CR26","volume-title":"Rouge: A package for automatic evaluation of summaries","author":"CY Lin","year":"2004","unstructured":"Lin CY (2004) Rouge: A package for automatic evaluation of summaries. In text summarization branches out. association for computational linguistics, Association for Computational Linguistics Barcelona Spain"},{"key":"10701_CR27","doi-asserted-by":"publisher","unstructured":"Zhou X, Peng X, Xie T, Sun J, Ji C, Liu D, Xiang Q, He C (2019) Latent error prediction and fault localization for microservice applications by learning from system trace logs. In: Proceedings of the 2019 27th ACM joint meeting on european software engineering conference and symposium on the foundations of software engineering. ESEC\/FSE 2019, Association for Computing Machinery New York NY USA, pp 683\u2013694. https:\/\/doi.org\/10.1145\/3338906.3338961","DOI":"10.1145\/3338906.3338961"},{"key":"10701_CR28","doi-asserted-by":"publisher","unstructured":"He S, Lin Q, Lou JG, Zhang H, Lyu MR, Zhang D (2018) Identifying impactful service system problems via log analysis. In: Proceedings of the 2018 26th ACM joint meeting on european software engineering conference and symposium on the foundations of software engineering. ESEC\/FSE 2018, Association for Computing Machinery New York NY USA, pp 60\u201370. https:\/\/doi.org\/10.1145\/3236024.3236083","DOI":"10.1145\/3236024.3236083"},{"key":"10701_CR29","doi-asserted-by":"publisher","unstructured":"Liu Y, Yang H, Zhao P, Ma M, Wen C, Zhang H, Luo C, Lin Q, Yi C, Wang J, Zhang C, Wang P, Dang Y, Rajmohan S, Zhang D (2022) Multi-task hierarchical classification for disk failure prediction in online service systems. In: Proceedings of the 28th ACM SIGKDD conference on knowledge discovery and data mining. KDD \u201922, Association for Computing Machinery New York NY USA, pp 3438\u20133446. https:\/\/doi.org\/10.1145\/3534678.3539176","DOI":"10.1145\/3534678.3539176"},{"key":"10701_CR30","doi-asserted-by":"publisher","unstructured":"Ma M, Liu Y, Tong Y, Li H, Zhao P, Xu Y, Zhang H, He S, Wang L, Dang Y, Rajmohan S, Lin Q (2022) An empirical investigation of missing data handling in cloud node failure prediction. In: Proceedings of the 30th ACM Joint european software engineering conference and symposium on the foundations of software engineering. ESEC\/FSE 2022, Association for Computing Machinery New York NY USA, pp 1453\u20131464. https:\/\/doi.org\/10.1145\/3540250.3558946","DOI":"10.1145\/3540250.3558946"},{"key":"10701_CR31","doi-asserted-by":"publisher","unstructured":"Luo C, Zhao P, Qiao B, Wu Y, Zhang H, Wu W, Lu W, Dang Y, Rajmohan S, Lin Q, Zhang D (2021) Ntam: Neighborhood-temporal attention model for disk failure prediction in cloud platforms. In: Proceedings of the web conference 2021. WWW \u201921, Association for Computing Machinery New York NY USA, pp 1181\u20131191. https:\/\/doi.org\/10.1145\/3442381.3449867","DOI":"10.1145\/3442381.3449867"},{"key":"10701_CR32","doi-asserted-by":"publisher","unstructured":"Zhou P, Wang Y, Li Z, Wang X, Tyson G, Xie G (2020) Logsayer: Log pattern-driven cloud component anomaly diagnosis with machine learning, pp 1\u201310 . https:\/\/doi.org\/10.1109\/IWQoS49365.2020.9212954","DOI":"10.1109\/IWQoS49365.2020.9212954"},{"key":"10701_CR33","doi-asserted-by":"publisher","unstructured":"Meng W, Zaiter F, Zhang Y, Liu Y, Zhang S, Tao S, Zhu Y, Han T, Zhao Y, Wang E, Zhang Y, Pei D (2023) Logsummary: Unstructured log summarization for software systems. IEEE Trans Netw Serv Manag 20(3):3803\u20133815. https:\/\/doi.org\/10.1109\/TNSM.2023.3236994","DOI":"10.1109\/TNSM.2023.3236994"},{"key":"10701_CR34","doi-asserted-by":"publisher","unstructured":"Sui Y, Zhang Y, Sun J, Xu T, Zhang S, Li Z, Sun Y, Guo F, Shen J, Zhang Y, Pei D, Yang X, Yu L (2023) Logkg: Log failure diagnosis through knowledge graph. IEEE Trans Serv Comput16(5):3493\u20133507. https:\/\/doi.org\/10.1109\/TSC.2023.3293890","DOI":"10.1109\/TSC.2023.3293890"},{"key":"10701_CR35","doi-asserted-by":"publisher","unstructured":"Liu F, Wen Y, Zhang D, Jiang X, Xing X, Meng D (2019) Log2vec: A heterogeneous graph embedding based approach for detecting cyber threats within enterprise. In: Proceedings of the 2019 ACM SIGSAC conference on computer and communications security. CCS \u201919, Association for Computing Machinery New York NY USA, pp 1777\u20131794. https:\/\/doi.org\/10.1145\/3319535.3363224","DOI":"10.1145\/3319535.3363224"},{"issue":"9","key":"10701_CR36","doi-asserted-by":"publisher","first-page":"3227","DOI":"10.1109\/TSE.2021.3083715","volume":"48","author":"S Locke","year":"2022","unstructured":"Locke S, Li H, Chen THP, Shang W, Liu W (2022) Logassist: Assisting log analysis through log summarization. IEEE Trans Softw Eng 48(9):3227\u20133241. https:\/\/doi.org\/10.1109\/TSE.2021.3083715","journal-title":"IEEE Trans Softw Eng"},{"key":"10701_CR37","doi-asserted-by":"crossref","unstructured":"He M, Jia T, Duan C, Cai H, Li Y, Huang G (2024) Llmelog: An approach for anomaly detection based on llm-enriched log events. In: 2024 IEEE 35th international symposium on software reliability engineering (ISSRE), IEEE, pp 132\u2013143","DOI":"10.1109\/ISSRE62328.2024.00023"},{"key":"10701_CR38","doi-asserted-by":"crossref","unstructured":"Liu Y, Tao S, Meng W, Wang J, Ma W, Chen Y, Zhao Y, Yang H, Jiang Y (2024) Interpretable online log analysis using large language models with prompt strategies. In: Proceedings of the 32nd IEEE\/ACM international conference on program comprehension, pp 35\u201346","DOI":"10.1145\/3643916.3644408"},{"key":"10701_CR39","unstructured":"OpenAI Achiam J, Adler S, Agarwal S, Ahmad L, Akkaya I, Aleman, FL, Almeida D, Altenschmidt J, Altman S, Anadkat S, Avila R, Babuschkin I, Balaji S et al (2024) GPT-4 technical report. arXiv:2303.08774"},{"issue":"3","key":"10701_CR40","doi-asserted-by":"publisher","first-page":"3803","DOI":"10.1109\/TNSM.2023.3236994","volume":"20","author":"W Meng","year":"2023","unstructured":"Meng W, Zaiter F, Zhang Y, Liu Y, Zhang S, Tao S, Zhu Y, Han T, Zhao Y, Wang E, Zhang Y, Pei D (2023) Logsummary: Unstructured log summarization for software systems. IEEE Trans Netw Serv Manag 20(3):3803\u20133815. https:\/\/doi.org\/10.1109\/TNSM.2023.3236994","journal-title":"IEEE Trans Netw Serv Manag"},{"key":"10701_CR41","unstructured":"THUDM (2024) Thudm\/chatglm4. https:\/\/github.com\/THUDM\/ChatGLM4"},{"key":"10701_CR42","unstructured":"Bai J, Bai S, Chu Y, Cui Z, Dang K, Deng X, Fan Y, Ge W, Han Y, Huang F, Hui B, Ji L, Li M et al (2023) Qwen Technical Report . arXiv:2309.16609"},{"key":"10701_CR43","doi-asserted-by":"crossref","unstructured":"Fawcett T (2006) An introduction to roc analysis. Patt Recognit Lett 27(8):861\u2013874","DOI":"10.1016\/j.patrec.2005.10.010"},{"key":"10701_CR44","unstructured":"Lin CY (2004) Rouge: A package for automatic evaluation of summaries. In text summarization branches out. association for computational linguistics. Association for Computational Linguistics Barcelona Spain"},{"key":"10701_CR45","doi-asserted-by":"crossref","unstructured":"Papineni K, Roukos S, Ward T, Zhu WJ (2002) Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the association for computational linguistics, Association for Computational Linguistics Philadelphia Pennsylvania USA","DOI":"10.3115\/1073083.1073135"},{"key":"10701_CR46","unstructured":"Liang P, Bommasani R, Lee T, Tsipras D, Soylu D, Yasunaga M, Zhang Y, Narayanan D, Wu Y, Kumar A et al (2022) Holistic evaluation of language models. arXiv e-prints"},{"key":"10701_CR47","unstructured":"Srivastava A, Rastogi A, Rao A, Shoeb AAM, Abid A, Fisch A, Brown AR, Santoro A, Gupta A, Garriga-Alonso A, et al (2022) Beyond the imitation game: Quantifying and extrapolating the capabilities of language models. arXiv e-prints"},{"key":"10701_CR48","unstructured":"Zhang L, Cai W, Liu Z, Yang Z, Dai W, Liao Y, Qin Q, Li Y, Liu X, Liu Z et al (2023) Fineval: A chinese financial domain knowledge evaluation benchmark for large language models. arXiv e-prints"},{"key":"10701_CR49","doi-asserted-by":"crossref","unstructured":"Singhal K, Azizi S, Tu T, Mahdavi SS, Wei J, Chung HW, Scales N, Tanwani A, Cole-Lewis H, Pfohl S, et al.: Large language models encode clinical knowledge. Nat 620(7972):172\u2013180","DOI":"10.1038\/s41586-023-06291-2"},{"key":"10701_CR50","unstructured":"Li J, Wang X, Wu X, Zhang Z, Xu X, Fu J, Tiwari P, Wan X, Wang B (2023) Huatuo-26m,a large-scale chinese medical qa dataset. arXiv e-prints"},{"key":"10701_CR51","unstructured":"Miao Y, Bai Y, Li\u00a0Chen HS, Dan\u00a0Li Wang X, Luo Z, Sun D, Xu X, Zhang Q, Xiang C, Li, X (2023) An empirical study of netops capability of pre-trained large language models. arXiv e-prints"},{"key":"10701_CR52","unstructured":"Liu Y, Pei C, Xu L, Chen B, Sun M, Zhang Z, Sun Y, Zhang S, Wang K, Zhang H, Li J, Xie G, Wen X, Nie X, Ma M, Pei D (2023) Opseval: A comprehensive it operations benchmark suite for large language models. arXiv e-prints"},{"key":"10701_CR53","doi-asserted-by":"crossref","unstructured":"Silva A, Monperrus M (2024) Repairbench: Leaderboard of frontier models for program repair. arXiv preprint arXiv:2409.18952","DOI":"10.1109\/LLM4Code66737.2025.00006"},{"key":"10701_CR54","doi-asserted-by":"crossref","unstructured":"Jiang Z, Liu J, Chen Z, Li Y, Huang J, Huo Y, He P, Gu J, Lyu MR (2024) Lilac: Log parsing using llms with adaptive parsing cache. Proc ACM Softw Eng 1(FSE):137\u2013160","DOI":"10.1145\/3643733"},{"issue":"5","key":"10701_CR55","doi-asserted-by":"publisher","first-page":"3493","DOI":"10.1109\/TSC.2023.3293890","volume":"16","author":"Y Sui","year":"2023","unstructured":"Sui Y, Zhang Y, Sun J, Xu T, Zhang S, Li Z, Sun Y, Guo F, Shen J, Zhang Y, Pei D, Yang X, Yu L (2023) Logkg: Log failure diagnosis through knowledge graph. IEEE Trans Serv Comput 16(5):3493\u20133507. https:\/\/doi.org\/10.1109\/TSC.2023.3293890","journal-title":"IEEE Trans Serv Comput"},{"key":"10701_CR56","unstructured":"Zhang W, Cheng X, Zhang Y, Yang J, Guo H, Li Z, Yin X, Guan X, Shi X, Zheng L et al (2024) Eclipse: Semantic entropy-lcs for cross-lingual industrial log parsing. arXiv preprint arXiv:2405.13548"},{"key":"10701_CR57","unstructured":"Liu J, Huang J, Huo Y, Jiang Z, Gu J, Chen Z, Feng C, Yan M, Lyu MR (2023) Scalable and adaptive log-based anomaly detection with expert in the loop. arXiv preprint arXiv:2306.05032"},{"key":"10701_CR58","doi-asserted-by":"crossref","unstructured":"Qi J, Huang S, Luan Z, Yang S, Fung C, Yang H, Qian D, Shang J, Xiao Z, Wu Z (2023) Loggpt: Exploring chatgpt for log-based anomaly detection. In: 2023 IEEE international conference on high performance computing & communications data science & systems smart city & dependability in sensor cloud & big data systems & application (HPCC\/DSS\/SmartCity\/DependSys), IEEE, pp 273\u2013280","DOI":"10.1109\/HPCC-DSS-SmartCity-DependSys60770.2023.00045"},{"key":"10701_CR59","doi-asserted-by":"crossref","unstructured":"Shan S, Huo Y, Su Y, Li Y, Li D, Zheng Z (2024) Face it yourselves: An llm-based two-stage strategy to localize configuration errors via logs. In: Proceedings of the 33rd ACM SIGSOFT international symposium on software testing and analysis, pp 13\u201325","DOI":"10.1145\/3650212.3652106"},{"key":"10701_CR60","doi-asserted-by":"crossref","unstructured":"Xu J, Cui Z, Zhao Y, Zhang X, He S, He P, Li L, Kang Y, Lin Q, Dang Y et al (2024) Unilog: Automatic logging via llm and in-context learning. In: Proceedings of the 46th IEEE\/ACM international conference on software engineering, pp 1\u201312","DOI":"10.1145\/3597503.3623326"},{"key":"10701_CR61","doi-asserted-by":"crossref","unstructured":"Karlsen E, Luo X, Zincir-Heywood N, Heywood M (2024) Benchmarking large language models for log analysis security and interpretation. J Netw Syst Manag 32(3):59","DOI":"10.1007\/s10922-024-09831-x"},{"key":"10701_CR62","unstructured":"Radford A, Wu J, Child R, Luan D, Amodei D, Sutskever I (2019) Language models are unsupervised multitask learners. https:\/\/api.semanticscholar.org\/CorpusID:160025533"},{"key":"10701_CR63","doi-asserted-by":"crossref","unstructured":"Block J, Chen YP, Budharapu A, Anthony L, Dorr B (2023) Summary cycles: Exploring the impact of prompt engineering on large language models\u2019 interaction with interaction log information. In: Proceedings of the 4th workshop on evaluation and comparison of NLP systems, pp 85\u201399","DOI":"10.18653\/v1\/2023.eval4nlp-1.7"},{"key":"10701_CR64","doi-asserted-by":"publisher","unstructured":"Rosado T, Bernardino J (2014) An overview of openstack architecture. In: Proceedings of the 18th international database engineering & applications symposium. IDEAS \u201914, Association for Computing Machinery New York NY USA, pp 366\u2013367. https:\/\/doi.org\/10.1145\/2628194.2628195","DOI":"10.1145\/2628194.2628195"},{"key":"10701_CR65","doi-asserted-by":"crossref","unstructured":"Jiang Z, Liu J, Huang J, Li Y, Huo Y, Gu J, Chen Z, Zhu J, Lyu MR (2024) A large-scale evaluation for log parsing techniques: How far are we? In: Proceedings of the 33rd ACM SIGSOFT international symposium on software testing and analysis, pp 223\u2013234","DOI":"10.1145\/3650212.3652123"},{"key":"10701_CR66","unstructured":"OpenAI (2022) Introducing ChatGPT. https:\/\/openai.com\/blog\/chatgpt"},{"key":"10701_CR67","unstructured":"Anthropic (2023). https:\/\/claude.ai\/"},{"key":"10701_CR68","unstructured":"Team G, Anil R, Borgeaud S, Alayrac JB, Yu J, Soricut R, Schalkwyk J et al (2023) Gemini: A family of highly capable multimodal models. arXiv preprint arXiv:2312.11805"},{"key":"10701_CR69","unstructured":"Jiang AQ, Sablayrolles A, Mensch A, Bamford C, Chaplot DS, Casas D, Bressand F, Lengyel G, Lample G, Saulnier L, Lavaud LR, Lachaux MA, Stock P, Scao TL, Lavril T, Wang T, Lacroix T, Sayed WE (2023) Mistral 7B"},{"key":"10701_CR70","unstructured":"InternLM (2023) InternLM: A multilingual language model with progressively enhanced capabilities. https:\/\/github.com\/InternLM\/InternLM"},{"key":"10701_CR71","unstructured":"CodeFuse (2023). https:\/\/github.com\/codefuse-ai\/CodeFuse-DevOps-Model\/"},{"key":"10701_CR72","unstructured":"BAAI (2023). https:\/\/github.com\/FlagAI-Open\/Aquila2"},{"key":"10701_CR73","unstructured":"Yang A, Xiao B, Wang B, Zhang B, Bian C, Yin C, Lv C, Pan D, Wang D, Yan D, Yang F, Deng F, Wang F, Liu F et al (2023) Baichuan 2: Open large-scale language models. arXiv:2309.10305"},{"key":"10701_CR74","doi-asserted-by":"crossref","unstructured":"Liu Y, Tao S, Meng W, Yao F, Zhao X, Yang H (2024) Logprompt: Prompt engineering towards zero-shot and interpretable log analysis. In: Proceedings of the 2024 IEEE\/ACM 46th international conference on software engineering: Companion proceedings, pp 364\u2013365","DOI":"10.1145\/3639478.3643108"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-025-10701-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10664-025-10701-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-025-10701-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T02:40:45Z","timestamp":1762310445000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10664-025-10701-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,10]]},"references-count":74,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["10701"],"URL":"https:\/\/doi.org\/10.1007\/s10664-025-10701-6","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"value":"1382-3256","type":"print"},{"value":"1573-7616","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,10]]},"assertion":[{"value":"7 July 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 October 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed consent"}},{"value":"The authors declare that they have no conflict of interest.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Clinical trial number"}}],"article-number":"173"}}