{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,8]],"date-time":"2026-07-08T01:40:17Z","timestamp":1783474817925,"version":"3.55.0"},"reference-count":71,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,3,31]]},"DOI":"10.1109\/icst62969.2025.10988921","type":"proceedings-article","created":{"date-parts":[[2025,5,20]],"date-time":"2025-05-20T17:05:21Z","timestamp":1747760721000},"page":"314-325","source":"Crossref","is-referenced-by-count":2,"title":["Benchmarking Open-Source Large Language Models for Log Level Suggestion"],"prefix":"10.1109","author":[{"given":"Yi Wen","family":"Heng","sequence":"first","affiliation":[{"name":"Concordia University,Software PErformance, Analysis, and Reliability (SPEAR) lab,Montreal,Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zeyang","family":"Ma","sequence":"additional","affiliation":[{"name":"Concordia University,Software PErformance, Analysis, and Reliability (SPEAR) lab,Montreal,Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhenhao","family":"Li","sequence":"additional","affiliation":[{"name":"York University,Toronto,Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dong Jae","family":"Kim","sequence":"additional","affiliation":[{"name":"DePaul University,Chicago,Illinois,USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tse-Hsun","family":"Chen","sequence":"additional","affiliation":[{"name":"Concordia University,Software PErformance, Analysis, and Reliability (SPEAR) lab,Montreal,Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","first-page":"243","article-title":"Analytics-driven load testing: An industrial experience report on load testing of large-scale systems","volume-title":"2017 IEEE\/ACM 39th International Conference on Software Engineering: Software Engineering in Practice Track (ICSE-SEIP)","author":"Chen","year":"2017"},{"key":"ref2","article-title":"Gated graph sequence neural networks","author":"Li","year":"2015","journal-title":"arXiv: Learning"},{"key":"ref3","doi-asserted-by":"crossref","DOI":"10.1145\/3132747.3132778","article-title":"Log20: Fully automated optimal placement of log printing statements under specified overhead threshold","volume-title":"Proceedings of the 26th Symposium on Operating Systems Principles","author":"Zhao","year":"2017"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/SANER53432.2022.00051"},{"key":"ref5","doi-asserted-by":"crossref","DOI":"10.1145\/3338906.3338961","article-title":"Latent error prediction and fault localization for microservice applications by learning from system trace logs","volume-title":"Proceedings of the 2019 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering","author":"Zhou","year":"2019"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"2905","DOI":"10.1109\/TSE.2021.3071473","article-title":"Pathidea: Improving information retrieval-based bug localization by re-constructing execution paths using logs","volume":"48","author":"Chen","year":"2022","journal-title":"IEEE Transactions on Software Engineering"},{"key":"ref7","doi-asserted-by":"crossref","DOI":"10.1145\/3460319.3464824","article-title":"Log-based slicing for system-level test cases","volume-title":"Proceedings of the 30th ACM SIGSOFT International Symposium on Software Testing and Analysis","author":"Messaoudi","year":"2021"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ASE56229.2023.00136"},{"key":"ref9","first-page":"198","article-title":"Towards providing automated supports to developers on writing logging statements","volume-title":"2020 IEEE\/ACM 42nd International Conference on Software Engineering: Companion Proceedings (ICSE-Companion)","author":"Li","year":"2020"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00079"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-88494-9_16"},{"key":"ref12","doi-asserted-by":"crossref","DOI":"10.1145\/3338906.3338931","article-title":"Robust log-based anomaly detection on unstable log data","volume-title":"Proceedings of the 2019 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering","author":"Zhang","year":"2019"},{"key":"ref13","doi-asserted-by":"crossref","DOI":"10.1109\/ICSE-SEIP.2019.00021","volume-title":"Tools and benchmarks for automated log parsing","author":"Zhu","year":"2019"},{"key":"ref14","first-page":"241","article-title":"Kernel-supported cost-effective audit logging for causality tracking","volume-title":"Proceedings of the 2018 USENIX Conference on Usenix Annual Technical Conference, ser. USENIX ATC \u201818","author":"Ma"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2021.3060918"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2020.2970422"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/2076450.2076466"},{"key":"ref18","article-title":"Sherlog: error diagnosis by connecting clues from run-time logs","author":"Yuan","year":"2010","journal-title":"ASPLOS XV"},{"key":"ref19","first-page":"102","article-title":"Characterizing logging practices in open-source software","volume-title":"2012 34th International Conference on Software Engineering (ICSE)","author":"Yuan"},{"key":"ref20","volume-title":"Responsible use of GitHub Copilot Chat in your IDE - GitHub Docs - docs.github.com","year":"2024"},{"key":"ref21","volume-title":"Apple Joins A Growing List Of Companies Cracking Down On Use Of ChatGPT By Staffers-Here\u2019s Why - forbes.com","author":"Ray","year":"2024"},{"key":"ref22","first-page":"02","volume-title":"Samsung Bans ChatGPT Among Employees After Sensitive Code Leak forbes.com","author":"Ray"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE43902.2021.00131"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3533767.3534379"},{"key":"ref25","article-title":"Loglevelllm repository","year":"2024","journal-title":"LogLevelLLM"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/s10664-016-9456-2"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"473","DOI":"10.1109\/TSE.2022.3154672","article-title":"An empirical study on log level prediction for multi-component systems","volume":"49","author":"Ouatiti","year":"2023","journal-title":"IEEE Transactions on Software Engineering"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3643754"},{"key":"ref29","first-page":"129","article-title":"Unilog: Automatic logging via llm and in-context learning","volume-title":"2024 IEEE\/ACM 46th International Conference on Software Engineering (ICSE)","author":"Xu"},{"key":"ref30","article-title":"Evaluating large language models trained on code","volume":"abs\/2107.03374","author":"C","year":"2021","journal-title":"ArXiv"},{"key":"ref31","article-title":"Exploring the effectiveness of llms in automated logging generation: An empirical study","volume":"abs\/2307.05950","author":"Li","year":"2023","journal-title":"ArXiv"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3639085"},{"key":"ref33","volume-title":"Soen-101: Code generation by emulating software process models using large language model agents","author":"Lin","year":"2025"},{"key":"ref34","article-title":"Reasoning runtime behavior of a program with llm: How far are we?","volume-title":"Proceedings of the IEEE\/ACM 47th International Conference on Software Engineering","author":"Chen","year":"2025"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3650212.3680359"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3639150"},{"key":"ref37","article-title":"Llmparser: A llm-based log parsing framework","volume":"abs\/2310.01796","author":"Jiang","year":"2023","journal-title":"ArXiv"},{"key":"ref38","volume-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"ref39","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019"},{"key":"ref40","volume-title":"Roberta: A robustly optimized bert pretraining approach","author":"Liu","year":"2019"},{"key":"ref41","article-title":"Scaling laws for neural language models","volume":"abs\/2001.08361","author":"Kaplan","year":"2020","journal-title":"ArXiv"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"ref43","volume-title":"Graphcodebert: Pre-training code representations with data flow","author":"Guo","year":"2021"},{"key":"ref44","volume-title":"Code llama: Open foundation models for code","author":"Rozi\u00e8re","year":"2024"},{"key":"ref45","volume-title":"Language models are few-shot learners","author":"Brown","year":"2020"},{"key":"ref46","volume-title":"Generalizing from a few examples: A survey on few-shot learning","author":"Wang","year":"2020"},{"key":"ref47","volume-title":"Fine-tuning pretrained language models: Weight initializations, data orders, and early stopping","author":"Dodge","year":"2020"},{"key":"ref48","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/2021.acl-long.295","volume-title":"Making pre-trained language models better few-shot learners","author":"Gao","year":"2021"},{"key":"ref49","volume-title":"Lora: Low-rank adaptation of large language models","author":"Hu","year":"2021"},{"key":"ref50","first-page":"361","article-title":"Where shall we log? studying and suggesting logging locations in code blocks","volume-title":"2020 35th IEEE\/ACM International Conference on Automated Software Engineering (ASE)","author":"Li","year":"2020"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2019.00032"},{"key":"ref52","volume-title":"SLF4J FAQ","year":"2023"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.859"},{"key":"ref54","doi-asserted-by":"crossref","first-page":"64 323","DOI":"10.1109\/ACCESS.2019.2917620","article-title":"Diversity in machine learning","volume":"7","author":"Gong","year":"2018","journal-title":"IEEE Access"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICPC.2011.17"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1145\/3510003.3511561"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2019.2941943"},{"key":"ref58","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1023\/A:1010920819831","article-title":"A simple generalisation of the area under the roc curve for multiple class classification problems","volume":"45","author":"Hand","year":"2001","journal-title":"Machine Learning"},{"key":"ref59","article-title":"Openprompt: An open-source framework for prompt-learning","author":"Ding","year":"2021","journal-title":"arXiv preprint"},{"key":"ref60","volume-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2019"},{"key":"ref61","article-title":"A survey of hallucination in large foundation models","author":"Rawte","year":"2023","journal-title":"arXiv preprint"},{"key":"ref62","article-title":"In chatgpt we trust? measuring and characterizing the reliability of chatgpt","author":"Shen","year":"2023","journal-title":"arXiv preprint"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2024.3492204"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1145\/3238147.3238193"},{"key":"ref65","volume-title":"Large language models can be easily distracted by irrelevant context","author":"Shi","year":"2023"},{"key":"ref66","volume-title":"An empirical evaluation of using large language models forautomated unit test generation","author":"Sch\u00e4fer","year":"2023"},{"key":"ref67","volume-title":"Lost in the middle: How language models use long contexts","author":"Liu","year":"2023"},{"key":"ref68","volume-title":"How transferable are features in deep neural networks?","author":"Yosinski","year":"2014"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00078"},{"key":"ref70","volume-title":"Hits: High-coverage llm-based unittest generation via method slicing","author":"Wang","year":"2024"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1007\/s10664-018-9595-8"}],"event":{"name":"2025 IEEE Conference on Software Testing, Verification and Validation (ICST)","location":"Napoli, Italy","start":{"date-parts":[[2025,3,31]]},"end":{"date-parts":[[2025,4,4]]}},"container-title":["2025 IEEE Conference on Software Testing, Verification and Validation (ICST)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10988917\/10988918\/10988921.pdf?arnumber=10988921","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T05:10:43Z","timestamp":1747804243000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10988921\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,31]]},"references-count":71,"URL":"https:\/\/doi.org\/10.1109\/icst62969.2025.10988921","relation":{},"subject":[],"published":{"date-parts":[[2025,3,31]]}}}