{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,13]],"date-time":"2026-06-13T01:58:12Z","timestamp":1781315892307,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2024YFB4505903"],"award-info":[{"award-number":["2024YFB4505903"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62202445"],"award-info":[{"award-number":["62202445"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62272249"],"award-info":[{"award-number":["62272249"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62302244"],"award-info":[{"award-number":["62302244"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Beijing National Research Center For Information Science And Technology"},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["XXX-63253249"],"award-info":[{"award-number":["XXX-63253249"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,23]]},"DOI":"10.1145\/3696630.3728572","type":"proceedings-article","created":{"date-parts":[[2025,7,28]],"date-time":"2025-07-28T19:09:27Z","timestamp":1753729767000},"page":"503-513","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["OpsEval: A Comprehensive Benchmark Suite for Evaluating Large Language Models\u2019 Capability in IT Operations Domain"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-0324-7749","authenticated-orcid":false,"given":"Yuhe","family":"Liu","sequence":"first","affiliation":[{"name":"Tsinghua University, Beijing, China"},{"name":"BNRist, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9288-4787","authenticated-orcid":false,"given":"Changhua","family":"Pei","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences, Computer Network Information Center, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7364-1307","authenticated-orcid":false,"given":"Longlong","family":"Xu","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"},{"name":"BNRist, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6804-7473","authenticated-orcid":false,"given":"Bohan","family":"Chen","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"},{"name":"BNRist, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4205-7182","authenticated-orcid":false,"given":"Mingze","family":"Sun","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"},{"name":"BNRist, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9186-8069","authenticated-orcid":false,"given":"Zhirui","family":"Zhang","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0266-7899","authenticated-orcid":false,"given":"Yongqian","family":"Sun","sequence":"additional","affiliation":[{"name":"Nankai University, Tianjin, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0330-0028","authenticated-orcid":false,"given":"Shenglin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Nankai University, Tianjin, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1971-0860","authenticated-orcid":false,"given":"Kun","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"},{"name":"BNRist, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9351-3320","authenticated-orcid":false,"given":"Haiming","family":"Zhang","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences, Computer Network Information Center, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6253-9808","authenticated-orcid":false,"given":"Jianhui","family":"Li","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences, Computer Network Information Center, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4964-1135","authenticated-orcid":false,"given":"Gaogang","family":"Xie","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences, Computer Network Information Center, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-4202-3748","authenticated-orcid":false,"given":"Xidao","family":"Wen","sequence":"additional","affiliation":[{"name":"BizSeer, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0371-854X","authenticated-orcid":false,"given":"Xiaohui","family":"Nie","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences, Computer Network Information Center, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6303-1731","authenticated-orcid":false,"given":"Minghua","family":"Ma","sequence":"additional","affiliation":[{"name":"Microsoft, Redmond, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5113-838X","authenticated-orcid":false,"given":"Dan","family":"Pei","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"},{"name":"BNRist, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,7,28]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Scale AI. 2024. SEAL Leaderboards. https:\/\/scale.com\/leaderboard Accessed: 2024-06-03."},{"key":"e_1_3_2_1_2_1","unstructured":"AI@Meta. 2024. Llama 3 Model Card. https:\/\/github.com\/meta-llama\/llama3\/blob\/main\/MODEL_CARD.md"},{"key":"e_1_3_2_1_3_1","volume-title":"Training Verifiers to Solve Math Word Problems. arXiv preprint arXiv:2110.14168","author":"Cobbe Karl","year":"2021","unstructured":"Karl Cobbe, Vineet Kosaraju, Mohammad Bavarian, Mark Chen, Heewoo Jun, Lukasz Kaiser, Matthias Plappert, Jerry Tworek, Jacob Hilton, Reiichiro Nakano, Christopher Hesse, and John Schulman. 2021. Training Verifiers to Solve Math Word Problems. arXiv preprint arXiv:2110.14168 (2021)."},{"key":"e_1_3_2_1_4_1","unstructured":"Matthijs Douze Alexandr Guzhva Chengqi Deng Jeff Johnson Gergely Szilvasy Pierre-Emmanuel Mazar\u00e9 Maria Lomeli Lucas Hosseini and Herv\u00e9 J\u00e9gou. 2024. The Faiss library. (2024). arXiv:2401.08281 [cs.LG]"},{"key":"e_1_3_2_1_5_1","volume-title":"Luis Espinosa Anke, and Steven Schockaert","author":"Es Shahul","year":"2024","unstructured":"Shahul Es, Jithin James, Luis Espinosa Anke, and Steven Schockaert. 2024. RAGAs: Automated Evaluation of Retrieval Augmented Generation. In Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations, Nikolaos Aletras and Orphee De Clercq (Eds.). Association for Computational Linguistics, St. Julians, Malta, 150\u2013158. https:\/\/aclanthology.org\/2024.eacl-demo.16"},{"key":"e_1_3_2_1_6_1","volume-title":"OWL: A Large Language Model for IT Operations. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SZOQ9RKYJu","author":"Guo Hongcheng","year":"2024","unstructured":"Hongcheng Guo, Jian Yang, Jiaheng Liu, Liqun Yang, Linzheng Chai, Jiaqi Bai, Junran Peng, Xiaorong Hu, Chao Chen, Dongfeng Zhang, xu Shi, Tieqiao Zheng, liangfan zheng, Bo Zhang, Ke Xu, and Zhoujun Li. 2024. OWL: A Large Language Model for IT Operations. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SZOQ9RKYJu"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Hendrycks Dan","year":"2021","unstructured":"Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt. 2021. Measuring Massive Multitask Language Understanding. Proceedings of the International Conference on Learning Representations (ICLR) (2021)."},{"key":"e_1_3_2_1_8_1","unstructured":"Yuzhen Huang Yuzhuo Bai Zhihao Zhu Junlei Zhang Jinghan Zhang Tangjun Su Junteng Liu Chuancheng Lv Yikai Zhang Jiayi Lei et al. 2023. C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models. arXiv e-prints (2023) arXiv-2305."},{"key":"e_1_3_2_1_9_1","volume-title":"Jamin Shin, Sean Welleck, Graham Neubig, Moontae Lee, Kyungjae Lee, and Minjoon Seo.","author":"Kim Seungone","year":"2024","unstructured":"Seungone Kim, Juyoung Suk, Shayne Longpre, Bill Yuchen Lin, Jamin Shin, Sean Welleck, Graham Neubig, Moontae Lee, Kyungjae Lee, and Minjoon Seo. 2024. Prometheus 2: An Open Source Language Model Specialized in Evaluating Other Language Models. arXiv:2405.01535 [cs.CL]"},{"key":"e_1_3_2_1_10_1","unstructured":"Andrew Lerner. 2017. AIOps Platforms\u2014Gartner."},{"key":"e_1_3_2_1_11_1","unstructured":"Percy Liang Rishi Bommasani Tony Lee Dimitris Tsipras Dilara Soylu Michihiro Yasunaga Yian Zhang Deepak Narayanan Yuhuai Wu Ananya Kumar et al. 2022. Holistic Evaluation of Language Models. arXiv e-prints (2022) arXiv-2211."},{"key":"e_1_3_2_1_12_1","volume-title":"ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out. Association for Computational Linguistics, Barcelona, Spain, 74\u201381. https:\/\/aclanthology.org\/W04-1013"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2309.05557"},{"key":"e_1_3_2_1_14_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_15_1","unstructured":"OpenAI. 2023. GPT-4V(ision) System Card. https:\/\/cdn.openai.com\/papers\/GPTV_System_Card.pdf"},{"key":"e_1_3_2_1_16_1","unstructured":"OpenAI. 2024. Hello GPT-4o. https:\/\/openai.com\/index\/hello-gpt-4o\/ Accessed: 2024-06-01."},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a Method for Automatic Evaluation of Machine Translation. In Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics, Philadelphia, Pennsylvania, USA, 311\u2013318. 10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_18_1","volume-title":"Instruction Tuning with GPT-4. arXiv preprint arXiv:2304.03277","author":"Peng Baolin","year":"2023","unstructured":"Baolin Peng, Chunyuan Li, Pengcheng He, Michel Galley, and Jianfeng Gao. 2023. Instruction Tuning with GPT-4. arXiv preprint arXiv:2304.03277 (2023)."},{"key":"e_1_3_2_1_19_1","unstructured":"QwenLM. 2023. QwenLM\/Qwen-7B. https:\/\/github.com\/QwenLM\/Qwen-7B"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.148"},{"key":"e_1_3_2_1_21_1","volume-title":"Nathan Scales, Ajay Tanwani, Heather Cole-Lewis, Stephen Pfohl, et al.","author":"Singhal Karan","year":"2022","unstructured":"Karan Singhal, Shekoofeh Azizi, Tao Tu, S Sara Mahdavi, Jason Wei, Hyung Won Chung, Nathan Scales, Ajay Tanwani, Heather Cole-Lewis, Stephen Pfohl, et al. 2022. Large Language Models Encode Clinical Knowledge. arXiv preprint arXiv:2212.13138 (2022)."},{"key":"e_1_3_2_1_22_1","volume-title":"Abubakar Abid, Adam Fisch, Adam R Brown, Adam Santoro, Aditya Gupta, Adri\u00e0 Garriga-Alonso, et al.","author":"Srivastava Aarohi","year":"2022","unstructured":"Aarohi Srivastava, Abhinav Rastogi, Abhishek Rao, Abu Awal Md Shoeb, Abubakar Abid, Adam Fisch, Adam R Brown, Adam Santoro, Aditya Gupta, Adri\u00e0 Garriga-Alonso, et al. 2022. Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models. arXiv e-prints (2022), arXiv-2206."},{"key":"e_1_3_2_1_23_1","volume-title":"Hashimoto","author":"Taori Rohan","year":"2023","unstructured":"Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li, Carlos Guestrin, Percy Liang, and Tatsunori B. Hashimoto. 2023. Stanford Alpaca: An Instruction-following LLaMA model. https:\/\/github.com\/tatsu-lab\/stanford_alpaca."},{"key":"e_1_3_2_1_24_1","volume-title":"Dingjie Song, Zhiyi Zhang, Zhihong Chen, Qingying Xiao, Feng Jiang, Jianquan Li, Xiang Wan, Benyou Wang, et al.","author":"Wang Xidong","year":"2023","unstructured":"Xidong Wang, Guiming Hardy Chen, Dingjie Song, Zhiyi Zhang, Zhihong Chen, Qingying Xiao, Feng Jiang, Jianquan Li, Xiang Wan, Benyou Wang, et al. 2023. CMB: A Comprehensive Medical Benchmark in Chinese. arXiv e-prints (2023), arXiv-2308."},{"key":"e_1_3_2_1_25_1","volume-title":"Aakanksha Chowdhery, and Denny Zhou.","author":"Wang Xuezhi","year":"2023","unstructured":"Xuezhi Wang, Jason Wei, Dale Schuurmans, Quoc Le, Ed Chi, Sharan Narang, Aakanksha Chowdhery, and Denny Zhou. 2023. Self-Consistency Improves Chain of Thought Reasoning in Language Models. arXiv:2203.11171 [cs.CL]"},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). Association for Computational Linguistics","author":"Wang Yizhong","year":"1865","unstructured":"Yizhong Wang, Yeganeh Kordi, and Swaroop et al. Mishra. 2023. Self-Instruct: Aligning Language Models with Self-Generated Instructions. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). Association for Computational Linguistics, Toronto, Canada, 13484\u201313508. 10.18653\/v1\/2023.acl-long.754"},{"key":"e_1_3_2_1_27_1","volume-title":"Chi, Quoc Le, and Denny Zhou","author":"Wei Jason","year":"2023","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Brian Ichter, Fei Xia, Ed Chi, Quoc Le, and Denny Zhou. 2023. Chain-of-Thought Prompting Elicits Reasoning in Large Language Models. arXiv:2201.11903 [cs.CL]"},{"key":"e_1_3_2_1_28_1","volume-title":"Skywork: A More Open Bilingual Foundation Model. arXiv:2310.19341 [cs.CL]","author":"Wei Tianwen","year":"2023","unstructured":"Tianwen Wei and et.al. 2023. Skywork: A More Open Bilingual Foundation Model. arXiv:2310.19341 [cs.CL]"},{"key":"e_1_3_2_1_29_1","unstructured":"Shitao Xiao Zheng Liu Peitian Zhang and Niklas Muennighoff. 2023. C-Pack: Packaged Resources To Advance General Chinese Embedding. arXiv:2309.07597 [cs.CL]"},{"key":"e_1_3_2_1_30_1","unstructured":"Aohan Zeng Xiao Liu Zhengxiao Du Zihan Wang Hanyu Lai Ming Ding Zhuoyi Yang Yifan Xu Wendi Zheng Xiao Xia et al. 2022. Glm-130b: An open bilingual pre-trained model. arXiv preprint arXiv:2210.02414 (2022)."}],"event":{"name":"FSE Companion '25: 33rd ACM International Conference on the Foundations of Software Engineering","location":"Clarion Hotel Trondheim Trondheim Norway","acronym":"FSE Companion '25","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering"]},"container-title":["Proceedings of the 33rd ACM International Conference on the Foundations of Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696630.3728572","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,28]],"date-time":"2025-07-28T19:17:17Z","timestamp":1753730237000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696630.3728572"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,23]]},"references-count":30,"alternative-id":["10.1145\/3696630.3728572","10.1145\/3696630"],"URL":"https:\/\/doi.org\/10.1145\/3696630.3728572","relation":{},"subject":[],"published":{"date-parts":[[2025,6,23]]},"assertion":[{"value":"2025-07-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}