{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T12:18:54Z","timestamp":1779365934655,"version":"3.53.0"},"publisher-location":"New York, NY, USA","reference-count":78,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,4,12]],"date-time":"2026-04-12T00:00:00Z","timestamp":1775952000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"The Ministry of Education and Culture, Finland","award":["OKM\/14\/523\/2024"],"award-info":[{"award-number":["OKM\/14\/523\/2024"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,12]]},"DOI":"10.1145\/3786167.3788413","type":"proceedings-article","created":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T11:40:19Z","timestamp":1779363619000},"page":"112-119","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Not All Problems Are Nails, Not All Tools Should Be Hammers: A Position Paper on Agent Usage in Software Engineering Tasks"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-8269-5645","authenticated-orcid":false,"given":"Juuso","family":"Rytilahti","sequence":"first","affiliation":[{"name":"University of Turku, Turku, Finland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6418-1063","authenticated-orcid":false,"given":"Panu","family":"Puhtila","sequence":"additional","affiliation":[{"name":"University of Turku, Turku, Finland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1684-239X","authenticated-orcid":false,"given":"Oshani","family":"Weerakoon","sequence":"additional","affiliation":[{"name":"University of Turku, Turku, Finland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2407-9492","authenticated-orcid":false,"given":"Erkki","family":"Kaila","sequence":"additional","affiliation":[{"name":"University of Turku, Turku, Finland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8799-185X","authenticated-orcid":false,"given":"Tuomas","family":"M\u00e4kil\u00e4","sequence":"additional","affiliation":[{"name":"University of Turku, Turku, Finland"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,5,21]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","unstructured":"Adam Alami Victor Jensen and Neil Ernst. 2025. Accountability in Code Review: The Role of Intrinsic Driversand the Impact of LLMs. ACM Trans. Softw. Eng. Methodol. 34 8 Article 233 (Oct. 2025) 44\u00a0pages. 10.1145\/3721127","DOI":"10.1145\/3721127"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"crossref","unstructured":"Mahmoud Alfadel Diego\u00a0Elias Costa and Emad Shihab. 2023. Empirical analysis of security vulnerabilities in python packages. Empirical Software Engineering 28 3 (2023) 59.","DOI":"10.1007\/s10664-022-10278-4"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.744"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-78623-5_9"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/3644815.3644945"},{"key":"e_1_3_3_2_7_2","unstructured":"Joel Becker Nate Rush Elizabeth Barnes and David Rein. 2025. Measuring the Impact of Early-2025 AI on Experienced Open-Source Developer Productivity. arxiv:https:\/\/arXiv.org\/abs\/2507.09089\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2507.09089"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-032-07938-1_28"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"crossref","unstructured":"Alexander\u00a0C Bock and Ulrich Frank. 2021. Low-code platform. Business & Information Systems Engineering 63 6 (2021) 733\u2013740.","DOI":"10.1007\/s12599-021-00726-8"},{"key":"e_1_3_3_2_10_2","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared\u00a0D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et\u00a0al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877\u20131901."},{"key":"e_1_3_3_2_11_2","unstructured":"Chi-Min Chan Weize Chen Yusheng Su Jianxuan Yu Wei Xue Shanghang Zhang Jie Fu and Zhiyuan Liu. 2023. ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate. arxiv:https:\/\/arXiv.org\/abs\/2308.07201\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2308.07201"},{"key":"e_1_3_3_2_12_2","unstructured":"Linyao Chen Zimian Peng Yingxuan Yang Yikun Wang Wenzheng\u00a0Tom Tang Hiroki\u00a0H. Kobayashi and Weinan Zhang. 2025. EnvX: Agentize Everything with Agentic AI. arxiv:https:\/\/arXiv.org\/abs\/2509.08088\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2509.08088"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","unstructured":"Minyang Chow and Olivia Ng. 2025. From technology adopters to creators: Leveraging AI-assisted vibe coding to transform clinical teaching and learning. Medical Teacher 47 12 (2025) 1927\u20131929. arXiv:10.1080\/0142159X.2025.2488353PMID: 40202513.","DOI":"10.1080\/0142159X.2025.2488353"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-SEIP66354.2025.00043"},{"key":"e_1_3_3_2_15_2","unstructured":"Alex Cloud Minh Le James Chua Jan Betley Anna Sztyber-Betley Jacob Hilton Samuel Marks and Owain Evans. 2025. Subliminal Learning: Language models transmit behavioral traits via hidden signals in data. arxiv:https:\/\/arXiv.org\/abs\/2507.14805\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2507.14805"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"crossref","unstructured":"Sayed Mehdi\u00a0Hejazi Dehaghani and Nafiseh Hajrahimi. 2013. Which factors affect software projects maintenance cost more? Acta Informatica Medica 21 1 (2013) 63.","DOI":"10.5455\/aim.2012.21.63-66"},{"key":"e_1_3_3_2_17_2","unstructured":"Zane Durante Qiuyuan Huang Naoki Wake Ran Gong Jae\u00a0Sung Park Bidipta Sarkar Rohan Taori Yusuke Noda Demetri Terzopoulos Yejin Choi Katsushi Ikeuchi Hoi Vo Li Fei-Fei and Jianfeng Gao. 2024. Agent AI: Surveying the Horizons of Multimodal Interaction. arxiv:https:\/\/arXiv.org\/abs\/2401.03568\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2401.03568"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","unstructured":"Usman\u00a0Khan Durrani Mustafa Akpinar Muhammed Fatih\u00a0Adak Abdullah Talha\u00a0Kabakus Muhammed Maruf\u00a0\u00d6zt\u00fcrk and Mohammed Saleh. 2024. A Decade of Progress: A Systematic Literature Review on the Integration of AI in Software Engineering Phases and Activities (2013-2023). IEEE Access 12 (2024) 171185\u2013171204. 10.1109\/ACCESS.2024.3488904","DOI":"10.1109\/ACCESS.2024.3488904"},{"key":"e_1_3_3_2_19_2","series-title":"(AAAI\u201996)","first-page":"1322","volume-title":"Proceedings of the Thirteenth National Conference on Artificial Intelligence - Volume 2","author":"Etzioni Oren","year":"1996","unstructured":"Oren Etzioni. 1996. Moving up the information food chain: deploying softbots on the world wide web. In Proceedings of the Thirteenth National Conference on Artificial Intelligence - Volume 2 (Portland, Oregon) (AAAI\u201996). AAAI Press, 1322\u20131326."},{"key":"e_1_3_3_2_20_2","series-title":"(ICML\u201924)","volume-title":"Proceedings of the 41st International Conference on Machine Learning","author":"Fernando Chrisantha","year":"2024","unstructured":"Chrisantha Fernando, Dylan Banarse, Henryk Michalewski, Simon Osindero, and Tim Rockt\u00e4schel. 2024. Promptbreeder: self-referential self-improvement via prompt evolution. In Proceedings of the 41st International Conference on Machine Learning (Vienna, Austria) (ICML\u201924). JMLR.org, Article 541, 64\u00a0pages."},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1017\/9781009581738"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE55347.2025.00191"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.904"},{"key":"e_1_3_3_2_24_2","volume-title":"Coding on Copilot: 2023 Data Shows Downward Pressure on Code Quality","author":"Harding William","year":"2024","unstructured":"William Harding and Matthew Kloster. 2024. Coding on Copilot: 2023 Data Shows Downward Pressure on Code Quality. Technical Report GitClear Report. GitClear \/ Alloy.dev Research. https:\/\/www.gitclear.com\/coding_on_copilot_data_shows_ais_downward_pressure_on_code_quality Analyzed 153 million changed lines of code from January 2020 through December 2023."},{"key":"e_1_3_3_2_25_2","volume-title":"Context Rot: How Increasing Input Tokens Impacts LLM Performance","author":"Hong Kelly","year":"2025","unstructured":"Kelly Hong, Anton Troynikov, and Jeff Huber. 2025. Context Rot: How Increasing Input Tokens Impacts LLM Performance. Technical Report \u2013. Chroma Research. https:\/\/research.trychroma.com\/context-rot Report published July 14, 2025."},{"key":"e_1_3_3_2_26_2","unstructured":"Evan Hubinger Carson Denison Jesse Mu Mike Lambert Meg Tong Monte MacDiarmid Tamera Lanham Daniel\u00a0M. Ziegler Tim Maxwell Newton Cheng Adam Jermyn Amanda Askell Ansh Radhakrishnan Cem Anil David Duvenaud Deep Ganguli Fazl Barez Jack Clark Kamal Ndousse Kshitij Sachan Michael Sellitto Mrinank Sharma Nova DasSarma Roger Grosse Shauna Kravec Yuntao Bai Zachary Witten Marina Favaro Jan Brauner Holden Karnofsky Paul Christiano Samuel\u00a0R. Bowman Logan Graham Jared Kaplan S\u00f6ren Mindermann Ryan Greenblatt Buck Shlegeris Nicholas Schiefer and Ethan Perez. 2024. Sleeper Agents: Training Deceptive LLMs that Persist Through Safety Training. arxiv:https:\/\/arXiv.org\/abs\/2401.05566\u00a0[cs.CR] https:\/\/arxiv.org\/abs\/2401.05566"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","unstructured":"Laurie Hughes Yogesh\u00a0K. Dwivedi Tegwen Malik Mazen Shawosh Mousa\u00a0Ahmed Albashrawi Il Jeon Vincent Dutot Mandanna Appanderanda Tom Crick Rahul De\u2019 Mark Fenwick Senali\u00a0Madugoda Gunaratnege Paulius Jurcys Arpan\u00a0Kumar Kar Nir Kshetri Keyao Li Sashah Mutasa Spyridon Samothrakis Michael Wade and Paul Walton. 2025. AI Agents and Agentic Systems: A Multi-Expert Analysis. Journal of Computer Information Systems 65 4 (2025) 489\u2013517. arXiv: 10.1080\/08874417.2025.2483832","DOI":"10.1080\/08874417.2025.2483832"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-48765-4_2"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","unstructured":"Ziwei Ji Nayeon Lee Rita Frieske Tiezheng Yu Dan Su Yan Xu Etsuko Ishii Ye\u00a0Jin Bang Andrea Madotto and Pascale Fung. 2023. Survey of Hallucination in Natural Language Generation. ACM Comput. Surv. 55 12 Article 248 (March 2023) 38\u00a0pages. 10.1145\/3571730","DOI":"10.1145\/3571730"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.272"},{"key":"e_1_3_3_2_31_2","unstructured":"Carlos\u00a0E. Jimenez John Yang Alexander Wettig Shunyu Yao Kexin Pei Ofir Press and Karthik Narasimhan. 2024. SWE-bench: Can Language Models Resolve Real-World GitHub Issues? arxiv:https:\/\/arXiv.org\/abs\/2310.06770\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2310.06770"},{"key":"e_1_3_3_2_32_2","unstructured":"Jared Kaplan Sam McCandlish Tom Henighan Tom\u00a0B. Brown Benjamin Chess Rewon Child Scott Gray Alec Radford Jeffrey Wu and Dario Amodei. 2020. Scaling Laws for Neural Language Models. arxiv:https:\/\/arXiv.org\/abs\/2001.08361\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2001.08361"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","unstructured":"Daniel Kelly Frank\u00a0G. Glavin and Enda Barrett. 2021. Denial of wallet\u2014Defining a looming threat to serverless computing. Journal of Information Security and Applications 60 (2021) 102843. 10.1016\/j.jisa.2021.102843","DOI":"10.1016\/j.jisa.2021.102843"},{"key":"e_1_3_3_2_34_2","unstructured":"Dezhang Kong Shi Lin Zhenhua Xu Zhebo Wang Minghao Li Yufeng Li Yilun Zhang Hujin Peng Zeyang Sha Yuyuan Li et\u00a0al. 2025. A survey of llm-driven ai agent communication: Protocols security risks and defense countermeasures."},{"key":"e_1_3_3_2_35_2","unstructured":"Nataliya Kosmyna Eugene Hauptmann Ye\u00a0Tong Yuan Jessica Situ Xian-Hao Liao Ashly\u00a0Vivian Beresnitzky Iris Braunstein and Pattie Maes. 2025. Your brain on ChatGPT: Accumulation of cognitive debt when using an AI assistant for essay writing task."},{"key":"e_1_3_3_2_36_2","unstructured":"Naveen Krishnan. 2025. AI Agents: Evolution Architecture and Real-World Applications. arxiv:https:\/\/arXiv.org\/abs\/2503.12687\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2503.12687"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.586"},{"key":"e_1_3_3_2_38_2","unstructured":"Zhicheng Lin. 2025. Hidden Prompts in Manuscripts Exploit AI-Assisted Peer Review. arxiv:https:\/\/arXiv.org\/abs\/2507.06185\u00a0[cs.CY] https:\/\/arxiv.org\/abs\/2507.06185"},{"key":"e_1_3_3_2_39_2","unstructured":"Bo Liu Yanjie Jiang Yuxia Zhang Nan Niu Guangjie Li and Hui Liu. 2024. An empirical study on the potential of llms in automated software refactoring."},{"key":"e_1_3_3_2_40_2","unstructured":"Fang Liu Yang Liu Lin Shi Zhen Yang Li Zhang Xiaoli Lian Zhongqi Li and Yuchi Ma. 2026. Beyond Functional Correctness: Exploring Hallucinations in LLM-Generated Code. arxiv:https:\/\/arXiv.org\/abs\/2404.00971\u00a0[cs.SE] https:\/\/arxiv.org\/abs\/2404.00971"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1145\/3696630.3728572"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-92611-2_5"},{"key":"e_1_3_3_2_43_2","first-page":"130","volume-title":"International Conference on Agile Software Development","author":"Maranh\u00e3o\u00a0Junior Jo\u00e3o\u00a0Jos\u00e9","year":"2024","unstructured":"Jo\u00e3o\u00a0Jos\u00e9 Maranh\u00e3o\u00a0Junior, Filipe\u00a0F Correia, and Eduardo\u00a0Martins Guerra. 2024. Can ChatGPT Suggest Patterns? An Exploratory Study About Answers Given by AI-Assisted Tools to Design Problems. In International Conference on Agile Software Development. Springer Nature Switzerland Cham, Springer, 130\u2013138."},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"crossref","unstructured":"Boris Martinovi\u0107 and Robert Rozi\u0107. 2025. Perceived Impact of AI-Based Tooling on Software Development Code Quality. SN Computer Science 6 1 (2025) 63.","DOI":"10.1007\/s42979-024-03608-4"},{"key":"e_1_3_3_2_45_2","first-page":"387","volume-title":"International conference on data intelligence and cognitive informatics","author":"Marvin Ggaliwango","year":"2023","unstructured":"Ggaliwango Marvin, Nakayiza Hellen, Daudi Jjingo, and Joyce Nakatumba-Nabende. 2023. Prompt engineering in large language models. In International conference on data intelligence and cognitive informatics. Springer, Springer, 387\u2013402."},{"key":"e_1_3_3_2_46_2","unstructured":"Tula Masterman Sandi Besen Mason Sawtell and Alex Chao. 2024. The landscape of emerging ai agent architectures for reasoning planning and tool calling: A survey."},{"key":"e_1_3_3_2_47_2","unstructured":"Jeremy McHugh Kristina \u0160ekrst and Jon Cefalu. 2025. Prompt Injection 2.0: Hybrid AI Threats. arxiv:https:\/\/arXiv.org\/abs\/2507.13169\u00a0[cs.CR] https:\/\/arxiv.org\/abs\/2507.13169"},{"key":"e_1_3_3_2_48_2","first-page":"38","volume-title":"2025 IEEE\/ACM 47th International Conference on Software Engineering (ICSE)","author":"Miller Courtney","year":"2024","unstructured":"Courtney Miller, Mahmoud Jahanshahi, Audris Mockus, Bogdan Vasilescu, and Christian K\u00e4stner. 2024. Understanding the response to open-source dependency abandonment in the npm ecosystem. In 2025 IEEE\/ACM 47th International Conference on Software Engineering (ICSE). IEEE Computer Society, ACM, 38\u201350."},{"key":"e_1_3_3_2_49_2","unstructured":"Iman Mirzadeh Keivan Alizadeh Hooman Shahrokhi Oncel Tuzel Samy Bengio and Mehrdad Farajtabar. 2024. Gsm-symbolic: Understanding the limitations of mathematical reasoning in large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.05229 (2024)."},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"crossref","unstructured":"Amr Mohamed Maram Assi and Mariam Guizani. 2025. The Impact of LLM-Assistants on Software Developer Productivity: A Systematic Literature Review. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2507.03156 (2025).","DOI":"10.1145\/3809494"},{"key":"e_1_3_3_2_51_2","unstructured":"Ziyi Ni Huacan Wang Shuo Zhang Shuo Lu Ziyang He Wang You Zhenheng Tang Yuntao Du Bill Sun Hongzhang Liu et\u00a0al. 2025. GitTaskBench: A Benchmark for Code Agents Solving Real-World Tasks Through Code Repository Leveraging. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2508.18993 (2025)."},{"key":"e_1_3_3_2_52_2","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2018.00005"},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1145\/3691620.3695330"},{"key":"e_1_3_3_2_54_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-54827-7_9"},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSME58944.2024.00034"},{"key":"e_1_3_3_2_56_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16947-2_1"},{"key":"e_1_3_3_2_57_2","doi-asserted-by":"crossref","unstructured":"Daniel Russo. 2024. Navigating the complexity of generative ai adoption in software engineering. ACM Transactions on Software Engineering and Methodology 33 5 (2024) 1\u201350.","DOI":"10.1145\/3652154"},{"key":"e_1_3_3_2_58_2","doi-asserted-by":"crossref","unstructured":"Irfan Samsyudin. 2025. Vibe Coding and AI-Led Conversational Programming: Emerging Trends in Software Development. Available at SSRN 5469367 (2025).","DOI":"10.2139\/ssrn.5469367"},{"key":"e_1_3_3_2_59_2","doi-asserted-by":"publisher","unstructured":"Thomas Schuster Marian Lambert Nico D\u00f6ring and Julius Tr\u00f6gele. 2025. Needle-in-the-Haystack Testing LLMs with\u00a0a\u00a0Complex Reasoning Task. Communications in Computer and Information Science 2581 CCIS (2025) 254 \u2013 266. 10.1007\/978-3-031-96196-0_19Cited by: 0.","DOI":"10.1007\/978-3-031-96196-0_19"},{"key":"e_1_3_3_2_60_2","unstructured":"Idan Shenfeld Jyothish Pari and Pulkit Agrawal. 2025. RL\u2019s Razor: Why Online Reinforcement Learning Forgets Less. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2509.04259 (2025)."},{"key":"e_1_3_3_2_61_2","first-page":"16","volume-title":"Proceedings of the First Workshop on Writing Aids at the Crossroads of AI, Cognitive Science and NLP (WRAICOGS 2025)","author":"Shi Ken","year":"2025","unstructured":"Ken Shi and Gerald Penn. 2025. Semantic masking in a needle-in-a-haystack test for evaluating large language model long-text capabilities. In Proceedings of the First Workshop on Writing Aids at the Crossroads of AI, Cognitive Science and NLP (WRAICOGS 2025). International Committee on Computational Linguistics, 16\u201323."},{"key":"e_1_3_3_2_62_2","unstructured":"Rui Song Yingji Li Lida Shi Fausto Giunchiglia and Hao Xu. 2024. Shortcut learning in in-context learning: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.02018 (2024)."},{"key":"e_1_3_3_2_63_2","unstructured":"Jacob\u00a0Mitchell Springer Sachin Goyal Kaiyue Wen Tanishq Kumar Xiang Yue Sadhika Malladi Graham Neubig and Aditi Raghunathan. 2025. Overtrained Language Models Are Harder to Fine-Tune. arxiv:https:\/\/arXiv.org\/abs\/2503.19206\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2503.19206"},{"key":"e_1_3_3_2_64_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-96-8183-9_23"},{"key":"e_1_3_3_2_65_2","doi-asserted-by":"publisher","DOI":"10.1145\/3696630.3728552"},{"key":"e_1_3_3_2_66_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-68738-9_34"},{"key":"e_1_3_3_2_67_2","doi-asserted-by":"crossref","unstructured":"Jian Wang Zhen Li Jixiang Qu Deqing Zou Shouhuai Xu Ziteng Xu Zhenwei Wang and Hai Jin. 2025. MalPacDetector: An LLM-based Malicious NPM Package Detector. IEEE Transactions on Information Forensics and Security (2025).","DOI":"10.1109\/TIFS.2025.3580336"},{"key":"e_1_3_3_2_68_2","doi-asserted-by":"crossref","unstructured":"Jason Wei Xuezhi Wang Dale Schuurmans Maarten Bosma Fei Xia Ed Chi Quoc\u00a0V Le Denny Zhou et\u00a0al. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems 35 (2022) 24824\u201324837.","DOI":"10.52202\/068431-1800"},{"key":"e_1_3_3_2_69_2","unstructured":"Jiaheng Wei Yuanshun Yao Jean-Francois Ton Hongyi Guo Andrew Estornell and Yang Liu. 2024. Measuring and Reducing LLM Hallucination without Gold-Standard Answers. arxiv:https:\/\/arXiv.org\/abs\/2402.10412\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2402.10412"},{"key":"e_1_3_3_2_70_2","unstructured":"Orion Weller Michael Boratko Iftekhar Naim and Jinhyuk Lee. 2025. On the Theoretical Limitations of Embedding-Based Retrieval. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2508.21038 (2025)."},{"key":"e_1_3_3_2_71_2","doi-asserted-by":"crossref","unstructured":"Michael Wooldridge. 1997. Agent-based software engineering. IEE Proceedings-software 144 1 (1997) 26\u201337.","DOI":"10.1049\/ip-sen:19971026"},{"key":"e_1_3_3_2_72_2","unstructured":"Elizabeth Wyss Dominic Tassio Lorenzo De\u00a0Carli and Drew Davidson. 2025. Evaluating LLM-Based Detection of Malicious Package Updates in npm. https:\/\/ldklab.github.io\/assets\/papers\/raid25-llmdetection.pdf"},{"key":"e_1_3_3_2_73_2","doi-asserted-by":"publisher","DOI":"10.1145\/3691620.3695529"},{"key":"e_1_3_3_2_74_2","unstructured":"Wenqian Ye Guangtao Zheng Yunsheng Ma Xu Cao Bolin Lai James\u00a0M Rehg and Aidong Zhang. 2024. Mm-spubench: Towards better understanding of spurious biases in multimodal llms."},{"key":"e_1_3_3_2_75_2","doi-asserted-by":"crossref","unstructured":"Nusrat Zahan Philipp Burckhardt Mikola Lysenko Feross Aboukhadijeh and Laurie Williams. 2024. MalwareBench: Malware samples are not enough. In 2024 IEEE\/ACM 21st International Conference on Mining Software Repositories (MSR).","DOI":"10.1145\/3643991.3644883"},{"key":"e_1_3_3_2_76_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE55347.2025.00146"},{"key":"e_1_3_3_2_77_2","unstructured":"Ruediger Zarnekow and Walter Brenner. 2005. Distribution of cost over the application lifecycle-a multi-case study. ECIS 2005 proceedings (2005) 26."},{"key":"e_1_3_3_2_78_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE55347.2025.00046"},{"key":"e_1_3_3_2_79_2","first-page":"995","volume-title":"28th USENIX Security symposium (USENIX security 19)","author":"Zimmermann Markus","year":"2019","unstructured":"Markus Zimmermann, Cristian-Alexandru Staicu, Cam Tenny, and Michael Pradel. 2019. Small world with high risks: A study of security threats in the npm ecosystem. In 28th USENIX Security symposium (USENIX security 19). 995\u20131010."}],"event":{"name":"AGENT '26: International Workshop on Agentic Engineering","location":"Rio de Janeiro Brazil","acronym":"AGENT '26","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","IEEE CS","Faculty of Engineering of University of Porto"]},"container-title":["Proceedings of the 2026 International Workshop on Agentic Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3786167.3788413","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T12:01:45Z","timestamp":1779364905000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3786167.3788413"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,12]]},"references-count":78,"alternative-id":["10.1145\/3786167.3788413","10.1145\/3786167"],"URL":"https:\/\/doi.org\/10.1145\/3786167.3788413","relation":{},"subject":[],"published":{"date-parts":[[2026,4,12]]},"assertion":[{"value":"2026-05-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}