{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:09:22Z","timestamp":1750219762689,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,30]],"date-time":"2023-11-30T00:00:00Z","timestamp":1701302400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,30]]},"DOI":"10.1145\/3611643.3613093","type":"proceedings-article","created":{"date-parts":[[2023,11,30]],"date-time":"2023-11-30T23:14:38Z","timestamp":1701386078000},"page":"2142-2146","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["On Using Information Retrieval to Recommend Machine Learning Good Practices for Software Engineers"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-0121-8560","authenticated-orcid":false,"given":"Laura","family":"Cabra-Acela","sequence":"first","affiliation":[{"name":"University of Los Andes, Bogota, Colombia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5292-2977","authenticated-orcid":false,"given":"Anamaria","family":"Mojica-Hanke","sequence":"additional","affiliation":[{"name":"University of Passau, Passau, Germany \/ University of Los Andes, Bogota, Colombia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0161-2888","authenticated-orcid":false,"given":"Mario","family":"Linares-V\u00e1squez","sequence":"additional","affiliation":[{"name":"University of Los Andes, Bogota, Colombia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9765-2803","authenticated-orcid":false,"given":"Steffen","family":"Herbold","sequence":"additional","affiliation":[{"name":"University of Passau, Passau, Germany"}]}],"member":"320","published-online":{"date-parts":[[2023,11,30]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Daniel Adiwardana. 2020. Towards a conversational agent that can chat about...anything. https:\/\/ai.googleblog.com\/2020\/01\/towards-conversational-agent-that-can.html"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ESEM.2019.8870187"},{"key":"e_1_3_2_2_3_1","unstructured":"Amazon. 2019. Aidoc Brings Lifesaving AI Advancements to Medical Imaging on AWS. https:\/\/aws.amazon.com\/solutions\/case-studies\/aidoc-case-study\/"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-SEIP.2019.00042"},{"key":"e_1_3_2_2_5_1","unstructured":"Stella Biderman and Walter J Scheirer. 2020. Pitfalls in machine learning research: Reexamining the development cycle."},{"volume-title":"Natural language processing with Python: analyzing text with the natural language toolkit. \" O\u2019Reilly Media","author":"Bird Steven","key":"e_1_3_2_2_6_1","unstructured":"Steven Bird, Ewan Klein, and Edward Loper. 2009. Natural language processing with Python: analyzing text with the natural language toolkit. \" O\u2019Reilly Media, Inc.\"."},{"key":"e_1_3_2_2_7_1","first-page":"993","article-title":"Latent dirichlet allocation","author":"Blei David M","year":"2003","unstructured":"David M Blei, Andrew Y Ng, and Michael I Jordan. 2003. Latent dirichlet allocation. Journal of machine Learning research, 3, Jan (2003), 993\u20131022.","journal-title":"Journal of machine Learning research, 3"},{"key":"e_1_3_2_2_8_1","unstructured":"Laura Cabra-Acela Anamaria Mojica-Hanke Mario Linares-V\u00e1squez and Steffen Herbold. 2023. Idaka. https:\/\/idaka.onrender.com\/"},{"key":"e_1_3_2_2_9_1","unstructured":"Laura Cabra-Acela Anamaria Mojica-Hanke Mario Linares-V\u00e1squez and Steffen Herbold. 2023. Idaka - Online Appendix. https:\/\/thesoftwaredesignlab.github.io\/Idaka\/"},{"key":"e_1_3_2_2_10_1","unstructured":"Laura Cabra-Acela Anamaria Mojica-Hanke Mario Linares-V\u00e1squez and Steffen Herbold. 2023. Idaka Tool Demo. https:\/\/www.youtube.com\/watch?v=cEb-AhIPxnM"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","unstructured":"Laura Cabra-Acela Anamaria Mojica-Hanke Mario Linares-V\u00e1squez and Steffen Herbold. 2023. TheSoftwareDesignLab\/Idaka: v1.0. Aug https:\/\/doi.org\/10.5281\/zenodo.8275813 10.5281\/zenodo.8275813","DOI":"10.5281\/zenodo.8275813"},{"key":"e_1_3_2_2_12_1","unstructured":"Yihan Cao Siyu Li Yixin Liu Zhiling Yan Yutong Dai Philip S. Yu and Lichao Sun. 2023. A Comprehensive Survey of AI-Generated Content (AIGC): A History of Generative AI from GAN to ChatGPT. arxiv:2303.04226."},{"key":"e_1_3_2_2_13_1","unstructured":"cocktailpeanut. 2023. Dalai. https:\/\/github.com\/cocktailpeanut\/dalai"},{"key":"e_1_3_2_2_14_1","unstructured":"European Commission. 2019. High Level Expert Group on Artificial Intelligence."},{"key":"e_1_3_2_2_15_1","unstructured":"Shane Connelly. 2019. Practical BM25 - part 2: The BM25 algorithm and its variables. https:\/\/www.elastic.co\/blog\/practical-bm25-part-2-the-bm25-algorithm-and-its-variables"},{"key":"e_1_3_2_2_16_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. CoRR, abs\/1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. CoRR, abs\/1810.04805 (2018), arXiv:1810.04805. arxiv:1810.04805"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"crossref","unstructured":"Saad Ezzini Sallam Abualhaija Chetan Arora and Mehrdad Sabetzadeh. 2023. AI-based Question Answering Assistance for Analyzing Natural-language Requirements. arXiv preprint arXiv:2302.04793.","DOI":"10.1109\/ICSE48619.2023.00113"},{"key":"e_1_3_2_2_18_1","unstructured":"Jordan Hoffmann Sebastian Borgeaud Arthur Mensch Elena Buchatskaya Trevor Cai Eliza Rutherford Diego de Las Casas Lisa Anne Hendricks Johannes Welbl Aidan Clark Tom Hennigan Eric Noland Katie Millican George van den Driessche Bogdan Damoc Aurelia Guy Simon Osindero Karen Simonyan Erich Elsen Jack W. Rae Oriol Vinyals and Laurent Sifre. 2022. Training Compute-Optimal Large Language Models. arxiv:2203.15556."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2808332"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10515-023-00377-x"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/967900.968203"},{"key":"e_1_3_2_2_22_1","first-page":"1","article-title":"Pre-train, prompt, and predict: A systematic survey of prompting methods in natural language processing","volume":"55","author":"Liu Pengfei","year":"2023","unstructured":"Pengfei Liu, Weizhe Yuan, Jinlan Fu, Zhengbao Jiang, Hiroaki Hayashi, and Graham Neubig. 2023. Pre-train, prompt, and predict: A systematic survey of prompting methods in natural language processing. Comput. Surveys, 55, 9 (2023), 1\u201335.","journal-title":"Comput. Surveys"},{"key":"e_1_3_2_2_23_1","volume-title":"How to avoid machine learning pitfalls: a guide for academic researchers. CoRR, abs\/2108.02497","author":"Lones Michael A.","year":"2021","unstructured":"Michael A. Lones. 2021. How to avoid machine learning pitfalls: a guide for academic researchers. CoRR, abs\/2108.02497 (2021), arXiv:2108.02497. arxiv:2108.02497"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/1276933.1276934"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2301.10516"},{"key":"e_1_3_2_2_27_1","unstructured":"OpenAI. 2023. Introducing chatgpt. https:\/\/openai.com\/blog\/chatgpt\/"},{"key":"e_1_3_2_2_28_1","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, and Alex Ray. 2022. Training language models to follow instructions with human feedback. Advances in Neural Information Processing Systems, 35 (2022), 27730\u201327744.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_29_1","unstructured":"Google PAIR. 2021. People + AI Guidebook. https:\/\/pair.withgoogle.com\/guidebook\/"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"crossref","unstructured":"Martin F Porter. 1980. An algorithm for suffix stripping. Program.","DOI":"10.1108\/eb046814"},{"key":"e_1_3_2_2_31_1","volume-title":"Language models are unsupervised multitask learners. OpenAI blog, 1, 8","author":"Radford Alec","year":"2019","unstructured":"Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, and Ilya Sutskever. 2019. Language models are unsupervised multitask learners. OpenAI blog, 1, 8 (2019), 9."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1056\/NEJMra1814259"},{"key":"e_1_3_2_2_33_1","volume-title":"The probabilistic relevance framework: BM25 and beyond. Foundations and Trends\u00ae in Information Retrieval, 3, 4","author":"Robertson Stephen","year":"2009","unstructured":"Stephen Robertson and Hugo Zaragoza. 2009. The probabilistic relevance framework: BM25 and beyond. Foundations and Trends\u00ae in Information Retrieval, 3, 4 (2009), 333\u2013389."},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/361219.361220"},{"key":"e_1_3_2_2_35_1","volume-title":"Hidden technical debt in machine learning systems. Advances in neural information processing systems, 28","author":"Sculley David","year":"2015","unstructured":"David Sculley, Gary Holt, Daniel Golovin, Eugene Davydov, Todd Phillips, Dietmar Ebner, Vinay Chaudhary, Michael Young, Jean-Francois Crespo, and Dan Dennison. 2015. Hidden technical debt in machine learning systems. Advances in neural information processing systems, 28 (2015)."},{"volume-title":"Proceedings of the 14th ACM\/IEEE International Symposium on Empirical Software Engineering and Measurement (ESEM). 1\u201312","author":"Serban Alex","key":"e_1_3_2_2_36_1","unstructured":"Alex Serban, Koen van der Blom, Holger Hoos, and Joost Visser. 2020. Adoption and effects of software engineering best practices in machine learning. In Proceedings of the 14th ACM\/IEEE International Symposium on Empirical Software Engineering and Measurement (ESEM). 1\u201312."},{"key":"e_1_3_2_2_37_1","volume-title":"Applying information-retrieval methods to software reuse: a case study. Information processing & management, 39, 1","author":"Stierna Eric J","year":"2003","unstructured":"Eric J Stierna and Neil C Rowe. 2003. Applying information-retrieval methods to software reuse: a case study. Information processing & management, 39, 1 (2003), 67\u201374."},{"key":"e_1_3_2_2_38_1","volume-title":"Machine learning, social learning and the governance of self-driving cars. Social studies of science, 48, 1","author":"Stilgoe Jack","year":"2018","unstructured":"Jack Stilgoe. 2018. Machine learning, social learning and the governance of self-driving cars. Social studies of science, 48, 1 (2018), 25\u201356."},{"key":"e_1_3_2_2_39_1","volume-title":"Alpaca: A Strong, Replicable Instruction-Following Model. https:\/\/crfm.stanford.edu\/2023\/03\/13\/alpaca.html","author":"Taori Rohan","year":"2023","unstructured":"Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li, Carlos Guestrin, Percy Liang, Tatsunori, and B. Hashimoto. 2023. Alpaca: A Strong, Replicable Instruction-Following Model. https:\/\/crfm.stanford.edu\/2023\/03\/13\/alpaca.html"},{"key":"e_1_3_2_2_40_1","unstructured":"Siri Team. 2017. Deep learning for siri\u2019s voice: On-device deep mixture density networks for hybrid unit selection synthesis. https:\/\/machinelearning.apple.com\/research\/siri-voices"},{"key":"e_1_3_2_2_41_1","unstructured":"Tesla. 2023. Artificial Intelligence and Autopilot. https:\/\/www.tesla.com\/AI"},{"key":"e_1_3_2_2_42_1","volume-title":"BEIR: A heterogenous benchmark for zero-shot evaluation of information retrieval models. arXiv preprint arXiv:2104.08663.","author":"Thakur Nandan","year":"2021","unstructured":"Nandan Thakur, Nils Reimers, Andreas R\u00fcckl\u00e9, Abhishek Srivastava, and Iryna Gurevych. 2021. BEIR: A heterogenous benchmark for zero-shot evaluation of information retrieval models. arXiv preprint arXiv:2104.08663."},{"key":"e_1_3_2_2_43_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971.","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, and Faisal Azhar. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485275"},{"volume-title":"Workshop.","year":"2023","key":"e_1_3_2_2_45_1","unstructured":"BigScience Workshop. 2023. BLOOM: A 176B-Parameter Open-Access Multilingual Language Model. arxiv:2211.05100."},{"volume-title":"Best practices for machine learning applications","author":"Wujek Brett","key":"e_1_3_2_2_46_1","unstructured":"Brett Wujek, Patrick Hall, and Funda Gunes. 2016. Best practices for machine learning applications. SAS Institute Inc."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1023760326768"},{"key":"e_1_3_2_2_48_1","unstructured":"Martin Zinkevich. 2021. Rules of machine learning: Best Practices for ML Engineering. https:\/\/developers.google.com\/machine-learning\/guides\/rules-of-ml"}],"event":{"name":"ESEC\/FSE '23: 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering"],"location":"San Francisco CA USA","acronym":"ESEC\/FSE '23"},"container-title":["Proceedings of the 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3611643.3613093","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3611643.3613093","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:10Z","timestamp":1750178230000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3611643.3613093"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,30]]},"references-count":47,"alternative-id":["10.1145\/3611643.3613093","10.1145\/3611643"],"URL":"https:\/\/doi.org\/10.1145\/3611643.3613093","relation":{},"subject":[],"published":{"date-parts":[[2023,11,30]]},"assertion":[{"value":"2023-11-30","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}