{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,25]],"date-time":"2026-01-25T13:08:22Z","timestamp":1769346502258,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":87,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,30]],"date-time":"2023-11-30T00:00:00Z","timestamp":1701302400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"FAPESP","award":["2019\/12743-4"],"award-info":[{"award-number":["2019\/12743-4"]}]},{"name":"CNPq\/MCTI\/FNDCT","award":["408812\/2021-4"],"award-info":[{"award-number":["408812\/2021-4"]}]},{"name":"MCTIC\/CGI\/FAPESP","award":["2021\/06662-1"],"award-info":[{"award-number":["2021\/06662-1"]}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2236198, 2247929, 2024561, and 2303042"],"award-info":[{"award-number":["2236198, 2247929, 2024561, and 2303042"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,30]]},"DOI":"10.1145\/3611643.3616288","type":"proceedings-article","created":{"date-parts":[[2023,11,30]],"date-time":"2023-11-30T23:14:38Z","timestamp":1701386078000},"page":"16-28","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":14,"title":["Do CONTRIBUTING Files Provide Information about OSS Newcomers\u2019 Onboarding Barriers?"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2104-6676","authenticated-orcid":false,"given":"Felipe","family":"Fronchetti","sequence":"first","affiliation":[{"name":"Virginia Commonwealth University, Richmond, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2017-7842","authenticated-orcid":false,"given":"David C.","family":"Shepherd","sequence":"additional","affiliation":[{"name":"Louisiana State University, Baton Rouge, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9943-5570","authenticated-orcid":false,"given":"Igor","family":"Wiese","sequence":"additional","affiliation":[{"name":"Federal University of Technology Paran\u00e1, Campo Mour\u00e3o, Brazil"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6919-2149","authenticated-orcid":false,"given":"Christoph","family":"Treude","sequence":"additional","affiliation":[{"name":"University of Melbourne, Victoria, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1399-7535","authenticated-orcid":false,"given":"Marco Aur\u00e9lio","family":"Gerosa","sequence":"additional","affiliation":[{"name":"Northern Arizona University, Flagstaff, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0612-5790","authenticated-orcid":false,"given":"Igor","family":"Steinmacher","sequence":"additional","affiliation":[{"name":"Northern Arizona University, Flagstaff, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,11,30]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2019.00122"},{"key":"e_1_3_2_2_2_1","unstructured":"Amazon. 2023. Amazon Mechanical Turk (Website). https:\/\/www.mturk.com\/ [Accessed on Aug-2023]"},{"key":"e_1_3_2_2_3_1","unstructured":"Apple. 2023. Apple Swift (CONTRIBUTING.md). https:\/\/github.com\/apple\/swift\/blob\/main\/CONTRIBUTING [Accessed on Aug-2023]"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/IoTaIS50849.2021.9359692"},{"key":"e_1_3_2_2_5_1","volume-title":"Proceedings of the 6th World Multiconference on Systemics, Cybernetics and Informatics (SCI-2002)","author":"Biskri Isma\u00efl","year":"2002","unstructured":"Isma\u00efl Biskri and Sylvain Delisle. 2002. Text classification and multilinguism: Getting at words via n-grams of characters. In Proceedings of the 6th World Multiconference on Systemics, Cybernetics and Informatics (SCI-2002), Orlando (Florida, USA). 5, 110\u2013115."},{"key":"e_1_3_2_2_6_1","volume-title":"Machine Learning Algorithms","author":"Bonaccorso Giuseppe","unstructured":"Giuseppe Bonaccorso. 2017. 12.2.4.2 Tf-idf Vectorizing. In Machine Learning Algorithms. Packt Publishing. isbn:978-1-78588-962-2"},{"key":"e_1_3_2_2_7_1","volume-title":"Machine Learning Algorithms","author":"Bonaccorso Giuseppe","unstructured":"Giuseppe Bonaccorso. 2017. 2.1.1.1 One-vs-All. In Machine Learning Algorithms. Packt Publishing. isbn:978-1-78588-962-2"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSME.2016.31"},{"key":"e_1_3_2_2_9_1","volume-title":"Asking questions: the definitive guide to questionnaire design\u2013for market research, political polls, and social and health questionnaires","author":"Bradburn Norman M","unstructured":"Norman M Bradburn, Seymour Sudman, and Brian Wansink. 2004. Asking questions: the definitive guide to questionnaire design\u2013for market research, political polls, and social and health questionnaires. John Wiley & Sons."},{"key":"e_1_3_2_2_10_1","volume-title":"Language models are few-shot learners. Advances in neural information processing systems, 33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, and Amanda Askell. 2020. Language models are few-shot learners. Advances in neural information processing systems, 33 (2020), 1877\u20131901."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1002\/pfi.21727"},{"key":"e_1_3_2_2_12_1","volume-title":"Python Machine Learning Cookbook","author":"Ciaburro Giuseppe","unstructured":"Giuseppe Ciaburro and Prateek Joshi. 2019. 2.9.4 There\u2019s More.... In Python Machine Learning Cookbook (2nd Edition). Packt Publishing. isbn:978-1-78980-845-2","edition":"2"},{"key":"e_1_3_2_2_13_1","volume-title":"A coefficient of agreement for nominal scales. Educational and psychological measurement, 20, 1","author":"Cohen Jacob","year":"1960","unstructured":"Jacob Cohen. 1960. A coefficient of agreement for nominal scales. Educational and psychological measurement, 20, 1 (1960), 37\u201346."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/SBESC56799.2022.9964726"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE43902.2021.00093"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3387940.3391534"},{"key":"e_1_3_2_2_17_1","unstructured":"G\u00fcnhan D\u00fcndar and Mustafa Berke Yelten. 2020. 3.6.2 Resampling. In Modelling Methodologies in Analogue Integrated Circuit Design. Institution of Engineering and Technology. isbn:978-1-78561-695-2"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSME.2019.00043"},{"key":"e_1_3_2_2_19_1","unstructured":"Fronchetti et al.. 2023. Contributing Files (Website). https:\/\/contributing.streamlit.app\/ [Accessed on Aug-2023]"},{"key":"e_1_3_2_2_20_1","unstructured":"Fronchetti et al.. 2023. Replication Package (Zenodo Repository). https:\/\/zenodo.org\/record\/8270217 [Accessed on Aug-2023]"},{"key":"e_1_3_2_2_21_1","unstructured":"Facebook. 2023. FastText (Website). https:\/\/fasttext.cc\/ [Accessed on Aug-2023]"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/MS.2014.107"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2652524.2652540"},{"key":"e_1_3_2_2_24_1","volume-title":"The curse of expertise: When more knowledge leads to miscalibrated explanatory insight. Cognitive science, 40, 5","author":"Fisher Matthew","year":"2016","unstructured":"Matthew Fisher and Frank C Keil. 2016. The curse of expertise: When more knowledge leads to miscalibrated explanatory insight. Cognitive science, 40, 5 (2016), 1251\u20131269."},{"key":"e_1_3_2_2_25_1","volume-title":"How To Run A Successful Free Software Project - Producing Open Source Software. CreateSpace","author":"Fogel Karl","unstructured":"Karl Fogel. 2009. How To Run A Successful Free Software Project - Producing Open Source Software. CreateSpace, Scotts Valley, CA. isbn:1441437711"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-20883-7_9"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3338906.3338943"},{"key":"e_1_3_2_2_28_1","volume-title":"Proceedings of the 20th international conference on machine learning (ICML-03)","author":"F\u00fcrnkranz Johannes","year":"2003","unstructured":"Johannes F\u00fcrnkranz and Peter A Flach. 2003. An analysis of rule evaluation metrics. In Proceedings of the 20th international conference on machine learning (ICML-03). 202\u2013209."},{"key":"e_1_3_2_2_29_1","unstructured":"GitHub. 2020. GitHub Octoverse. https:\/\/octoverse.github.com\/credits\/ [Accessed on Jun-2023]"},{"key":"e_1_3_2_2_30_1","unstructured":"GitHub. 2022. GitHub Flavored Markdown Specs Paragraphs. https:\/\/github.com\/gfm\/##paragraphs [Accessed on Jun-2023]"},{"key":"e_1_3_2_2_31_1","unstructured":"GitHub. 2022. Setting guidelines for repository contributors. https:\/\/docs.github.com\/en\/communities\/setting-up-your-project-for-healthy-contributions\/setting-guidelines-for-repository-contributors [accessed on Jun-2023]"},{"key":"e_1_3_2_2_32_1","unstructured":"Google. 2023. Google Sanitizers (GitHub Repository). https:\/\/github.com\/google\/sanitizers [Accessed on Aug-2023]"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSR59073.2023.00015"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CHASE.2015.9"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICGSE.2006.261210"},{"key":"e_1_3_2_2_36_1","volume-title":"International journal of data mining & knowledge management process, 5, 2","author":"Hossin Mohammad","year":"2015","unstructured":"Mohammad Hossin and Md Nasir Sulaiman. 2015. A review on evaluation metrics for data classification evaluations. International journal of data mining & knowledge management process, 5, 2 (2015), 1."},{"key":"e_1_3_2_2_37_1","article-title":"An Evaluation of Preprocessing Techniques for Text Classification","volume":"16","author":"Kadhim Ammar Ismael","year":"2018","unstructured":"Ammar Ismael Kadhim. 2018. An Evaluation of Preprocessing Techniques for Text Classification. International Journal of Computer Science and Information Security, 16, 6 (2018).","journal-title":"International Journal of Computer Science and Information Security"},{"key":"e_1_3_2_2_38_1","volume-title":"Hands-on Data Science and Python Machine Learning","author":"Kane Frank","unstructured":"Frank Kane. 2017. 9.7 TF-IDF. In Hands-on Data Science and Python Machine Learning. Packt Publishing. isbn:978-1-78728-074-8"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383219.3383256"},{"key":"e_1_3_2_2_40_1","unstructured":"Imbalanced Learn. 2023. Imbalanced Learn (Website). https:\/\/imbalanced-learn.org\/stable\/ [Accessed on Aug-2023]"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2017.25"},{"key":"e_1_3_2_2_42_1","volume-title":"How software engineers use documentation: The state of the practice","author":"Lethbridge Timothy C","year":"2003","unstructured":"Timothy C Lethbridge, Janice Singer, and Andrew Forward. 2003. How software engineers use documentation: The state of the practice. IEEE software, 20, 6 (2003), 35\u201339."},{"key":"e_1_3_2_2_43_1","unstructured":"Jiawei Li and Iftekhar Ahmed. 2023. Commit Message Matters: Investigating Impact and Evolution of Commit Message Quality."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.3115\/1118108.1118117"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3196398.3196446"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2016.09.015"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CSCI.2017.171"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3180155.3180241"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1177\/0047281617721853"},{"key":"e_1_3_2_2_50_1","unstructured":"Microsoft. 2023. Microsoft PHPSQL (GitHub Repository). https:\/\/github.com\/microsoft\/msphpsql [Accessed on Aug-2023]"},{"key":"e_1_3_2_2_51_1","unstructured":"NVIDIA. 2023. NVIDIA NCCL (GitHub Repository). https:\/\/github.com\/NVIDIA\/nccl [Accessed on Aug-2023]"},{"key":"e_1_3_2_2_52_1","volume-title":"Practical Machine Learning in R","author":"Nwanganga Fred","unstructured":"Fred Nwanganga and Mike Chapple. 2020. 9.1.1.1 k-Fold Cross-Validation. In Practical Machine Learning in R. John Wiley & Sons. isbn:978-1-119-59151-1"},{"key":"e_1_3_2_2_53_1","unstructured":"Open Source Guides. 2022. Open Source Guides \u2013 Starting an Open Source Project. https:\/\/opensource.guide\/starting-a-project\/ [Accessed on Jun-2023]"},{"key":"e_1_3_2_2_54_1","unstructured":"OpenAI. 2023. ChatGPT (Website). https:\/\/chat.openai.com\/ [Accessed on Aug-2023]"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2020.2984173"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSM.2015.7332474"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/VISSOF.2009.5336433"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.5555\/1953048.2078195"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/SANER.2016.68"},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2017.2779479"},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10664-018-9660-3"},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2021.111047"},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/2635868.2635922"},{"key":"e_1_3_2_2_64_1","unstructured":"Brittany Reid Markus Wagner Marcelo d\u2019Amorim and Christoph Treude. 2022. Software Engineering User Study Recruitment on Prolific: An Experience Report. arXiv preprint arXiv:2201.05348."},{"key":"e_1_3_2_2_65_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10664-014-9323-y"},{"key":"e_1_3_2_2_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSME.2017.17"},{"key":"e_1_3_2_2_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544902.3546236"},{"key":"e_1_3_2_2_68_1","doi-asserted-by":"publisher","DOI":"10.17485\/ijst\/2016\/v9i20\/86869"},{"key":"e_1_3_2_2_69_1","unstructured":"Scikit-learn. 2023. Cross-validation: evaluating estimator performance (Documentation). https:\/\/scikit-learn.org\/stable\/modules\/generated\/sklearn.feature_selection.SelectPercentile.html [Accessed on Jun-2023]"},{"key":"e_1_3_2_2_70_1","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO.2017.8081530"},{"key":"e_1_3_2_2_71_1","volume-title":"Ten simple rules for helping newcomers become contributors to open projects. PLoS computational biology, 15, 9","author":"Sholler Dan","year":"2019","unstructured":"Dan Sholler, Igor Steinmacher, Denae Ford, Mara Averick, Mike Hoye, and Greg Wilson. 2019. Ten simple rules for helping newcomers become contributors to open projects. PLoS computational biology, 15, 9 (2019), e1007296."},{"key":"e_1_3_2_2_72_1","unstructured":"Spacy. 2023. Rule Based Matching (Documentation). https:\/\/spacy.io\/usage\/rule-based-matching [Accessed on Aug-2023]"},{"key":"e_1_3_2_2_73_1","doi-asserted-by":"publisher","DOI":"10.1145\/2675133.2675215"},{"key":"e_1_3_2_2_74_1","volume-title":"Christoph Treude, and Marco Aur\u00e9lio Gerosa.","author":"Steinmacher Igor","year":"2016","unstructured":"Igor Steinmacher, Tayana Uchoa Conte, Christoph Treude, and Marco Aur\u00e9lio Gerosa. 2016. Overcoming Open Source Project Entry Barriers with a Portal for Newcomers. In ICSE \u201916. Association for Computing Machinery, New York, NY, USA. 273\u2013284. isbn:9781450339001"},{"key":"e_1_3_2_2_75_1","doi-asserted-by":"publisher","DOI":"10.1145\/3180155.3180208"},{"key":"e_1_3_2_2_76_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-55128-4_21"},{"key":"e_1_3_2_2_77_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.infsof.2014.11.001"},{"key":"e_1_3_2_2_78_1","doi-asserted-by":"publisher","DOI":"10.1109\/MS.2018.110162131"},{"key":"e_1_3_2_2_79_1","doi-asserted-by":"publisher","DOI":"10.1145\/1852786.1852832"},{"key":"e_1_3_2_2_80_1","doi-asserted-by":"crossref","unstructured":"Xin Tan Yiran Chen Haohua Wu Minghui Zhou and Li Zhang. 2023. Is It Enough to Recommend Tasks to Newcomers? Understanding Mentoring on Good First Issues. arXiv preprint arXiv:2302.05058.","DOI":"10.1109\/ICSE48619.2023.00064"},{"key":"e_1_3_2_2_81_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3409746"},{"key":"e_1_3_2_2_82_1","volume-title":"Python Natural Language Processing","author":"Thanaki Jalaj","unstructured":"Jalaj Thanaki. 2017. 5.3.4.1 Understanding TF-IDF. In Python Natural Language Processing. Packt Publishing. isbn:978-1-78712-142-3"},{"key":"e_1_3_2_2_83_1","unstructured":"Valhalla. 2023. Valhalla (GitHub Repository). https:\/\/github.com\/valhalla\/valhalla [Accessed on Aug-2023]"},{"key":"e_1_3_2_2_84_1","doi-asserted-by":"publisher","DOI":"10.1145\/2531602.2531659"},{"key":"e_1_3_2_2_85_1","unstructured":"Sathiyamoorthi Velayutham. 2020. 3.5.1 Precision. In Handbook of Research on Applications and Implementations of Machine Learning Techniques. IGI Global. isbn:978-1-5225-9902-9"},{"key":"e_1_3_2_2_86_1","first-page":"7","article-title":"Preprocessing techniques for text mining-an overview","volume":"5","author":"Vijayarani S","year":"2015","unstructured":"S Vijayarani, Ms J Ilamathi, and Ms Nithya. 2015. Preprocessing techniques for text mining-an overview. International Journal of Computer Science & Communication Networks, 5, 1 (2015), 7\u201316.","journal-title":"International Journal of Computer Science & Communication Networks"},{"key":"e_1_3_2_2_87_1","doi-asserted-by":"publisher","DOI":"10.1145\/3489465"}],"event":{"name":"ESEC\/FSE '23: 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering","location":"San Francisco CA USA","acronym":"ESEC\/FSE '23","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering"]},"container-title":["Proceedings of the 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3611643.3616288","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3611643.3616288","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:03Z","timestamp":1750178163000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3611643.3616288"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,30]]},"references-count":87,"alternative-id":["10.1145\/3611643.3616288","10.1145\/3611643"],"URL":"https:\/\/doi.org\/10.1145\/3611643.3616288","relation":{},"subject":[],"published":{"date-parts":[[2023,11,30]]},"assertion":[{"value":"2023-11-30","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}