{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T19:13:46Z","timestamp":1775934826963,"version":"3.50.1"},"reference-count":80,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2023,4,1]],"date-time":"2023-04-01T00:00:00Z","timestamp":1680307200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,4,1]],"date-time":"2023-04-01T00:00:00Z","timestamp":1680307200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"publisher","award":["RGPAS-2019-00075"],"award-info":[{"award-number":["RGPAS-2019-00075"]}],"id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"publisher","award":["RGPIN-2019-06014"],"award-info":[{"award-number":["RGPIN-2019-06014"]}],"id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software Eng"],"published-print":{"date-parts":[[2023,5]]},"DOI":"10.1007\/s10664-022-10282-8","type":"journal-article","created":{"date-parts":[[2023,4,3]],"date-time":"2023-04-03T06:15:52Z","timestamp":1680502552000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Towards a change taxonomy for machine learning pipelines"],"prefix":"10.1007","volume":"28","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3552-9460","authenticated-orcid":false,"given":"Aaditya","family":"Bhatia","sequence":"first","affiliation":[]},{"given":"Ellis E.","family":"Eghan","sequence":"additional","affiliation":[]},{"given":"Manel","family":"Grichi","sequence":"additional","affiliation":[]},{"given":"William G.","family":"Cavanagh","sequence":"additional","affiliation":[]},{"given":"Zhen Ming","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Bram","family":"Adams","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,4,1]]},"reference":[{"key":"10282_CR1","unstructured":"Adding auto-generated files example (2018). https:\/\/github.com\/alorozco53\/text-detection-ctpn\/commit\/f90326f68522f3af3e4cdf5688138685de66bace"},{"key":"10282_CR2","unstructured":"Adding\/removing dependency example (2019). https:\/\/github.com\/google\/youtube-8m\/commit\/09774db80a515b667a91b14fe21a6134f3856c7a"},{"key":"10282_CR3","doi-asserted-by":"crossref","unstructured":"Amershi S, Begel A, Bird C, DeLine R, Gall H, Kamar E, Nagappan N, Nushi B, Zimmermann T (2019) Software engineering for machine learning: a case study. In: 2019 IEEE\/ACM 41st international conference on software engineering: software engineering in practice (ICSE-SEIP), pp 291\u2013300","DOI":"10.1109\/ICSE-SEIP.2019.00042"},{"key":"10282_CR4","doi-asserted-by":"crossref","unstructured":"Arpteg A, Brinne B, Crnkovic-Friis L, Bosch J (2018) Software engineering challenges of deep learning. In: 2018 44th Euromicro conference on software engineering and advanced applications (SEAA). IEEE, pp 50\u201359","DOI":"10.1109\/SEAA.2018.00018"},{"issue":"6","key":"10282_CR5","doi-asserted-by":"publisher","first-page":"349","DOI":"10.1002\/smr.412","volume":"21","author":"HC Benestad","year":"2009","unstructured":"Benestad HC, Anda B, Arisholm E (2009) Understanding software maintenance and evolution by analyzing individual changes: a literature review. J Softw Maint Evol Res Pract 21(6):349\u2013378","journal-title":"J Softw Maint Evol Res Pract"},{"key":"10282_CR6","doi-asserted-by":"crossref","unstructured":"Biazzini M, Baudry B (2014) may the fork be with you: novel metrics to analyze collaboration on github. In: Proceedings of the 5th international workshop on emerging trends in software metrics, pp 37\u201343","DOI":"10.1145\/2593868.2593875"},{"key":"10282_CR7","doi-asserted-by":"crossref","unstructured":"Bird C, Bachmann A, Aune E, Duffy J, Bernstein A, Filkov V, Devanbu P (2009) Fair and balanced? bias in bug-fix datasets. In: Proceedings of the 7th joint meeting of the european software engineering conference and the ACM SIGSOFT symposium on the foundations of software engineering, pp 121\u2013130","DOI":"10.1145\/1595696.1595716"},{"key":"10282_CR8","doi-asserted-by":"crossref","unstructured":"Bissyand\u00e9 TF, Thung F, Wang S, Lo D, Jiang L, R\u00e9 veill\u00e8re L (2013) Empirical evaluation of bug linking. In: 2013 17th European conference on software maintenance and Reengineering, pp 89\u201398","DOI":"10.1109\/CSMR.2013.19"},{"key":"10282_CR9","doi-asserted-by":"crossref","unstructured":"Bloice MD, Holzinger A (2016) A tutorial on machine learning and data science tools with python. Machine Learning for Health Informatics, pp 435\u2013480","DOI":"10.1007\/978-3-319-50478-0_22"},{"key":"10282_CR10","doi-asserted-by":"publisher","first-page":"112","DOI":"10.1016\/j.jss.2018.09.016","volume":"146","author":"H Borges","year":"2018","unstructured":"Borges H, Valente MT (2018) What\u2019s in a github star? understanding repository starring practices in a social coding platform. J Syst Softw 146:112\u2013129","journal-title":"J Syst Softw"},{"key":"10282_CR11","doi-asserted-by":"crossref","unstructured":"Brisson S, Noei E, Lyons K (2020) We are family: analyzing communication in github software repositories and their forks. In: 2020 IEEE 27th international conference on software analysis Evolution and Reengineering (SANER). IEEE, pp 59\u201369","DOI":"10.1109\/SANER48275.2020.9054834"},{"key":"10282_CR12","unstructured":"Bug fix example 1 (2019). https:\/\/github.com\/piaosonglin1985\/tf-faster-rcnn\/commit\/8e60b9dc92390f1bfb8cf6e62d93bcabbc123c4a"},{"key":"10282_CR13","unstructured":"Bug fix example 2 (2017) https:\/\/github.com\/MarvinTeichmann\/KittiSeg\/commit\/ec6b5ccb6f30ac6591d03faa2fa0bf8b1fdbf3ef"},{"key":"10282_CR14","unstructured":"Change file permission example (2017). https:\/\/api.github.com\/repos\/CodeRecipeJYP\/fast-style-transfer\/commits\/7027a3843fa3d793697da5ba188887629a4d69eb"},{"key":"10282_CR15","doi-asserted-by":"crossref","unstructured":"Chen Z, Zhang JM, Sarro F, Harman M (2022) Maat: a novel ensemble approach to addressing fairness and performance bugs for machine learning software. In: Proceedings of the 30th ACM joint european software engineering conference and symposium on the foundations of software engineering (ESEC\/FSE\u201922). ACM Press","DOI":"10.1145\/3540250.3549093"},{"key":"10282_CR16","doi-asserted-by":"crossref","unstructured":"Cheng D, Cao C, Xu C, Ma X (2018) Manifesting bugs in machine learning code: An explorative study with mutation testing. In: 2018 IEEE international conference on software quality, reliability and security (QRS). IEEE, pp 313\u2013324","DOI":"10.1109\/QRS.2018.00044"},{"key":"10282_CR17","doi-asserted-by":"crossref","unstructured":"Constantino K, Zhou S, Souza M, Figueiredo E, K\u00e4stner C (2020) Understanding collaborative software development: an interview study. In: Proceedings of the 15th international conference on global software engineering, pp 55\u201365","DOI":"10.1145\/3372787.3390442"},{"key":"10282_CR18","doi-asserted-by":"crossref","unstructured":"Cort\u00e9s-Coy LF, Linares-V\u00e1squez M, Aponte J, Poshyvanyk D (2014) On automatically generating commit messages via summarization of source code changes. In: 2014 IEEE 14th international working conference on source code analysis and manipulation, pp 275\u2013284","DOI":"10.1109\/SCAM.2014.14"},{"issue":"1","key":"10282_CR19","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1007\/s10664-017-9589-y","volume":"24","author":"A Decan","year":"2019","unstructured":"Decan A, Mens T, Grosjean P (2019) An empirical comparison of dependency network evolution in seven software packaging ecosystems. Empir Softw Eng 24(1):381\u2013416","journal-title":"Empir Softw Eng"},{"key":"10282_CR20","unstructured":"Dey T, Mockus A (2020) Which pull requests get accepted and why? a study of popular npm packages, arXiv:2003.01153"},{"key":"10282_CR21","doi-asserted-by":"crossref","unstructured":"Dwarakanath A, Ahuja M, Sikand S, Rao RM, Bose RJC, Dubash N, Podder S (2018) Identifying implementation bugs in machine learning based image classifiers using metamorphic testing. In: Proceedings of the 27th ACM SIGSOFT international symposium on software testing and analysis, pp 118\u2013128","DOI":"10.1145\/3213846.3213858"},{"key":"10282_CR22","unstructured":"External documentation example (2017). https:\/\/github.com\/Raochuan89\/TensorBox\/commit\/aeb45e8fdc100f74aa8cf2fa85b1324483a1fff1"},{"issue":"1","key":"10282_CR23","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10664-020-09916-6","volume":"26","author":"Y Fan","year":"2021","unstructured":"Fan Y, Xia X, Lo D, Hassan AE, Li S (2021) What makes a popular academic AI repository? Empir Softw Eng 26(1):1\u201335","journal-title":"Empir Softw Eng"},{"key":"10282_CR24","doi-asserted-by":"crossref","unstructured":"Farag\u00f3 C, Heged\u0169s P (2014) R Ferenc, The impact of version control operations on the quality change of the source code. In: International conference on computational science and its applications. Springer, pp 353\u2013369","DOI":"10.1007\/978-3-319-09156-3_26"},{"key":"10282_CR25","unstructured":"Feature example (2018). https:\/\/github.com\/tch\/PointCNN\/commit\/891f3e04b44805b066865aeef1275ac6f217c58f"},{"key":"10282_CR26","unstructured":"Fogel K (2005) Producing open source software: How to run a successful free software project. O\u2019Reilly Media, Inc.,"},{"issue":"1","key":"10282_CR27","doi-asserted-by":"publisher","first-page":"260","DOI":"10.1007\/s10664-014-9356-2","volume":"21","author":"DM German","year":"2016","unstructured":"German DM, Adams B, Hassan AE (2016) Continuously mining distributed version control systems: an empirical study of how linux uses git. Empir. Softw. Eng. 21(1):260\u2013299","journal-title":"Empir. Softw. Eng."},{"key":"10282_CR28","doi-asserted-by":"crossref","unstructured":"Ghadhab L, Jenhani I, Mkaouer MW, Messaoud MB (2021) Augmenting commit classification by using fine-grained source code changes and a pre-trained deep neural language model, vol 135","DOI":"10.1016\/j.infsof.2021.106566"},{"key":"10282_CR29","doi-asserted-by":"crossref","unstructured":"Gousios G, Pinzger M, Deursen AV (2014) An exploratory study of the pull-based software development model. In: Proceedings of the 36th international conference on software engineering, pp 345\u2013355","DOI":"10.1145\/2568225.2568260"},{"key":"10282_CR30","doi-asserted-by":"crossref","unstructured":"Granger B, P\u00e9rez F (2021) Jupyter: thinking and storytelling with code and data Authorea Preprints","DOI":"10.22541\/au.161298309.98344404\/v1"},{"key":"10282_CR31","doi-asserted-by":"crossref","unstructured":"Hindle A, German DM, Godfrey MW, Holt RC (2009) Automatic classication of large changes into maintenance categories. In: 2009 IEEE 17th International Conference on Program Comprehension. IEEE, pp 30\u201339","DOI":"10.1109\/ICPC.2009.5090025"},{"key":"10282_CR32","doi-asserted-by":"publisher","unstructured":"Hindle D, German M, Holt R (2008) What do large commits tell us? a taxonomical study of large commits. In: Proceedings of the 2008 international working conference on mining software repositories, ser. MSR \u201908. New York, NY, USA: association for computing machinery, pp 99\u2013108. [Online]. Available:. https:\/\/doi.org\/10.1145\/1370750.1370773","DOI":"10.1145\/1370750.1370773"},{"issue":"1","key":"10282_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40064-016-2897-7","volume":"5","author":"Y Hu","year":"2016","unstructured":"Hu Y, Zhang J, Bai X, Yu S, Yang Z (2016) Influence analysis of github repositories. SpringerPlus 5(1):1\u201319","journal-title":"SpringerPlus"},{"key":"10282_CR34","doi-asserted-by":"crossref","unstructured":"Idowu S, Str\u00fcber D, Berger T (2021) Asset management in machine learning: a survey. In: 2021 IEEE\/ACM 43rd international conference on software engineering: software engineering in practice (ICSE-SEIP), pp 51\u201360","DOI":"10.1109\/ICSE-SEIP52600.2021.00014"},{"key":"10282_CR35","unstructured":"Input data example (2017). https:\/\/github.com\/google\/youtube-8m\/commit\/4619056162f466293d99e0c59512f8d0f3427fe2"},{"key":"10282_CR36","unstructured":"Internal documentation example-1 (2017). https:\/\/github.com\/google\/youtube-8m\/commit\/3439e33d81df8cd906987ee5889ebc937186114a"},{"key":"10282_CR37","unstructured":"Internal documentation example-2 (2017). https:\/\/github.com\/CharlesShang\/FastMaskRCNN\/commit\/0d8ddfaa55dbd3d553b79aed34f40662c46aa45f"},{"key":"10282_CR38","doi-asserted-by":"publisher","unstructured":"Kalliamvakou E, Gousios G, Blincoe K, Singer L, German DM, Damian D (2014) The promises and perils of mining github. In: Proceedings of the 11th working conference on mining software repositories, ser. MSR 2014. New York, NY, USA: association for computing machinery, pp 92\u2013101. [Online]. Available: https:\/\/doi.org\/10.1145\/2597073.2597074","DOI":"10.1145\/2597073.2597074"},{"key":"10282_CR39","doi-asserted-by":"crossref","unstructured":"Kim M, Cai D, Kim S (2011) An empirical investigation into the role of api-level refactorings during software evolution. In: Proceedings of the 33rd international conference on software engineering, pp 151\u2013160","DOI":"10.1145\/1985793.1985815"},{"key":"10282_CR40","unstructured":"Krippendorff K (2011) Computing krippendorff\u2019s alpha-reliability"},{"key":"10282_CR41","doi-asserted-by":"crossref","unstructured":"Li H, Shang W, Adams B, Sayagh M, Hassan AE (2020) A qualitative study of the benefits and costs of logging from developers\u2019 perspectives. IEEE Transactions on Software Engineering","DOI":"10.1109\/TSE.2020.2970422"},{"key":"10282_CR42","doi-asserted-by":"crossref","unstructured":"Lima A, Rossi L, Musolesi M (2014) Coding together at scale: Github as a collaborative social network. In: Eighth international AAAI conference on weblogs and social media","DOI":"10.1609\/icwsm.v8i1.14552"},{"key":"10282_CR43","doi-asserted-by":"crossref","unstructured":"Mart\u00ednez-Fern\u00e1ndez S, Bogner J, Franch X, Oriol M, Siebert J, Trendowicz A, Vollmer AM, Wagner S (2021) Software engineering for ai-based systems, a survey, arXiv:2105.01984","DOI":"10.1145\/3487043"},{"key":"10282_CR44","unstructured":"Model structure example (2018). https:\/\/github.com\/shikorab\/tf-faster-rcnn\/commit\/327778b2c4f297b307ff0de552d2bfc47278e290"},{"key":"10282_CR45","doi-asserted-by":"crossref","unstructured":"Mukherjee S, Almanza A, Rubio-Gonz\u00e1lez C (2021) Fixing dependency errors for python build reproducibility. In: Proceedings of the 30th ACM SIGSOFT international symposium on software testing and analysis, pp 439\u2013451","DOI":"10.1145\/3460319.3464797"},{"key":"10282_CR46","doi-asserted-by":"crossref","unstructured":"Nahar N, Zhou S, Lewis G, K\u00e4stner C (2022) Collaboration challenges in building ml-enabled systems: communication, documentation, engineering, and process. In: 2022 IEEE\/ACM 44th international conference on software engineering (ICSE)","DOI":"10.1145\/3510003.3510209"},{"key":"10282_CR47","unstructured":"Ng A (2021) Mlops: from model-centric to data-centric ai"},{"key":"10282_CR48","unstructured":"O\u2019Leary K, Uchida M (2020) Common problems with creating machine learning pipelines from existing code"},{"key":"10282_CR49","unstructured":"Output data example (2018). https:\/\/github.com\/Mappy\/tf-faster-rcnn\/commit\/51e0889fbdcd4c48f31def4c1cb05a5a4db04671"},{"issue":"4","key":"10282_CR50","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1109\/MS.2020.2993662","volume":"37","author":"I Ozkaya","year":"2020","unstructured":"Ozkaya I (2020) What is really different in engineering ai-enabled systems? IEEE Softw 37(4):3\u20136","journal-title":"IEEE Softw"},{"key":"10282_CR51","unstructured":"Parameter tuning example (2017). https:\/\/github.com\/google\/youtube-8m\/commit\/0e526caace96d3cf6f0686757d568f9ffba998b4"},{"key":"10282_CR52","unstructured":"Parameter tuning example 2 (2017). https:\/\/github.com\/DeepLabCut\/DeepLabCut\/commit\/6568c2ba6facf5d90b2c39af7b0f024a40f2b15f"},{"key":"10282_CR53","doi-asserted-by":"crossref","unstructured":"Pashchenko I, Vu D-L, Massacci F (2020) A qualitative study of dependency management and its security implications. In: Proceedings of the 2020 ACM SIGSAC conference on computer and communications security, pp 1513\u20131531","DOI":"10.1145\/3372297.3417232"},{"key":"10282_CR54","unstructured":"Pipeline Performance example (2018). https:\/\/github.com\/google\/youtube-8m\/pull\/69"},{"issue":"2","key":"10282_CR55","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1145\/3299887.3299891","volume":"47","author":"N Polyzotis","year":"2018","unstructured":"Polyzotis N, Roy S, Whang SE, Zinkevich M (2018) Data lifecycle challenges in production machine learning: a survey. ACM SIGMOD Rec 47(2):17\u201328","journal-title":"ACM SIGMOD Rec"},{"key":"10282_CR56","unstructured":"Pre-processing example (2018). https:\/\/github.com\/lancele\/Semantic-Segmentation-Suite\/commit\/d50b5c812392614fc2bdaf269921beb1f7086f63"},{"key":"10282_CR57","unstructured":"Project data example (2017). https:\/\/github.com\/Bruceeeee\/facenet\/commit\/d9e6213cd8286334000ddf75529eba3662cef38a#diff-dbc5c3b9f46e69236207956b34904d0dea62ff866d442e97bb397ff49a03a86b"},{"key":"10282_CR58","doi-asserted-by":"crossref","unstructured":"Rahman MM, Roy CK (2014) An insight into the pull requests of github. In: Proceedings of the 11th working conference on mining software repositories, pp 364\u2013367","DOI":"10.1145\/2597073.2597121"},{"key":"10282_CR59","doi-asserted-by":"crossref","unstructured":"Ren L, Zhou S, K\u00e4 stner C (2018) Poster: forks insight: Providing an overview of github forks. In: 2018 IEEE\/ACM 40th international conference on software engineering: companion (ICSE-Companion), pp 179\u2013180","DOI":"10.1145\/3183440.3195085"},{"key":"10282_CR60","doi-asserted-by":"crossref","unstructured":"Salza P, Palomba F, Di Nucci D, D\u2019Uva C, De Lucia A, Ferrucci F (2018) Do developers update third-party libraries in mobile apps?. In: Proceedings of the 26th conference on program comprehension, pp 255\u2013265","DOI":"10.1145\/3196321.3196341"},{"key":"10282_CR61","doi-asserted-by":"crossref","unstructured":"Sambasivan N, Kapania S, Highfill H, Akrong D, Paritosh P, Aroyo LM (2021) Everyone wants to do the model work, not the data work: data cascades in high-stakes ai. In: Proceedings of the 2021 CHI conference on human factors in computing systems, pp 1\u201315","DOI":"10.1145\/3411764.3445518"},{"key":"10282_CR62","unstructured":"Santos JAM, Santos AR, Mendon\u00e7 a MG (2015) Investigating bias in the search phase of software engineering secondary studies. In: CIbSE, pp 488"},{"key":"10282_CR63","unstructured":"Sato D, Wider A, Windheuser C (2019) Continuous delivery for machine learning. https:\/\/martinfowler.com\/articles\/cd4ml.html#DeploymentPipelines"},{"key":"10282_CR64","unstructured":"Sharing example (2016). https:\/\/github.com\/anishathalye\/neural-style\/pull\/40"},{"key":"10282_CR65","unstructured":"Sharing example (2018). https:\/\/github.com\/jerichooconnell\/tf_unet\/commit\/60b67bb964d19dd4a4677f7557dc738838a116e9"},{"issue":"4","key":"10282_CR66","doi-asserted-by":"publisher","first-page":"552","DOI":"10.1109\/TSE.2012.43","volume":"39","author":"S Shivaji","year":"2012","unstructured":"Shivaji S, Whitehead EJ, Akella R, Kim S (2012) Reducing features to improve code change-based bug prediction. IEEE Trans Softw Eng 39 (4):552\u2013569","journal-title":"IEEE Trans Softw Eng"},{"key":"10282_CR67","unstructured":"Swanson EB (1976) The dimensions of maintenance. In: Proceedings of the 2nd international conference on Software engineering, pp 492\u2013497"},{"key":"10282_CR68","doi-asserted-by":"crossref","unstructured":"Tizpaz-Niari S, \u010cerny\u0300 P, Trivedi A (2020) Detecting and understanding real-world differential performance bugs in machine learning libraries. In: Proceedings of the 29th ACM SIGSOFT international symposium on software testing and analysis, pp 189\u2013199","DOI":"10.1145\/3395363.3404540"},{"key":"10282_CR69","unstructured":"Training infrastructure example (2017). https:\/\/github.com\/IAC-Team\/SemSeg\/commit\/efbfffbd202cccbd54fca1125ed6de41b5df2f90"},{"key":"10282_CR70","unstructured":"Update dependency example (2018). https:\/\/github.com\/google\/youtube-8m\/commit\/72f42cd938d3cf4f928614a5fcdca237489e7c92"},{"key":"10282_CR71","unstructured":"Validation example (2017). https:\/\/github.com\/bethesirius\/TensorBox\/commit\/1eb41e944494e721f3c4b1a5d287af99f4035a42"},{"key":"10282_CR72","doi-asserted-by":"crossref","unstructured":"Wang J, Li L, Zeller A (2020) Better code, better sharing: on the need of analyzing jupyter notebooks. In: Proceedings of the ACM\/IEEE 42nd international conference on software engineering: new ideas and emerging results, pp 53\u201356","DOI":"10.1145\/3377816.3381724"},{"key":"10282_CR73","doi-asserted-by":"crossref","unstructured":"Washizaki H, Uchida H, Khomh F, Gu\u00e9 h\u00e9neuc Y-G (2019) Studying software engineering patterns for designing machine learning systems. In: 2019 10th International workshop on empirical software engineering in practice (IWESEP). IEEE, pp 49\u2013495","DOI":"10.1109\/IWESEP49350.2019.00017"},{"key":"10282_CR74","doi-asserted-by":"crossref","unstructured":"Wu R, Zhang H, Kim S, Cheung S-C (2011) Relink: recovering links between bugs and changes. In: Proceedings of the 19th ACM SIGSOFT symposium and the 13th european conference on foundations of software engineering, pp 15\u201325","DOI":"10.1145\/2025113.2025120"},{"key":"10282_CR75","doi-asserted-by":"publisher","first-page":"296","DOI":"10.1016\/j.jss.2015.12.019","volume":"113","author":"M Yan","year":"2016","unstructured":"Yan M, Fu Y, Zhang X, Yang D, Xu L, Kymer JD (2016) Automatically classifying software changes via discriminative topic model: supporting multi-category and cross-project. J Syst Softw 113:296\u2013308","journal-title":"J Syst Softw"},{"key":"10282_CR76","doi-asserted-by":"crossref","unstructured":"Zhang X, Chen Y, Gu Y, Zou W, Xie X, Jia X, Xuan J (2018) How do multiple pull requests change the same code: a study of competing pull requests in github. In: 2018 IEEE international conference on software maintenance and evolution (ICSME).IEEE, pp 228\u2013239","DOI":"10.1109\/ICSME.2018.00032"},{"key":"10282_CR77","doi-asserted-by":"crossref","unstructured":"Zhang T, Gao C, Ma L, Lyu M, Kim M (2019) An empirical study of common challenges in developing deep learning applications. In: 2019 IEEE 30th international symposium on software reliability engineering (ISSRE). IEEE, pp 104\u2013115","DOI":"10.1109\/ISSRE.2019.00020"},{"key":"10282_CR78","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/j.infsof.2017.02.003","volume":"86","author":"Y Zhao","year":"2017","unstructured":"Zhao Y, Leung H, Yang Y, Zhou Y, Xu B (2017) Towards an understanding of change types in bug fixing code. Inf Softw Technol 86:37\u201353","journal-title":"Inf Softw Technol"},{"key":"10282_CR79","doi-asserted-by":"crossref","unstructured":"Zhou S, Vasilescu B, K\u00e4 stner C (2020) How has forking changed in the last 20 years? a study of hard forks on github. In: 2020 IEEE\/ACM 42nd international conference on software engineering (ICSE). IEEE, pp 445\u2013456","DOI":"10.1145\/3377811.3380412"},{"key":"10282_CR80","doi-asserted-by":"crossref","unstructured":"Zhou S, Vasilescu B, Kastner C (2019) What the fork: a study of inefficient and efficient forking practices in social coding. In: Proceedings of the 2019 27th ACM joint meeting on european software engineering conference and symposium on the foundations of software engineering, pp 350\u2013361","DOI":"10.1145\/3338906.3338918"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-022-10282-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10664-022-10282-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-022-10282-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,26]],"date-time":"2023-05-26T09:45:26Z","timestamp":1685094326000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10664-022-10282-8"}},"subtitle":["Empirical study of ML pipelines and forks related to academic publications"],"short-title":[],"issued":{"date-parts":[[2023,4,1]]},"references-count":80,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2023,5]]}},"alternative-id":["10282"],"URL":"https:\/\/doi.org\/10.1007\/s10664-022-10282-8","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"value":"1382-3256","type":"print"},{"value":"1573-7616","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,4,1]]},"assertion":[{"value":"19 December 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 April 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"No ethics approval was required for this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"<!--Emphasis Type='Bold' removed-->Ethics approval"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"<!--Emphasis Type='Bold' removed-->Conflict of interests\/Competing interests"}}],"article-number":"60"}}