{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T15:30:21Z","timestamp":1759332621356,"version":"3.37.3"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2023,2,16]],"date-time":"2023-02-16T00:00:00Z","timestamp":1676505600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,2,16]],"date-time":"2023-02-16T00:00:00Z","timestamp":1676505600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100003151","name":"Fonds de recherche du Qu\u00e9bec \u2013 Nature et technologies","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003151","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software Eng"],"published-print":{"date-parts":[[2023,3]]},"DOI":"10.1007\/s10664-022-10271-x","type":"journal-article","created":{"date-parts":[[2023,2,16]],"date-time":"2023-02-16T01:02:43Z","timestamp":1676509363000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Refactoring practices in the context of data-intensive systems"],"prefix":"10.1007","volume":"28","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8861-9526","authenticated-orcid":false,"given":"Biruk Asmare","family":"Muse","sequence":"first","affiliation":[]},{"given":"Foutse","family":"Khomh","sequence":"additional","affiliation":[]},{"given":"Giuliano","family":"Antoniol","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,2,16]]},"reference":[{"key":"10271_CR1","doi-asserted-by":"crossref","unstructured":"Agrawal R, Imielinski T, Swami A (1993) Mining associations between sets of items in large databases. In: Proceedings of the ACM SIGMOD international conference on management of data, pp 207\u2013216","DOI":"10.1145\/170036.170072"},{"key":"10271_CR2","unstructured":"Agrawal R, Srikant R, et al. (1994) Fast algorithms for mining association rules. In: Proc. 20th int. conf. very large data bases, VLDB, vol 1215. Citeseer, pp 487\u2013499"},{"key":"10271_CR3","doi-asserted-by":"crossref","unstructured":"de Almeida Filho FG, Martins ADF, Vinuto TdS, Monteiro JM, de Sousa \u00cdP, de Castro Machado J, Rocha LS (2019) Prevalence of bad smells in PL\/SQL projects. In: Proceedings of the 27th international conference on program comprehension. IEEE Press, pp 116\u2013121","DOI":"10.1109\/ICPC.2019.00025"},{"issue":"114","key":"10271_CR4","first-page":"176","volume":"167","author":"EA Alomar","year":"2021","unstructured":"Alomar EA, Peruma A, Mkaouer MW, Newman C, Ouni A, Kessentini M (2021) How we refactor and how we document it? On the use of supervised machine learning algorithms to classify refactoring documentation. Expert Syst Appl 167(114):176","journal-title":"Expert Syst Appl"},{"issue":"3","key":"10271_CR5","doi-asserted-by":"publisher","first-page":"421","DOI":"10.1109\/TKDE.2017.2772252","volume":"30","author":"N Arzamasova","year":"2018","unstructured":"Arzamasova N, Sch\u00e4ler M, B\u00f6hm K (2018) Cleaning antipatterns in an SQL query log. IEEE Trans Knowl Data Eng 30(3):421\u2013434","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"10271_CR6","doi-asserted-by":"crossref","unstructured":"Brin S, Motwani R, Ullman JD, Tsur S (1997) Dynamic itemset counting and implication rules for market basket data. In: Proceedings of the 1997 ACM SIGMOD international conference on Management of data, pp 255\u2013264","DOI":"10.1145\/253262.253325"},{"key":"10271_CR7","doi-asserted-by":"crossref","unstructured":"Ch\u00e1vez A, Ferreira I, Fernandes E, Cedrim D, Garcia A (2017) How does refactoring affect internal quality attributes? a multi-project study. In: Proceedings of the 31st Brazilian symposium on software engineering, pp 74\u201383","DOI":"10.1145\/3131151.3131171"},{"key":"10271_CR8","doi-asserted-by":"publisher","first-page":"314","DOI":"10.1016\/j.ins.2014.01.015","volume":"275","author":"CP Chen","year":"2014","unstructured":"Chen CP, Zhang CY (2014) Data-intensive applications, challenges, techniques and technologies: A survey on big data. Inf Sci 275:314\u2013347","journal-title":"Inf Sci"},{"key":"10271_CR9","first-page":"500","volume-title":"Mathematical methods of statistics","author":"H Cramer","year":"1946","unstructured":"Cramer H (1946) Mathematical methods of statistics. Princeton U, Press, Princeton, p 500"},{"key":"10271_CR10","doi-asserted-by":"crossref","unstructured":"Dig D, Comertoglu C, Marinov D, Johnson R (2006) Automated detection of refactorings in evolving components. In: European conference on object-oriented programming. Springer, pp 404\u2013428","DOI":"10.1007\/11785477_24"},{"key":"10271_CR11","doi-asserted-by":"crossref","unstructured":"Falleri JR, Morandat F, Blanc X, Martinez M, Monperrus M (2014) Fine-grained and accurate source code differencing. In: Proceedings of the 29th ACM\/IEEE international conference on Automated software engineering, pp 313\u2013324","DOI":"10.1145\/2642937.2642982"},{"key":"10271_CR12","doi-asserted-by":"crossref","unstructured":"Ferreira I, Fernandes E, Cedrim D, Uch\u00f4a A, Bibiano AC, Garcia A, Correia JL, Santos F, Nunes G, Barbosa C et al (2018) The buggy side of code refactoring: Understanding the relationship between refactorings and bugs. In: Proceedings of the 40th international conference on software engineering: companion proceeedings, pp 406\u2013407","DOI":"10.1145\/3183440.3195030"},{"key":"10271_CR13","doi-asserted-by":"publisher","unstructured":"Foidl H, Felderer M, Biffl S (2019) Technical debt in data-intensive software systems. In: 2019 45th Euromicro conference on software engineering and advanced applications (SEAA). https:\/\/doi.org\/10.1109\/SEAA.2019.00058, pp 338\u2013341","DOI":"10.1109\/SEAA.2019.00058"},{"key":"10271_CR14","doi-asserted-by":"publisher","unstructured":"Foidl H, Felderer M, Ramler R (2022) Data smells: categories, causes and consequences, and detection of suspicious data in ai-based systems. In: Crnkovic I (ed) Proceedings of the 1st international conference on AI engineering: software engineering for AI, CAIN 2022, Pittsburgh, Pennsylvania, May 16-24, 2022. https:\/\/doi.org\/10.1145\/3522664.3528590. ACM, pp 229\u2013239","DOI":"10.1145\/3522664.3528590"},{"key":"10271_CR15","doi-asserted-by":"publisher","first-page":"256","DOI":"10.1007\/3-540-45672-4_31","volume":"2002","author":"M Fowler","year":"2002","unstructured":"Fowler M (2002) Refactoring: Improving the design of existing code. Extreme Program Agile Methods\u2013XP\/Agil Universe 2002:256","journal-title":"Extreme Program Agile Methods\u2013XP\/Agil Universe"},{"key":"10271_CR16","doi-asserted-by":"publisher","unstructured":"Hummel O, Eichelberger H, Giloj A, Werle D, Schmid K (2018) A collection of software engineering challenges for big data system development. In: 2018 44th Euromicro conference on software engineering and advanced applications (SEAA). https:\/\/doi.org\/10.1109\/SEAA.2018.00066, pp 362\u2013369","DOI":"10.1109\/SEAA.2018.00066"},{"key":"10271_CR17","doi-asserted-by":"crossref","unstructured":"Hummel O, Eichelberger H, Giloj A, Werle D, Schmid K (2018) A collection of software engineering challenges for big data system development. In: 2018 44th euromicro conference on software engineering and advanced applications (SEAA). IEEE, pp 362\u2013369","DOI":"10.1109\/SEAA.2018.00066"},{"key":"10271_CR18","doi-asserted-by":"crossref","unstructured":"Iammarino M, Zampetti F, Aversano L, Di Penta M (2019) Self-admitted technical debt removal and refactoring actions: Co-occurrence or more?. In: 2019 IEEE international conference on software maintenance and evolution (ICSME). IEEE, pp 186\u2013190","DOI":"10.1109\/ICSME.2019.00029"},{"key":"10271_CR19","unstructured":"Karwin B (2010) SQL Antipatterns: Avoiding the pitfalls of database programming Pragmatic Bookshelf"},{"key":"10271_CR20","doi-asserted-by":"crossref","unstructured":"Khumnin P, Senivongse T (2017) SQL antipatterns detection and database refactoring process. In: 2017 18th IEEE\/ACIS international conference on software engineering, artificial intelligence, networking and parallel\/distributed computing (SNPD), pp 199\u2013205","DOI":"10.1109\/SNPD.2017.8022723"},{"key":"10271_CR21","doi-asserted-by":"crossref","unstructured":"Kim M, Gee M, Loh A, Rachatasumrit N (2010) Ref-finder: a refactoring reconstruction tool based on logic query templates. In: Proceedings of the 18th ACM SIGSOFT international symposium on Foundations of software engineering, pp 371\u2013372","DOI":"10.1145\/1882291.1882353"},{"issue":"3","key":"10271_CR22","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1007\/s00766-018-0293-2","volume":"23","author":"Z Kurtanovi\u0107","year":"2018","unstructured":"Kurtanovi\u0107 Z, Maalej W (2018) On user rationale in software engineering. Requir Eng 23(3):357\u2013379","journal-title":"Requir Eng"},{"key":"10271_CR23","doi-asserted-by":"crossref","unstructured":"Mahmoudi M, Nadi S, Tsantalis N (2019) Are refactorings to blame? an empirical study of refactorings in merge conflicts. In: 2019 IEEE 26th international conference on software analysis, evolution and reengineering (SANER). IEEE, pp 151\u2013162","DOI":"10.1109\/SANER.2019.8668012"},{"key":"10271_CR24","doi-asserted-by":"crossref","unstructured":"McDonald N, Goggins S (2013) Performance and participation in open source software on Github. In: CHI\u201913 extended abstracts on human factors in computing systems, pp 139\u2013144","DOI":"10.1145\/2468356.2468382"},{"key":"10271_CR25","doi-asserted-by":"crossref","unstructured":"Meurice L, Nagy C, Cleve A (2016) Static analysis of dynamic database usage in Java systems. In: International conference on advanced information systems engineering. Springer, pp 491\u2013506","DOI":"10.1007\/978-3-319-39696-5_30"},{"issue":"4","key":"10271_CR26","doi-asserted-by":"publisher","first-page":"76","DOI":"10.1109\/MS.2006.105","volume":"23","author":"GC Murphy","year":"2006","unstructured":"Murphy GC, Kersten M, Findlater L (2006) How are java software developers using the elipse ide? IEEE Softw 23(4):76\u201383","journal-title":"IEEE Softw"},{"key":"10271_CR27","doi-asserted-by":"crossref","unstructured":"Muse BA, Khomh F, Antoniol G (2022) Do developers refactor data access code? an empirical study. In: the 29th IEEE international conference on software analysis, evolution and reengineering(SANER)","DOI":"10.1109\/SANER53432.2022.00014"},{"key":"10271_CR28","doi-asserted-by":"publisher","unstructured":"Muse BA, Khomh F, Antoniol G (2022) Replication package: Refactoring Practices in the Context of Data-intensive Systems. https:\/\/doi.org\/10.5281\/zenodo.7140854","DOI":"10.5281\/zenodo.7140854"},{"issue":"6","key":"10271_CR29","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1007\/s10664-022-10119-4","volume":"27","author":"BA Muse","year":"2022","unstructured":"Muse BA, Nagy C, Cleve A, Khomh F, Antoniol G (2022) FIXME: synchronize with database! an empirical study of data access self-admitted technical debt. Empir Softw Eng 27(6):130. https:\/\/doi.org\/10.1007\/s10664-022-10119-4","journal-title":"Empir Softw Eng"},{"key":"10271_CR30","doi-asserted-by":"crossref","unstructured":"Muse BA, Rahman MM, Nagy C, Cleve A, Khomh F, Antoniol G (2020) On the prevalence, impact, and evolution of SQL code smells in data-intensive systems. In: Proceedings of the 17th international conference on mining software repositories, pp 327\u2013338","DOI":"10.1145\/3379597.3387467"},{"key":"10271_CR31","doi-asserted-by":"crossref","unstructured":"Nagy C, Cleve A (2017) A static code smell detector for SQL queries embedded in Java code. In: 2017 IEEE 17th international working conference on source code analysis and manipulation (SCAM). IEEE, pp 147\u2013152","DOI":"10.1109\/SCAM.2017.19"},{"key":"10271_CR32","doi-asserted-by":"crossref","unstructured":"Nagy C, Cleve A (2018) SQLInspect: A static analyzer to inspect database usage in Java applications. In: Proceedings of the 40th international conference on software engineering: companion proceedings. ACM, pp 93\u201396","DOI":"10.1145\/3183440.3183496"},{"key":"10271_CR33","doi-asserted-by":"crossref","unstructured":"Park B, Rao DL, Gudivada VN (2021) Dangers of bias in data-intensive information systems. In: Deshpande P, Abraham A, Iyer B, Ma K (eds) Next generation information processing system. Springer Singapore, Singapore, pp 259\u2013271","DOI":"10.1007\/978-981-15-4851-2_28"},{"key":"10271_CR34","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa F, Varoquaux G, Gramfort A, Michel V, Thirion B, Grisel O, Blondel M, Prettenhofer P, Weiss R, Dubourg V, Vanderplas J, Passos A, Cournapeau D, Brucher M, Perrot M, Duchesnay E (2011) Scikit-learn: Machine learning in Python. J Mach Learn Res 12:2825\u20132830","journal-title":"J Mach Learn Res"},{"issue":"6","key":"10271_CR35","doi-asserted-by":"publisher","first-page":"3577","DOI":"10.1007\/s10270-019-00730-3","volume":"18","author":"D Perez-Palacin","year":"2019","unstructured":"Perez-Palacin D, Merseguer J, Requeno JI, Guerriero M, Di Nitto E, Tamburri DA (2019) A UML profile for the design, quality assessment and deployment of data-intensive applications. Softw Syst Model 18(6):3577\u20133614","journal-title":"Softw Syst Model"},{"key":"10271_CR36","doi-asserted-by":"crossref","unstructured":"Peruma A (2019) A preliminary study of android refactorings. In: 2019 IEEE\/ACM 6th international conference on mobile software engineering and systems (MOBILESoft). IEEE, pp 148\u2013149","DOI":"10.1109\/MOBILESoft.2019.00030"},{"key":"10271_CR37","doi-asserted-by":"crossref","unstructured":"Peruma A, Mkaouer MW, Decker MJ, Newman CD (2018) An empirical investigation of how and why developers rename identifiers. In: Proceedings of the 2nd international workshop on refactoring, pp 26\u201333","DOI":"10.1145\/3242163.3242169"},{"issue":"110","key":"10271_CR38","first-page":"704","volume":"169","author":"A Peruma","year":"2020","unstructured":"Peruma A, Mkaouer MW, Decker MJ, Newman CD (2020) Contextualizing rename decisions using refactorings, commit messages, and data types. J Syst Softw 169(110):704","journal-title":"J Syst Softw"},{"key":"10271_CR39","unstructured":"Piatetsky S, Frawley G, William J (1991) Discovery, analysis and presentation of strong rules knowledge discovery in databases"},{"key":"10271_CR40","doi-asserted-by":"crossref","unstructured":"Sharma T, Fragkoulis M, Rizou S, Bruntink M, Spinellis D (2018) Smelly relations: Measuring and understanding database schema quality. In: 2018 IEEE\/ACM 40th international conference on software engineering: software engineering in practice track (ICSE-SEIP), pp 55\u201364","DOI":"10.1145\/3183519.3183529"},{"key":"10271_CR41","doi-asserted-by":"publisher","unstructured":"Shome A, Cruz L, van Deursen A (2022) Data smells in public datasets. In: Crnkovic I (ed) Proceedings of the 1st international conference on AI engineering: software engineering for AI, CAIN 2022, Pittsburgh, Pennsylvania, May 16-24, 2022. https:\/\/doi.org\/10.1145\/3522664.3528621. ACM, pp 205\u2013216","DOI":"10.1145\/3522664.3528621"},{"key":"10271_CR42","doi-asserted-by":"crossref","unstructured":"Silva D, Silva J, Santos GJDS, Terra R, Valente MTO (2020) Refdiff 2.0: A multi-language refactoring detection tool. IEEE Trans Softw Eng","DOI":"10.1109\/TSE.2020.2968072"},{"key":"10271_CR43","doi-asserted-by":"crossref","unstructured":"Silva D, Tsantalis N, Valente MT (2016) Why we refactor? confessions of Github contributors. In: Proceedings of the 2016 24th ACM SIGSOFT international symposium on foundations of software engineering, pp 858\u2013870","DOI":"10.1145\/2950290.2950305"},{"key":"10271_CR44","doi-asserted-by":"publisher","unstructured":"Spadini D, Aniche M, Bacchelli A (2018) PyDriller: Python Framework for Mining Software Repositories. https:\/\/doi.org\/10.1145\/3236024.3264598","DOI":"10.1145\/3236024.3264598"},{"key":"10271_CR45","doi-asserted-by":"publisher","unstructured":"Tsantalis N, Ketkar A, Dig D (2020) Refactoringminer 2.0. IEEE Transactions on Software Engineering. https:\/\/doi.org\/10.1109\/TSE.2020.3007722","DOI":"10.1109\/TSE.2020.3007722"},{"key":"10271_CR46","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.scico.2019.05.002","volume":"180","author":"C Vassallo","year":"2019","unstructured":"Vassallo C, Grano G, Palomba F, Gall HC, Bacchelli A (2019) A large-scale empirical exploration on refactoring activities in open source software projects. Sci Comput Program 180:1\u201315","journal-title":"Sci Comput Program"},{"key":"10271_CR47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-29044-2","volume-title":"Experimentation in software engineering","author":"C Wohlin","year":"2012","unstructured":"Wohlin C, Runeson P, H\u00f6st M, Ohlsson MC, Regnell B, Wessl\u00e9n A (2012) Experimentation in software engineering. Springer Science & Business Media, Berlin"},{"key":"10271_CR48","doi-asserted-by":"crossref","unstructured":"Zhou C, Kuttal SK, Ahmed I (2018) What makes a good developer? an empirical study of developers\u2019 technical and social competencies. In: 2018 IEEE symposium on visual languages and human-centric computing (VL\/HCC). IEEE, pp 319\u2013321","DOI":"10.1109\/VLHCC.2018.8506577"},{"issue":"12","key":"10271_CR49","doi-asserted-by":"publisher","first-page":"2919","DOI":"10.1109\/TSE.2020.2974469","volume":"47","author":"J Zhou","year":"2021","unstructured":"Zhou J, Wang S, Bezemer CP, Zou Y, Hassan AE (2021) Studying the association between bountysource bounties and the issue-addressing likelihood of Github issue reports. IEEE Trans Softw Eng 47(12):2919\u20132933. https:\/\/doi.org\/10.1109\/TSE.2020.2974469","journal-title":"IEEE Trans Softw Eng"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-022-10271-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10664-022-10271-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-022-10271-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,4,3]],"date-time":"2023-04-03T07:07:44Z","timestamp":1680505664000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10664-022-10271-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,16]]},"references-count":49,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,3]]}},"alternative-id":["10271"],"URL":"https:\/\/doi.org\/10.1007\/s10664-022-10271-x","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"type":"print","value":"1382-3256"},{"type":"electronic","value":"1573-7616"}],"subject":[],"published":{"date-parts":[[2023,2,16]]},"assertion":[{"value":"5 December 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 February 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"<!--Emphasis Type='Bold' removed-->Conflict of Interests"}}],"article-number":"46"}}