{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T08:52:36Z","timestamp":1769763156484,"version":"3.49.0"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2022,10,6]],"date-time":"2022-10-06T00:00:00Z","timestamp":1665014400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,10,6]],"date-time":"2022-10-06T00:00:00Z","timestamp":1665014400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software Eng"],"published-print":{"date-parts":[[2022,12]]},"DOI":"10.1007\/s10664-022-10200-y","type":"journal-article","created":{"date-parts":[[2022,10,6]],"date-time":"2022-10-06T09:03:07Z","timestamp":1665046987000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Pitfalls and guidelines for using time-based Git data"],"prefix":"10.1007","volume":"27","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8023-9710","authenticated-orcid":false,"given":"Samuel W.","family":"Flint","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jigyasa","family":"Chauhan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9571-5567","authenticated-orcid":false,"given":"Robert","family":"Dyer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,10,6]]},"reference":[{"key":"10200_CR1","doi-asserted-by":"publisher","unstructured":"Ahasanuzzaman M, Asaduzzaman M, Roy CK, Schneider KA (2016) Mining duplicate questions in stack overflow. In: Proceedings of the 13th international conference on mining software repositories, MSR \u201916. Association for Computing Machinery, New York, pp 402\u2013412. https:\/\/doi.org\/10.1145\/2901739.2901770","DOI":"10.1145\/2901739.2901770"},{"key":"10200_CR2","doi-asserted-by":"publisher","unstructured":"Antoniol G, Rollo VF, Venturi G (2005) Linear predictive coding and cepstrum coefficients for mining time variant information from software repositories. In: Proceedings of the 2005 international workshop on mining software repositories, MSR \u201905, vol 2005. Association for Computing Machinery, New York, pp 1\u20135. https:\/\/doi.org\/10.1145\/1083142.1083156","DOI":"10.1145\/1083142.1083156"},{"key":"10200_CR3","doi-asserted-by":"publisher","unstructured":"Baysal O, Holmes R, Godfrey MW (2012) Mining usage data and development artifacts. In: 2012 9th IEEE working conference on mining software repositories (MSR), pp 98\u2013107. https:\/\/doi.org\/10.1109\/MSR.2012.6224305","DOI":"10.1109\/MSR.2012.6224305"},{"key":"10200_CR4","doi-asserted-by":"publisher","unstructured":"Bird C, Rigby PC, Barr ET, Hamilton DJ, German DM, Devanbu P (2009) The promises and perils of mining Git. In: 2009 6th IEEE international working conference on mining software repositories, pp 1\u201310. https:\/\/doi.org\/10.1109\/MSR.2009.5069475","DOI":"10.1109\/MSR.2009.5069475"},{"key":"10200_CR5","doi-asserted-by":"publisher","unstructured":"Cito J, Schermann G, Wittern JE, Leitner P, Zumberi S, Gall HC (2017) An empirical analysis of the Docker container ecosystem on GitHub. In: 2017 IEEE\/ACM 14th international conference on mining software repositories (MSR). IEEE. https:\/\/doi.org\/10.1109\/msr.2017.67","DOI":"10.1109\/msr.2017.67"},{"key":"10200_CR6","doi-asserted-by":"publisher","unstructured":"Claes M, M\u00e4ntyl\u00e4 MV (2020) 20-MAD: 20 years of issues and commits of Mozilla and Apache development. In: Proceedings of the 17th international conference on mining software repositories, MSR \u201920. Association for Computing Machinery, New York, pp 503\u2013507. https:\/\/doi.org\/10.1145\/3379597.3387487","DOI":"10.1145\/3379597.3387487"},{"key":"10200_CR7","doi-asserted-by":"crossref","unstructured":"Cosentino V, Izquierdo JLC, Cabot J (2016) Findings from GitHub: methods, datasets and limitations. In: 2016 IEEE\/ACM 13th working conference on mining software repositories (MSR), pp 137\u2013141","DOI":"10.1145\/2901739.2901776"},{"key":"10200_CR8","unstructured":"Cosmo RD, Zacchiroli S (2017) Software Heritage: why and how to preserve software source code. In: iPRES 2017: 14th international conference on digital preservation. Kyoto, Japan"},{"key":"10200_CR9","doi-asserted-by":"publisher","unstructured":"D\u2019Ambros M, Lanza M, Robbes R (2010) An extensive comparison of bug prediction approaches. In: 2010 7th IEEE working conference on mining software repositories (MSR 2010), pp 31\u201341. https:\/\/doi.org\/10.1109\/MSR.2010.5463279","DOI":"10.1109\/MSR.2010.5463279"},{"key":"10200_CR10","doi-asserted-by":"crossref","unstructured":"Demeyer S, Murgia A, Wyckmans K, Lamkanfi A (2013) Happy birthday! A trend analysis on past MSR papers. In: Proceedings of the 10th working conference on mining software repositories, MSR \u201913. IEEE Press, pp 353\u2013362","DOI":"10.1109\/MSR.2013.6624049"},{"key":"10200_CR11","doi-asserted-by":"publisher","unstructured":"Durieux T, Le Goues C, Hilton M, Abreu R (2020) Empirical study of restarted and flaky builds on Travis CI. In: Proceedings of the 17th international conference on mining software repositories, MSR \u201920. Association for Computing Machinery, New York, pp 254\u2013264. https:\/\/doi.org\/10.1145\/3379597.3387460","DOI":"10.1145\/3379597.3387460"},{"key":"10200_CR12","doi-asserted-by":"publisher","unstructured":"Dyer R, Nguyen HA, Rajan H, Nguyen TN (2013) Boa: a language and infrastructure for analyzing ultra-large-scale software repositories. In: Proceedings of the international conference on software engineering, ICSE \u201913, vol 2013. IEEE Press, pp 422\u2013431. https:\/\/doi.org\/10.5555\/2486788.2486844","DOI":"10.5555\/2486788.2486844"},{"key":"10200_CR13","unstructured":"Dyer R, Nguyen HA, Rajan H, Nguyen TN (2021) Boa: Mining ultra-large-scale software repositories. http:\/\/boa.cs.iastate.edu\/boa\/. Accessed 14 Oct 2021"},{"key":"10200_CR14","doi-asserted-by":"publisher","unstructured":"Flint SW, Chauhan J, Dyer R (2021a) Escaping the time pit: pitfalls and guidelines for using time-based Git data. In: 2021 IEEE\/ACM 18th international conference on mining software repositories (MSR), pp 85\u201396. https:\/\/doi.org\/10.1109\/MSR52588.2021.00022","DOI":"10.1109\/MSR52588.2021.00022"},{"key":"10200_CR15","doi-asserted-by":"publisher","unstructured":"Flint SW, Chauhan J, Dyer R (2021b) Replication package for \u201cPitfalls and Guidelines for Using Time-Based GitData From Java, Kotlin, and Python Projects\u201d. https:\/\/doi.org\/10.5281\/zenodo.5558291","DOI":"10.5281\/zenodo.5558291"},{"key":"10200_CR16","doi-asserted-by":"crossref","unstructured":"Gasser L, Ripoche G, Sandusky RJ (2004) Research infrastructure for empirical science of F\/OSS. In: Proceedings of the 1st international workshop on mining software repositories","DOI":"10.1049\/ic:20040468"},{"key":"10200_CR17","doi-asserted-by":"crossref","unstructured":"Ghezzi G, Gall HC (2013) Replicating mining studies with SOFAS. In: Proceedings of the 10th working conference on mining software repositories, MSR \u201913. IEEE Press, pp 363\u2013372","DOI":"10.1109\/MSR.2013.6624050"},{"key":"10200_CR18","doi-asserted-by":"publisher","unstructured":"Goeminne M, Claes M, Mens T (2013) A historical dataset for the Gnome ecosystem. In: 2013 10th working conference on mining software repositories (MSR), pp 225\u2013228. https:\/\/doi.org\/10.1109\/MSR.2013.6624032","DOI":"10.1109\/MSR.2013.6624032"},{"key":"10200_CR19","doi-asserted-by":"crossref","unstructured":"Gonzalez-Barahona JM, Robles G, Izquierdo-Cortazar D (2015) The MetricsGrimoire database collection. In: Proceedings of the 12th working conference on mining software repositories, MSR \u201915. IEEE Press, pp 478\u2013481","DOI":"10.1109\/MSR.2015.68"},{"key":"10200_CR20","doi-asserted-by":"publisher","unstructured":"Hayashi J, Higo Y, Matsumoto S, Kusumoto S (2019) Impacts of daylight saving time on software development. In: Proceedings of the 16th international conference on mining software repositories, MSR \u201919. IEEE Press, pp 502\u2013506. https:\/\/doi.org\/10.1109\/MSR.2019.00076","DOI":"10.1109\/MSR.2019.00076"},{"key":"10200_CR21","doi-asserted-by":"crossref","unstructured":"Hemmati H, Nadi S, Baysal O, Kononenko O, Wang W, Holmes R, Godfrey MW (2013) The MSR cookbook: mining a decade of research. In: Proceedings of the 10th working conference on mining software repositories, MSR \u201913. IEEE Press, pp 343\u2013352","DOI":"10.1109\/MSR.2013.6624048"},{"key":"10200_CR22","doi-asserted-by":"publisher","unstructured":"Kagdi H, Yusuf S, Maletic JI (2006) Mining sequences of changed-files from version histories. In: Proceedings of the 2006 international workshop on mining software repositories, MSR \u201906. Association for Computing Machinery, New York, pp 47\u201353. https:\/\/doi.org\/10.1145\/1137983.1137996","DOI":"10.1145\/1137983.1137996"},{"key":"10200_CR23","doi-asserted-by":"publisher","unstructured":"Kalliamvakou E, Gousios G, Blincoe K, Singer L, German DM, Damian D (2014) The promises and perils of mining GitHub. In: Proceedings of the 11th working conference on mining software repositories, MSR 2014. Association for Computing Machinery, New York, pp 92\u2013101. https:\/\/doi.org\/10.1145\/2597073.2597074","DOI":"10.1145\/2597073.2597074"},{"issue":"5","key":"10200_CR24","doi-asserted-by":"publisher","first-page":"2035","DOI":"10.1007\/s10664-015-9393-5","volume":"21","author":"E Kalliamvakou","year":"2016","unstructured":"Kalliamvakou E, Gousios G, Blincoe K, Singer L, German DM, Damian D (2016) An in-depth study of the promises and perils of mining GitHub. Empirical Softw Engg 21(5):2035\u20132071. https:\/\/doi.org\/10.1007\/s10664-015-9393-5","journal-title":"Empirical Softw Engg"},{"key":"10200_CR25","doi-asserted-by":"publisher","unstructured":"Karampatsis RM, Sutton C (2020) How often do single-statement bugs occur?. In: Proceedings of the 17th international conference on mining software repositories. ACM. https:\/\/doi.org\/10.1145\/3379597.3387491","DOI":"10.1145\/3379597.3387491"},{"key":"10200_CR26","doi-asserted-by":"publisher","unstructured":"Kikas R, Dumas M, Pfahl D (2016) Using dynamic and contextual features to predict issue lifetime in GitHub projects. In: Proceedings of the 13th international conference on mining software repositories, MSR \u201916. Association for Computing Machinery, New York, pp 291\u2013302. https:\/\/doi.org\/10.1145\/2901739.2901751","DOI":"10.1145\/2901739.2901751"},{"key":"10200_CR27","doi-asserted-by":"publisher","unstructured":"Kotti Z, Spinellis D (2019) Standing on shoulders or feet? The usage of the MSR data papers. In: Proceedings of the 16th international conference on mining software repositories, MSR \u201919. IEEE Press, pp 565\u2013576. https:\/\/doi.org\/10.1109\/MSR.2019.00085","DOI":"10.1109\/MSR.2019.00085"},{"key":"10200_CR28","doi-asserted-by":"publisher","unstructured":"Liu Y, Lin J, Cleland-Huang J (2020) Traceability support for multi-lingual software projects. In: Proceedings of the 17th international conference on mining software repositories, MSR \u201920. Association for Computing Machinery, New York, pp 443\u2013454. https:\/\/doi.org\/10.1145\/3379597.3387440","DOI":"10.1145\/3379597.3387440"},{"key":"10200_CR29","doi-asserted-by":"crossref","unstructured":"Pietri A, Rousseau G, Zacchiroli S (2020) Forking without clicking: on how to identify software repository forks. In: Proceedings of the 17th international conference on mining software repositories. Association for Computing Machinery, New York, pp 277\u2013287","DOI":"10.1145\/3379597.3387450"},{"key":"10200_CR30","doi-asserted-by":"publisher","unstructured":"Pimentel JaF, Murta L, Braganholo V, Freire J (2019) A large-scale study about quality and reproducibility of Jupyter notebooks. In: Proceedings of the 16th international conference on mining software repositories, MSR \u201919. IEEE Press, pp 507\u2013517. https:\/\/doi.org\/10.1109\/MSR.2019.00077","DOI":"10.1109\/MSR.2019.00077"},{"key":"10200_CR31","doi-asserted-by":"publisher","unstructured":"Robles G (2010) Replicating MSR: A study of the potential replicability of papers published in the Mining Software Repositories proceedings. In: 7th IEEE working conference on mining software repositories, MSR \u201910, pp 171\u2013180. https:\/\/doi.org\/10.1109\/MSR.2010.5463348","DOI":"10.1109\/MSR.2010.5463348"},{"key":"10200_CR32","doi-asserted-by":"publisher","unstructured":"Robles G, Gonz\u00e1lez-Barahona JM, Cervig\u00f3n C, Capiluppi A, Izquierdo-Cort\u00e1zar D (2014) Estimating development effort in free\/open source software projects by mining software repositories: a case study of OpenStack. In: Proceedings of the 11th working conference on mining software repositories, MSR 2014. Association for Computing Machinery, New York, pp 222\u2013231. https:\/\/doi.org\/10.1145\/2597073.2597107","DOI":"10.1145\/2597073.2597107"},{"key":"10200_CR33","doi-asserted-by":"publisher","unstructured":"Sadowski C, Lewis C, Lin Z, Zhu X, Whitehead EJ (2011) An empirical analysis of the FixCache algorithm. In: Proceedings of the 8th working conference on mining software repositories, MSR \u201911. Association for Computing Machinery, New York, pp 219\u2013222. https:\/\/doi.org\/10.1145\/1985441.1985475","DOI":"10.1145\/1985441.1985475"},{"key":"10200_CR34","unstructured":"Software Heritage developers (2020) Software Heritage archive. https:\/\/archive.softwareheritage.org\/. Accessed 28 Dec 2020"},{"key":"10200_CR35","doi-asserted-by":"crossref","unstructured":"Steff M, Russo B (2012) Co-evolution of logical couplings and commits for defect estimation. In: Proceedings of the 9th IEEE working conference on mining software repositories, MSR \u201912. IEEE Press, pp 213\u2013216","DOI":"10.1109\/MSR.2012.6224283"},{"key":"10200_CR36","doi-asserted-by":"publisher","unstructured":"Walker RJ, Holmes R, Hedgeland I, Kapur P, Smith A (2006) A lightweight approach to technical risk estimation via probabilistic impact analysis. In: Proceedings of the 2006 international workshop on mining software repositories, MSR \u201906. Association for Computing Machinery, New York, pp 98\u2013104. https:\/\/doi.org\/10.1145\/1137983.1138008","DOI":"10.1145\/1137983.1138008"},{"key":"10200_CR37","doi-asserted-by":"publisher","unstructured":"Wang P, Brown C, Jennings JA, Stolee KT (2020) An empirical study on regular expression bugs. In: Proceedings of the 17th international conference on mining software repositories. ACM. https:\/\/doi.org\/10.1145\/3379597.3387464","DOI":"10.1145\/3379597.3387464"},{"key":"10200_CR38","doi-asserted-by":"publisher","unstructured":"Xu Y, Zhou M (2018) A multi-level dataset of Linux kernel patchwork. In: Proceedings of the 15th international conference on mining software repositories, MSR \u201918. Association for Computing Machinery, New York, pp 54\u201357. https:\/\/doi.org\/10.1145\/3196398.3196475","DOI":"10.1145\/3196398.3196475"},{"key":"10200_CR39","doi-asserted-by":"publisher","unstructured":"Zhu J, Wei J (2019) An empirical study of multiple names and email addresses in OSS version control repositories. In: 2019 IEEE\/ACM 16th international conference on mining software repositories (MSR). IEEE. https:\/\/doi.org\/10.1109\/msr.2019.00068","DOI":"10.1109\/msr.2019.00068"},{"key":"10200_CR40","doi-asserted-by":"crossref","unstructured":"Zimmermann T, Wei\u00dfgerber P (2004) Preprocessing CVS data for fine-grained analysis. In: Proceedings of the 1st international workshop on mining software repositories, MSR \u201904, pp 2\u20136","DOI":"10.1049\/ic:20040466"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-022-10200-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10664-022-10200-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-022-10200-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,21]],"date-time":"2022-11-21T02:22:07Z","timestamp":1668997327000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10664-022-10200-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,6]]},"references-count":40,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2022,12]]}},"alternative-id":["10200"],"URL":"https:\/\/doi.org\/10.1007\/s10664-022-10200-y","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"value":"1382-3256","type":"print"},{"value":"1573-7616","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,10,6]]},"assertion":[{"value":"2 July 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 October 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"<!--Emphasis Type='Bold' removed-->Conflict of interest"}}],"article-number":"194"}}