{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T19:56:00Z","timestamp":1776110160779,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,5,8]],"date-time":"2021-05-08T00:00:00Z","timestamp":1620432000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,5,8]]},"DOI":"10.1145\/3411763.3451617","type":"proceedings-article","created":{"date-parts":[[2021,5,8]],"date-time":"2021-05-08T00:07:23Z","timestamp":1620432443000},"page":"1-7","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":19,"title":["What Makes a Well-Documented Notebook? A Case Study of Data Scientists\u2019 Documentation Practices in Kaggle"],"prefix":"10.1145","author":[{"given":"April Yi","family":"Wang","sequence":"first","affiliation":[{"name":"School of Information University of Michigan, United States"}]},{"given":"Dakuo","family":"Wang","sequence":"additional","affiliation":[{"name":"IBM Research, United States"}]},{"given":"Jaimie","family":"Drozdal","sequence":"additional","affiliation":[{"name":"Cognitive and Immersive Systems Lab Rensselaer Polytechnic Institute, United States"}]},{"given":"Xuye","family":"Liu","sequence":"additional","affiliation":[{"name":"Rensselaer Polytechnic Institute, United States"}]},{"given":"Soya","family":"Park","sequence":"additional","affiliation":[{"name":"CSAIL MIT, United States"}]},{"given":"Steve","family":"Oney","sequence":"additional","affiliation":[{"name":"School of Information University of Michigan, United States"}]},{"given":"Christopher","family":"Brooks","sequence":"additional","affiliation":[{"name":"School of Information University of Michigan, United States"}]}],"member":"320","published-online":{"date-parts":[[2021,5,8]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3210713.3210745"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376729"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3415250"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1085313.1085331"},{"key":"e_1_3_2_1_5_1","volume-title":"Energy generation prediction: Lessons learned from the use of Kaggle in Machine Learning Course","author":"Fernandez-Bes Jesus","unstructured":"Jesus Fernandez-Bes, Jer\u00f3nimo Arenas-Garc\u00eda, and Jes\u00fas Cid-Sueiro. [n.d.]. Energy generation prediction: Lessons learned from the use of Kaggle in Machine Learning Course. Group 7, 8 ([n.\u00a0d.]), 9."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10606-018-9333-1"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3134688"},{"key":"e_1_3_2_1_8_1","unstructured":"JavaDoc 2020. JavaDoc. https:\/\/docs.oracle.com\/javase\/8\/docs\/technotes\/tools\/windows\/javadoc.html."},{"key":"e_1_3_2_1_9_1","unstructured":"Kaggle Competition 2020. House Prices - Advanced Regression Techniques. https:\/\/www.kaggle.com\/c\/house-prices-advanced-regression-techniques."},{"key":"e_1_3_2_1_10_1","unstructured":"Kaggle Competition 2020. Titanic - Machine Learning from Disaster. https:\/\/www.kaggle.com\/c\/titanic."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:LIDA.0000048322.42751.ca"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Malin K\u00e4ll\u00e9n Ulf Sigvardsson and Tobias Wrigstad. 2020. Jupyter Notebooks on GitHub: Characteristics and Code Clones. arXiv preprint arXiv:2007.10146(2020).","DOI":"10.22152\/programming-journal.org\/2021\/5\/15"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173748"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173748"},{"key":"e_1_3_2_1_15_1","unstructured":"Thomas Kluyver Benjamin Ragan-Kelley Fernando P\u00e9rez Brian\u00a0E Granger Matthias Bussonnier Jonathan Frederic Kyle Kelley Jessica\u00a0B Hamrick Jason Grout Sylvain Corlay 2016. Jupyter Notebooks \u2013 a publishing format for reproducible computational workflows.. In ELPUB. 87\u201390."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2019.00087"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2019.2934593"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2013.12"},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the 2020 ACM conference on GROUP. ACM.","author":"Mao Yaoli","year":"2020","unstructured":"Yaoli Mao, Dakuo Wang, Michael Muller, KUSH VARSHNEY, IOANA Baldini, CASEY Dugan, and ALEKSANDRA MOJSILOVI\u00c4\u2020. 2020. How Data Scientists Work Together With Domain Experts in Scientific Collaborations. In Proceedings of the 2020 ACM conference on GROUP. ACM."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300356"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2009.5070533"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1038\/d41586-018-07196-1"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3299869.3300107"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2012.6227188"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173606"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-19811-3_29"},{"key":"e_1_3_2_1_27_1","volume-title":"Casual Notebooks and Rigid Scripts: Understanding Data Science Programming. In 2020 IEEE Symposium on Visual Languages and Human-Centric Computing (VL\/HCC). IEEE, 1\u20135.","author":"Subramanian Krishna","year":"2020","unstructured":"Krishna Subramanian, Nur Hamdan, and Jan Borchers. 2020. Casual Notebooks and Rigid Scripts: Understanding Data Science Programming. In 2020 IEEE Symposium on Visual Languages and Human-Centric Computing (VL\/HCC). IEEE, 1\u20135."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.24251\/HICSS.2020.029"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445526"},{"key":"e_1_3_2_1_30_1","unstructured":"Dakuo Wang Q.\u00a0Vera Liao Yunfeng Zhang Udayan Khurana Horst Samulowitz Soya Park Michael Muller and Lisa Amini. 2021. How Much Automation Does a Data Scientist Want?. In preprint."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3359313"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Amy\u00a0X Zhang Michael Muller and Dakuo Wang. 2020. How do Data Science Workers Collaborate? Roles Workflows and Tools. arXiv preprint arXiv:2001.06684(2020).","DOI":"10.1145\/3392826"},{"key":"e_1_3_2_1_33_1","volume-title":"CORAL: COde RepresentAtion Learning with Weakly-Supervised Transformers for Analyzing Data Analysis. arXiv preprint arXiv:2008.12828(2020).","author":"Zhang Ge","year":"2020","unstructured":"Ge Zhang, Mike\u00a0A Merrill, Yang Liu, Jeffrey Heer, and Tim Althoff. 2020. CORAL: COde RepresentAtion Learning with Weakly-Supervised Transformers for Analyzing Data Analysis. arXiv preprint arXiv:2008.12828(2020)."}],"event":{"name":"CHI '21: CHI Conference on Human Factors in Computing Systems","location":"Yokohama Japan","acronym":"CHI '21","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Extended Abstracts of the 2021 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3411763.3451617","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3411763.3451617","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:28:17Z","timestamp":1750195697000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3411763.3451617"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,8]]},"references-count":33,"alternative-id":["10.1145\/3411763.3451617","10.1145\/3411763"],"URL":"https:\/\/doi.org\/10.1145\/3411763.3451617","relation":{},"subject":[],"published":{"date-parts":[[2021,5,8]]},"assertion":[{"value":"2021-05-08","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}