{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,5]],"date-time":"2025-08-05T12:48:26Z","timestamp":1754398106666,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,6,20]],"date-time":"2022-06-20T00:00:00Z","timestamp":1655683200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100000923","name":"Australian Research Council","doi-asserted-by":"publisher","award":["DP180101985"],"award-info":[{"award-number":["DP180101985"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,6,21]]},"DOI":"10.1145\/3531146.3533175","type":"proceedings-article","created":{"date-parts":[[2022,6,20]],"date-time":"2022-06-20T14:27:10Z","timestamp":1655735230000},"page":"1136-1153","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Smallset Timelines: A Visual Representation of Data Preprocessing Decisions"],"prefix":"10.1145","author":[{"given":"Lydia R.","family":"Lucchesi","sequence":"first","affiliation":[{"name":"Australian National University, Australia and CSIRO's Data61, Australia"}]},{"given":"Petra M.","family":"Kuhnert","sequence":"additional","affiliation":[{"name":"CSIRO's Data61, Australia and Australian National University, Australia"}]},{"given":"Jenny L.","family":"Davis","sequence":"additional","affiliation":[{"name":"Australian National University, Australia"}]},{"given":"Lexing","family":"Xie","sequence":"additional","affiliation":[{"name":"Australian National University, Australia and CSIRO's Data61, Australia"}]}],"member":"320","published-online":{"date-parts":[[2022,6,20]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1147\/JRD.2019.2942288"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00041"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.3150\/13-BEJSP16"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCG.2019.2941856"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/1142473.1142574"},{"key":"e_1_3_2_1_6_1","unstructured":"Scott Chamberlain and Kyle Voytovich. 2020. charlatan: Make Fake Data. https:\/\/CRAN.R-project.org\/package=charlatan R package version 0.4.0."},{"volume-title":"Introduction to algorithms","author":"Cormen H","key":"e_1_3_2_1_7_1","unstructured":"Thomas\u00a0H Cormen, Charles\u00a0E Leiserson, Ronald\u00a0L Rivest, and Clifford Stein. 2009. Introduction to algorithms. MIT press."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2000.839437"},{"key":"e_1_3_2_1_9_1","volume-title":"Representative subset selection. Analytica chimica acta 468, 1","author":"Daszykowski Michal","year":"2002","unstructured":"Michal Daszykowski, Beata Walczak, and DL Massart. 2002. Representative subset selection. Analytica chimica acta 468, 1 (2002), 91\u2013103."},{"volume-title":"How artifacts afford: The power and politics of everyday things","author":"Davis L","key":"e_1_3_2_1_10_1","unstructured":"Jenny\u00a0L Davis. 2020. How artifacts afford: The power and politics of everyday things. MIT Press."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1017\/pan.2017.44"},{"volume-title":"Data feminism","author":"D\u2019Ignazio Catherine","key":"e_1_3_2_1_12_1","unstructured":"Catherine D\u2019Ignazio and Lauren\u00a0F Klein. 2020. Data feminism. Mit Press."},{"key":"e_1_3_2_1_13_1","volume-title":"Retiring Adult: New Datasets for Fair Machine Learning. Advances in Neural Information Processing Systems 34","author":"Ding Frances","year":"2021","unstructured":"Frances Ding, Moritz Hardt, John Miller, and Ludwig Schmidt. 2021. Retiring Adult: New Datasets for Fair Machine Learning. Advances in Neural Information Processing Systems 34 (2021)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2090236.2090255"},{"key":"e_1_3_2_1_15_1","volume-title":"Situating methods in the magic of Big Data and AI. Communication monographs 85, 1","author":"Elish Madeleine\u00a0Clare","year":"2018","unstructured":"Madeleine\u00a0Clare Elish and Danah Boyd. 2018. Situating methods in the magic of Big Data and AI. Communication monographs 85, 1 (2018), 57\u201380."},{"key":"e_1_3_2_1_16_1","volume-title":"Data preprocessing and intelligent data analysis. Intelligent data analysis 1, 1","author":"Famili A","year":"1997","unstructured":"A Famili, Wei-Min Shen, Richard Weber, and Evangelos Simoudis. 1997. Data preprocessing and intelligent data analysis. Intelligent data analysis 1, 1 (1997), 3\u201323."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3287560.3287589"},{"volume-title":"Data preprocessing in data mining. Vol.\u00a072","author":"Garc\u00eda Salvador","key":"e_1_3_2_1_18_1","unstructured":"Salvador Garc\u00eda, Juli\u00e1n Luengo, and Francisco Herrera. 2015. Data preprocessing in data mining. Vol.\u00a072. Springer."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458723"},{"key":"e_1_3_2_1_20_1","volume-title":"Retrieved","author":"Goodger David","year":"2001","unstructured":"David Goodger. 2001. PEP 257 \u2013 Docstring Conventions. Retrieved December 28, 2021 from https:\/\/www.python.org\/dev\/peps\/pep-0257\/"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1049\/ic.2011.0012"},{"key":"e_1_3_2_1_22_1","volume-title":"Reflections on the NASA MDP data sets. IET software 6, 6","author":"Gray David","year":"2012","unstructured":"David Gray, David Bowes, Neil Davey, Yi Sun, and Bruce Christianson. 2012. Reflections on the NASA MDP data sets. IET software 6, 6 (2012), 549\u2013558."},{"key":"e_1_3_2_1_23_1","unstructured":"Gurobi Optimization LLC. 2022. Gurobi Optimizer Reference Manual. https:\/\/www.gurobi.com"},{"key":"e_1_3_2_1_24_1","volume-title":"Equality of opportunity in supervised learning. Advances in neural information processing systems 29","author":"Hardt Moritz","year":"2016","unstructured":"Moritz Hardt, Eric Price, and Nati Srebro. 2016. Equality of opportunity in supervised learning. Advances in neural information processing systems 29 (2016)."},{"key":"e_1_3_2_1_25_1","unstructured":"Sarah Holland Ahmed Hosny Sarah Newman Joshua Joseph and Kasia Chmielinski. 2018. The dataset nutrition label: A framework to drive higher data quality standards. arXiv preprint arXiv:1805.03677(2018)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445918"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3351095.3372829"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2020.3030462"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.14778\/3055330.3055333"},{"key":"e_1_3_2_1_30_1","unstructured":"R. Kohavi and B. Becker. 1996. UCI Adult data set. https:\/\/archive.ics.uci.edu\/ml\/datasets\/adult"},{"volume-title":"Big data preprocessing: enabling smart data","author":"Luengo Juli\u00e1n","key":"e_1_3_2_1_31_1","unstructured":"Juli\u00e1n Luengo, Diego Garc\u00eda-Gil, Sergio Ram\u00edrez-Gallego, Salvador Garc\u00eda, and Francisco Herrera. 2020. Big data preprocessing: enabling smart data. Springer Nature."},{"key":"e_1_3_2_1_32_1","volume-title":"Deeper data minding and fuller data confession. Journal of the Royal Statistical Society: Series A (Statistics in Society)","author":"Meng Xiao-Li","year":"2021","unstructured":"Xiao-Li Meng. 2021. Enhancing (publications on) data quality: Deeper data minding and fuller data confession. Journal of the Royal Statistical Society: Series A (Statistics in Society) (2021)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445880"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3287560.3287596"},{"key":"e_1_3_2_1_35_1","volume-title":"Two-sided confidence intervals for the single proportion: comparison of seven methods. Statistics in medicine 17, 8","author":"Newcombe G","year":"1998","unstructured":"Robert\u00a0G Newcombe. 1998. Two-sided confidence intervals for the single proportion: comparison of seven methods. Statistics in medicine 17, 8 (1998), 857\u2013872."},{"key":"e_1_3_2_1_36_1","volume-title":"TACO: visualizing changes in tables over time","author":"Niederer Christina","year":"2017","unstructured":"Christina Niederer, Holger Stitz, Reem Hourieh, Florian Grassinger, Wolfgang Aigner, and Marc Streit. 2017. TACO: visualizing changes in tables over time. IEEE transactions on visualization and computer graphics 24, 1(2017), 677\u2013686."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Brian\u00a0A Nosek Tom\u00a0E Hardwicke Hannah Moshontz Aur\u00e9lien Allard Katherine\u00a0S Corker Anna\u00a0Dreber Almenberg Fiona Fidler Joseph Hilgard Melissa Kline Mich\u00e8le\u00a0B Nuijten 2021. Replicability robustness and reproducibility in psychological science. (2021).","DOI":"10.31234\/osf.io\/ksfvq"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/2998181.2998331"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.5555\/1953048.2078195"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2915970.2916007"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445063"},{"volume-title":"Retrieved","year":"2021","key":"e_1_3_2_1_42_1","unstructured":"RStudio. 2021. Data transformation with dplyr::cheat sheet. Retrieved December 29, 2021 from https:\/\/github.com\/rstudio\/cheatsheets\/blob\/main\/data-transformation.pdf"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3476058"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2013.11"},{"key":"e_1_3_2_1_45_1","volume-title":"Inter-rater reliability of preprocessing EEG data: Impact of subjective artifact removal on associative memory task ERP results. Frontiers in neuroscience 11","author":"Shirk D","year":"2017","unstructured":"Steven\u00a0D Shirk, Donald\u00a0G McLaren, Jessica\u00a0S Bloomfield, Alex Powers, Alec Duffy, Meghan\u00a0B Mitchell, Ali Ezzati, Brandon\u00a0A Ally, and Alireza Atri. 2017. Inter-rater reliability of preprocessing EEG data: Impact of subjective artifact removal on associative memory task ERP results. Frontiers in neuroscience 11 (2017), 322."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1177\/1745691616658637"},{"key":"e_1_3_2_1_47_1","first-page":"15","article-title":"Missing data in interactive high-dimensional data visualization","volume":"13","author":"Swayne F","year":"1998","unstructured":"Deborah\u00a0F Swayne and Andreas Buja. 1998. Missing data in interactive high-dimensional data visualization. Computational Statistics 13, 1 (1998), 15\u201326.","journal-title":"Computational Statistics"},{"key":"e_1_3_2_1_48_1","unstructured":"Chakkrit Tantithamthavorn. 2016. NASADefectDataset. https:\/\/github.com\/klainfo\/NASADefectDataset."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"Anissa Tanweer Emily\u00a0Kalah Gade PM Krafft and Sarah\u00a0K Dreier. 2021. Why the Data Revolution Needs Qualitative Thinking. Harvard Data Science Review(2021).","DOI":"10.1162\/99608f92.eee0b0da"},{"key":"e_1_3_2_1_50_1","unstructured":"Nicholas Tierney Di Cook Miles McBain and Colin Fay. 2021. naniar: Data Structures Summaries and Visualisations for Missing Data. https:\/\/CRAN.R-project.org\/package=naniar R package version 0.6.1."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2020.3030433"},{"key":"e_1_3_2_1_52_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Wei Kai","year":"2015","unstructured":"Kai Wei, Rishabh Iyer, and Jeff Bilmes. 2015. Submodularity in data subset selection and active learning. In International Conference on Machine Learning. PMLR, 1954\u20131963."},{"key":"e_1_3_2_1_53_1","unstructured":"Hadley Wickham Peter Danenberg G\u00e1bor Cs\u00e1rdi and Manuel Eugster. 2020. roxygen2: In-Line Documentation for R. https:\/\/CRAN.R-project.org\/package=roxygen2 R package version 7.1.1."},{"key":"e_1_3_2_1_54_1","volume-title":"Fairness-Aware Instrumentation of PreprocessingPipelines for Machine Learning. In Workshop on Human-In-the-Loop Data Analytics (HILDA\u201920)","author":"Yang Ke","year":"2020","unstructured":"Ke Yang, Biao Huang, Julia Stoyanovich, and Sebastian Schelter. 2020. Fairness-Aware Instrumentation of PreprocessingPipelines for Machine Learning. In Workshop on Human-In-the-Loop Data Analytics (HILDA\u201920)."},{"key":"e_1_3_2_1_55_1","volume-title":"Data comics: Sequential art for data-driven storytelling. tech. report","author":"Zhao Zhenpeng","year":"2015","unstructured":"Zhenpeng Zhao, Rachael Marr, and Niklas Elmqvist. 2015. Data comics: Sequential art for data-driven storytelling. tech. report (2015)."}],"event":{"name":"FAccT '22: 2022 ACM Conference on Fairness, Accountability, and Transparency","sponsor":["ACM Association for Computing Machinery"],"location":"Seoul Republic of Korea","acronym":"FAccT '22"},"container-title":["2022 ACM Conference on Fairness Accountability and Transparency"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3531146.3533175","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3531146.3533175","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:09Z","timestamp":1750186929000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3531146.3533175"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,20]]},"references-count":55,"alternative-id":["10.1145\/3531146.3533175","10.1145\/3531146"],"URL":"https:\/\/doi.org\/10.1145\/3531146.3533175","relation":{},"subject":[],"published":{"date-parts":[[2022,6,20]]},"assertion":[{"value":"2022-06-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}