{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T00:40:21Z","timestamp":1776127221516,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":67,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,1,27]],"date-time":"2020-01-27T00:00:00Z","timestamp":1580083200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000936","name":"Gordon and Betty Moore Foundation","doi-asserted-by":"publisher","award":["GBMF3834"],"award-info":[{"award-number":["GBMF3834"]}],"id":[{"id":"10.13039\/100000936","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000879","name":"Alfred P. Sloan Foundation","doi-asserted-by":"publisher","award":["2013-10-27"],"award-info":[{"award-number":["2013-10-27"]}],"id":[{"id":"10.13039\/100000879","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,1,27]]},"DOI":"10.1145\/3351095.3372862","type":"proceedings-article","created":{"date-parts":[[2020,9,12]],"date-time":"2020-09-12T18:50:44Z","timestamp":1599936644000},"page":"325-336","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":110,"title":["Garbage in, garbage out?"],"prefix":"10.1145","author":[{"given":"R. 
Stuart","family":"Geiger","sequence":"first","affiliation":[{"name":"University of California"}]},{"given":"Kevin","family":"Yu","sequence":"additional","affiliation":[{"name":"University of California"}]},{"given":"Yanlai","family":"Yang","sequence":"additional","affiliation":[{"name":"University of California"}]},{"given":"Mindy","family":"Dai","sequence":"additional","affiliation":[{"name":"University of California"}]},{"given":"Jie","family":"Qiu","sequence":"additional","affiliation":[{"name":"University of California"}]},{"given":"Rebekah","family":"Tang","sequence":"additional","affiliation":[{"name":"University of California"}]},{"given":"Jenny","family":"Huang","sequence":"additional","affiliation":[{"name":"University of California"}]}],"member":"320","published-online":{"date-parts":[[2020,1,27]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Green, Longman, Roberts, and Green","author":"Babbage Charles"},{"key":"e_1_3_2_2_2_1","volume-title":"Towards Trace-ability in Data Ecosystems using a Bill of Materials Model. arXiv preprint arXiv:1904.04253","author":"Barclay Iain","year":"2019"},{"key":"e_1_3_2_2_3_1","first-page":"587","article-title":"Data statements for NLP: Toward mitigating system bias and enabling better science","volume":"6","author":"Bender Emily M","year":"2018","journal-title":"Transactions of the ACL"},{"key":"e_1_3_2_2_4_1","volume-title":"Ethical and Socially-Aware Data Labels. In Annual International Symposium on Information Management and Big Data. 
Springer, 320--327","author":"Beretta Elena","year":"2018"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-013-9215-6"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1002\/asi.22634"},{"key":"e_1_3_2_2_7_1","volume-title":"Sorting Things Out: Classification and its Consequences","author":"Bowker Geoffrey C"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/9384.001.0001"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1177\/2053951715622512"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3025453.3026044"},{"key":"e_1_3_2_2_11_1","volume-title":"AAAI Spring Symposium on Machine Learning in Information Access","volume":"18","author":"William","year":"1996"},{"key":"e_1_3_2_2_12_1","unstructured":"Linguistic Data Consortium. 2008. ACE (Automatic Content Extraction) English annotation guidelines for entities version 6.6. https:\/\/www.ldc.upenn.edu\/sites\/www.ldc.upenn.edu\/files\/english-entities-guidelines-v6.6.pdf  Linguistic Data Consortium. 2008. ACE (Automatic Content Extraction) English annotation guidelines for entities version 6.6. 
https:\/\/www.ldc.upenn.edu\/sites\/www.ldc.upenn.edu\/files\/english-entities-guidelines-v6.6.pdf"},{"key":"e_1_3_2_2_13_1","volume-title":"Automating inequality: How high-tech tools profile, police, and punish the poor","author":"Eubanks Virginia"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-00026-8_2"},{"key":"e_1_3_2_2_15_1","volume-title":"The Elements of Statistical Learning: Data Mining, Inference, and Prediction","author":"Friedman Jerome","edition":"2"},{"key":"e_1_3_2_2_16_1","volume-title":"Hanna Wallach, Hal Daum\u00e9 III, and Kate Crawford.","author":"Gebru Timnit","year":"2018"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3329486.3329495"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1002\/2015EA000136"},{"key":"e_1_3_2_2_19_1","volume-title":"The discovery of grounded theory","author":"Glaser Barney G","year":"1968"},{"key":"e_1_3_2_2_20_1","volume-title":"Deep Learning","author":"Goodfellow Ian"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pcbi.1003542"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1525\/aa.1994.96.3.02a00100"},{"key":"e_1_3_2_2_23_1","volume-title":"ORES: Lowering Barriers with Participatory Machine Learning in Wikipedia. arXiv preprint arXiv:1909.05189","author":"Halfaker Aaron","year":"2019"},{"key":"e_1_3_2_2_24_1","volume-title":"Alexandra Olteanu, and Kush R Varshney.","author":"Hind Michael","year":"2018"},{"key":"e_1_3_2_2_25_1","volume-title":"The dataset nutrition label: A framework to drive higher data quality standards. 
arXiv preprint arXiv:1805.03677","author":"Holland Sarah","year":"2018"},{"key":"e_1_3_2_2_26_1","first-page":"13","article-title":"Towards a \u2018science\u2019 of corpus annotation: a new methodological challenge for corpus linguistics","volume":"22","author":"Hovy Eduard","year":"2010","journal-title":"International Journal of Translation"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCSE.2007.55"},{"key":"e_1_3_2_2_28_1","volume-title":"An introduction to statistical learning","author":"James Gareth"},{"key":"e_1_3_2_2_29_1","unstructured":"Eric Jones Travis Oliphant Pearu Peterson etal 2001. SciPy: Open source scientific tools for Python. http:\/\/www.scipy.org\/  Eric Jones Travis Oliphant Pearu Peterson et al. 2001. SciPy: Open source scientific tools for Python. http:\/\/www.scipy.org\/"},{"key":"e_1_3_2_2_30_1","volume-title":"The Practice of Reproducible Research : Case Studies and Lessons from the Data-Intensive Sciences","author":"Kitzes Justin"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.3233\/978-1-61499-649-1-87"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2899409"},{"key":"e_1_3_2_2_33_1","volume-title":"Circulating Reference: Sampling the Soil in the Amazon Forest. In Pandora's Hope","author":"Latour Bruno","year":"1999"},{"key":"e_1_3_2_2_34_1","volume-title":"Laboratory Life: The Social Construction of Scientific Facts","author":"Latour Bruno","year":"1979"},{"key":"e_1_3_2_2_35_1","volume-title":"Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)","volume":"8","author":"Maeda Kazuaki","year":"2008"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3359174"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.25080\/Majora-92bf1922-00a"},{"key":"e_1_3_2_2_38_1","volume-title":"Empirical Economics: The Pedagogy of Reproducible Science in Undergraduate Education. 
In Undergraduate Research and the Academic Librarian: Case Studies and Best Practices","author":"Medeiros N.","year":"2017"},{"key":"e_1_3_2_2_39_1","first-page":"66","article-title":"Work with new electronic 'brains' opens field for army math experts","volume":"10","author":"Mellin WD","year":"1957","journal-title":"The Hammond Times"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3287560.3287596"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0155036"},{"key":"e_1_3_2_2_42_1","unstructured":"Hiroki Nakayama Takahiro Kubo Junya Kamura Yasufumi Taniguchi and Xu Liang. 2018. doccano: Text Annotation Tool for Human. https:\/\/github.com\/doccano\/doccano Software available from https:\/\/github.com\/doccano\/doccano.  Hiroki Nakayama Takahiro Kubo Junya Kamura Yasufumi Taniguchi and Xu Liang. 2018. doccano: Text Annotation Tool for Human. https:\/\/github.com\/doccano\/doccano Software available from https:\/\/github.com\/doccano\/doccano."},{"key":"e_1_3_2_2_43_1","volume-title":"Computational grounded theory: A methodological framework. 
Sociological Methods & Research","author":"Nelson Laura K","year":"2017"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11135-013-9919-0"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"crossref","volume-title":"The Black Box Society: The Secret Algorithms That Control Money and Information","author":"Pasquale Frank","DOI":"10.4159\/harvard.9780674736061"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCSE.2007.53"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.25080\/Majora-4af1f417-011"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cmpb.2014.11.005"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1080\/00031305.2016.1141708"},{"key":"e_1_3_2_2_51_1","first-page":"491","article-title":"Eliminating spammers and ranking annotators for crowdsourced labeling tasks","author":"Raykar Vikas C","year":"2012","journal-title":"Journal of Machine Learning Research 13"},{"key":"e_1_3_2_2_52_1","volume-title":"Analyzing media messages: Using quantitative content analysis in research","author":"Riff Daniel"},{"key":"e_1_3_2_2_53_1","volume-title":"Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)","author":"Sabou Marta","year":"2014"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.2218\/ijdc.v7i2.235"},{"key":"e_1_3_2_2_55_1","volume-title":"Machine Learning Systems workshop at NIPS.","author":"Schelter Sebastian","year":"2017"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.14778\/3229863.3229867"},{"key":"e_1_3_2_2_57_1","volume-title":"Academic research record-keeping: Best practices for individuals, group leaders, and institutions","author":"Schreier Alan 
A"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3180492"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/2567948.2579215"},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2887201"},{"key":"e_1_3_2_2_61_1","volume-title":"CrowdSem 2013 Workshop.","author":"Sober\u00f3n Guillermo","year":"2013"},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"publisher","DOI":"10.2307\/20202391"},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.1093\/intqhc\/mzm042"},{"key":"e_1_3_2_2_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCSE.2011.37"},{"key":"e_1_3_2_2_65_1","unstructured":"Guido van Rossum. 1995. Python Library Reference. https:\/\/ir.cwi.nl\/pub\/5009\/05009D.pdf  Guido van Rossum. 1995. Python Library Reference. https:\/\/ir.cwi.nl\/pub\/5009\/05009D.pdf"},{"key":"e_1_3_2_2_66_1","volume-title":"recaptcha: Human-based character recognition via web security measures. Science 321, 5895","author":"Ahn Luis Von","year":"2008"},{"key":"e_1_3_2_2_67_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.592845"},{"key":"e_1_3_2_2_68_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pcbi.1005510"}],"event":{"name":"FAT* '20: Conference on Fairness, Accountability, and Transparency","location":"Barcelona Spain","acronym":"FAT* '20","sponsor":["ACM Association for Computing Machinery"]},"container-title":["Proceedings of the 2020 Conference on Fairness, Accountability, and 
Transparency"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3351095.3372862","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3351095.3372862","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:41:22Z","timestamp":1750200082000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3351095.3372862"}},"subtitle":["do machine learning application papers in social computing report where human-labeled training data comes from?"],"short-title":[],"issued":{"date-parts":[[2020,1,27]]},"references-count":67,"alternative-id":["10.1145\/3351095.3372862","10.1145\/3351095"],"URL":"https:\/\/doi.org\/10.1145\/3351095.3372862","relation":{},"subject":[],"published":{"date-parts":[[2020,1,27]]},"assertion":[{"value":"2020-01-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}