{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T02:32:52Z","timestamp":1768876372640,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,6,21]],"date-time":"2021-06-21T00:00:00Z","timestamp":1624233600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100004489","name":"Mitacs","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004489","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100013020","name":"Compute Canada","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100013020","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000155","name":"Social Sciences and Humanities Research Council of Canada","doi-asserted-by":"publisher","award":["BPF-162659"],"award-info":[{"award-number":["BPF-162659"]}],"id":[{"id":"10.13039\/501100000155","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,6,21]]},"DOI":"10.1145\/3462741.3466809","type":"proceedings-article","created":{"date-parts":[[2021,6,21]],"date-time":"2021-06-21T13:35:55Z","timestamp":1624282555000},"page":"69-75","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Bigger Isn\u2019t Better: The Ethical and Scientific Vices of Extra-Large Datasets in Language Models"],"prefix":"10.1145","author":[{"given":"Trystan S.","family":"Goetze","sequence":"first","affiliation":[{"name":"Dalhousie University, Canada"}]},{"given":"Darren","family":"Abramson","sequence":"additional","affiliation":[{"name":"Dalhousie University, Canada"}]}],"member":"320","published-online":{"date-parts":[[2021,6,21]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Darren Abramson. 2021. nlp-data-explorer. https:\/\/github.com\/DarrenAbramson\/nlp-data-explorer  Darren Abramson. 2021. nlp-data-explorer. https:\/\/github.com\/DarrenAbramson\/nlp-data-explorer"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1527-2001.1995.tb00737.x"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00041"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445922"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11948-002-0048-8"},{"key":"e_1_3_2_2_6_1","first-page":"1","article-title":"Gender Shades: Intersectional Accuracy Disparities in Commercial Gender Classification","volume":"81","author":"Buolamwini Joy","year":"2018","journal-title":"Proceedings of Machine Learning Research"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.5555\/136999"},{"key":"e_1_3_2_2_8_1","unstructured":"Free\u00a0Software Foundation. [n.d.]. What is free software?https:\/\/www.gnu.org\/philosophy\/free-sw.html  Free\u00a0Software Foundation. [n.d.]. What is free software?https:\/\/www.gnu.org\/philosophy\/free-sw.html"},{"key":"e_1_3_2_2_9_1","volume-title":"Hal Daum\u00e9\u00a0III au2, and Kate Crawford","author":"Gebru Timnit","year":"2020"},{"key":"e_1_3_2_2_10_1","volume-title":"Escalating AI Saga. Bloomberg","author":"Grant Nico","year":"2021"},{"key":"e_1_3_2_2_11_1","volume-title":"Houghton Mifflin Harcourt","author":"Gray L.","year":"2019"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445929"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3174023"},{"key":"e_1_3_2_2_14_1","volume-title":"Whose Science? Whose Knowledge? Thinking from Women\u2019s Lives","author":"Harding Sandra"},{"key":"e_1_3_2_2_15_1","unstructured":"Sarah Holland Ahmed Hosny Sarah Newman Joshua Joseph and Kasia Chmielinski. 2018. The Dataset Nutrition Label: A Framework To Drive Higher Data Quality Standards. arxiv:1805.03677\u00a0[cs.DB]  Sarah Holland Ahmed Hosny Sarah Newman Joshua Joseph and Kasia Chmielinski. 2018. The Dataset Nutrition Label: A Framework To Drive Higher Data Quality Standards. arxiv:1805.03677\u00a0[cs.DB]"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2441776.2441923"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"crossref","unstructured":"Tamara Kneese Alex Rosenblat and danah boyd. 2014. Understanding Fair Labor Practices in a Networked Age. 17\u00a0pages. https:\/\/www.datasociety.net\/pubs\/fow\/FairLabor.pdf  Tamara Kneese Alex Rosenblat and danah boyd. 2014. Understanding Fair Labor Practices in a Networked Age. 17\u00a0pages. https:\/\/www.datasociety.net\/pubs\/fow\/FairLabor.pdf","DOI":"10.2139\/ssrn.2536619"},{"key":"e_1_3_2_2_18_1","volume-title":"Kuhn and Richard Stallman. 2001\/2015. Free Software","author":"M."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.5555\/3031843.3031909"},{"key":"e_1_3_2_2_20_1","volume-title":"The Fate of Knowledge","author":"Longino E."},{"key":"e_1_3_2_2_21_1","volume-title":"Evaluation in the Crowd: Crowdsourcing and Human-Centered Experiments","author":"Martin David"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2531602.2531663"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2858036.2858539"},{"key":"e_1_3_2_2_24_1","unstructured":"Microsoft. [n.d.]. Microsoft Artificial Intelligence. https:\/\/microsoft.com\/ai  Microsoft. [n.d.]. Microsoft Artificial Intelligence. https:\/\/microsoft.com\/ai"},{"key":"e_1_3_2_2_25_1","volume-title":"Algorithms of Oppression: How Search Engines Reinforce Racism","author":"Noble Safiya\u00a0Umoja"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1177\/0963721414531598"},{"key":"e_1_3_2_2_27_1","volume-title":"Peirce and Nathan Houser and Christian Kloesel, (eds.). 1878\/1992","author":"S."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6399"},{"key":"e_1_3_2_2_29_1","volume-title":"The Internet is Enabling a New Kind of Poorly Paid Hell. The Atlantic (jan","author":"Semuels Alana","year":"2018"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/1869086.1869100"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3180492"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"crossref","unstructured":"Robyn Speer Joshua Chin and Catherine Havasi. 2018. ConceptNet 5.5: An Open Multilingual Graph of General Knowledge. arxiv:1612.03975\u00a0[cs.CL]  Robyn Speer Joshua Chin and Catherine Havasi. 2018. ConceptNet 5.5: An Open Multilingual Graph of General Knowledge. arxiv:1612.03975\u00a0[cs.CL]","DOI":"10.1609\/aaai.v31i1.11164"},{"key":"e_1_3_2_2_33_1","unstructured":"Richard Stallman. 1993. The GNU Manifesto. https:\/\/www.gnu.org\/gnu\/manifesto.html  Richard Stallman. 1993. The GNU Manifesto. https:\/\/www.gnu.org\/gnu\/manifesto.html"},{"key":"e_1_3_2_2_34_1","volume-title":"A Critical Perspective on Technological Innovation and Corporatism","author":"Suarez-Villa Luis"},{"key":"e_1_3_2_2_35_1","unstructured":"Alon Talmor Jonathan Herzig Nicholas Lourie and Jonathan Berant. 2019. CommonsenseQA: A Question Answering Challenge Targeting Commonsense Knowledge. arxiv:1811.00937\u00a0[cs.CL]  Alon Talmor Jonathan Herzig Nicholas Lourie and Jonathan Berant. 2019. CommonsenseQA: A Question Answering Challenge Targeting Commonsense Knowledge. arxiv:1811.00937\u00a0[cs.CL]"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-1606"},{"key":"e_1_3_2_2_37_1","volume-title":"Discriminating Systems: Gender, Race, and Power in AI. Technical Report","author":"West Sarah\u00a0Myers","year":"2019"},{"key":"e_1_3_2_2_38_1","volume-title":"Proceedings of the AAAI Conference on Human Computation and Crowdsourcing 7, 1 (Oct.","author":"Whiting E.","year":"2019"}],"event":{"name":"WebSci '21: WebSci '21 13th ACM Web Science Conference 2021","location":"Virtual Event United Kingdom","acronym":"WebSci '21","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["13th ACM Web Science Conference 2021"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3462741.3466809","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3462741.3466809","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:28:31Z","timestamp":1750195711000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3462741.3466809"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,21]]},"references-count":38,"alternative-id":["10.1145\/3462741.3466809","10.1145\/3462741"],"URL":"https:\/\/doi.org\/10.1145\/3462741.3466809","relation":{},"subject":[],"published":{"date-parts":[[2021,6,21]]},"assertion":[{"value":"2021-06-21","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}