{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T08:55:00Z","timestamp":1773392100375,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,6,20]],"date-time":"2022-06-20T00:00:00Z","timestamp":1655683200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,6,21]]},"DOI":"10.1145\/3531146.3533148","type":"proceedings-article","created":{"date-parts":[[2022,6,20]],"date-time":"2022-06-20T14:27:10Z","timestamp":1655735230000},"page":"839-849","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":15,"title":["Learning to Limit Data Collection via Scaling Laws: A Computational Interpretation for the Legal Principle of Data Minimization"],"prefix":"10.1145","author":[{"given":"Divya","family":"Shanmugam","sequence":"first","affiliation":[{"name":"MIT, USA"}]},{"given":"Fernando","family":"Diaz","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"given":"Samira","family":"Shabanian","sequence":"additional","affiliation":[{"name":"Microsoft Research, Canada, Canada"}]},{"given":"Michele","family":"Finck","sequence":"additional","affiliation":[{"name":"University of Tuebigen, Germany"}]},{"given":"Asia","family":"Biega","sequence":"additional","affiliation":[{"name":"Max Planck Institute for Security and Privacy, Germany"}]}],"member":"320","published-online":{"date-parts":[[2022,6,20]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Datatilsynet: The Norwegian Data\u00a0Protection Authority. [n.d.]. Artificial Intelligence and Privacy."},{"key":"e_1_3_2_1_2_1","unstructured":"Bowen Baker Otkrist Gupta Ramesh Raskar and Nikhil Naik. 2017. Accelerating neural architecture search using performance prediction. arXiv preprint arXiv:1705.10823(2017)."},{"key":"e_1_3_2_1_3_1","volume-title":"Biega and Mich\u00e8le Finck","author":"J.","year":"2021","unstructured":"Asia\u00a0J. Biega and Mich\u00e8le Finck. 2021. Reviving Purpose Limitation and Data Minimisation in Data-Driven Systems. Technology and Regulation(2021)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Asia\u00a0J. Biega Peter Potash Hal Daum\u00e9 Fernando Diaz and Mich\u00e8le Finck. 2020. Operationalizing the Legal Principle of Data Minimization for Personalization. In ACM(43) SIGIR \u201920. 399\u2013408.","DOI":"10.1145\/3397271.3401034"},{"key":"e_1_3_2_1_5_1","unstructured":"Reuben Binns and Valeria Gallo. 2019. Data minimisation and privacy-preserving techniques in AI systems. https:\/\/ico.org.uk\/about-the-ico\/news-and-events\/ai-blog-data-minimisation-and-privacy-preserving-techniques-in-ai-systems\/"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Shayok Chakraborty Jiayu Zhou Vineeth Balasubramanian Sethuraman Panchanathan Ian Davidson and Jieping Ye. 2013. Active Matrix Completion. ICDM.","DOI":"10.1109\/ICDM.2013.69"},{"key":"e_1_3_2_1_7_1","unstructured":"Irene Chen Fredrik\u00a0D Johansson and David Sontag. 2018. Why is my classifier discriminatory?. In NeurIPS. 3539\u20133550."},{"key":"e_1_3_2_1_8_1","volume-title":"Medical Image Deep Learning with Hospital PACS Dataset. CoRR","author":"Cho Junghwan","year":"2015","unstructured":"Junghwan Cho, Kyewook Lee, Ellie Shin, Garry Choy, and Synho Do. 2015. Medical Image Deep Learning with Hospital PACS Dataset. CoRR (2015)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Richard Chow Hongxia Jin Bart Knijnenburg and Gokay Saldamli. 2013. Differential data analysis for recommender systems. In ACM7-RecSys. 323\u2013326.","DOI":"10.1145\/2507157.2507190"},{"key":"e_1_3_2_1_10_1","unstructured":"Norwegian Data Protection Authority Datatilsynet. 2018. Artificial Intelligence and Privacy. https:\/\/www.datatilsynet.no\/en\/regulations-and-tools\/reports-on-specific-subjects\/ai-and-privacy\/"},{"key":"e_1_3_2_1_11_1","volume-title":"How Large a Training Set is Needed to Develop a Classifier for Microarray Data?Clinical Cancer Research 14, 1","author":"Dobbin K.","year":"2008","unstructured":"Kevin\u00a0K. Dobbin, Yingdong Zhao, and Richard\u00a0M. Simon. 2008. How Large a Training Set is Needed to Develop a Classifier for Microarray Data?Clinical Cancer Research 14, 1 (2008), 108\u2013114."},{"key":"e_1_3_2_1_12_1","unstructured":"Tobias Domhan Jost\u00a0Tobias Springenberg and Frank Hutter. 2015. Speeding up automatic hyperparameter optimization of deep neural networks by extrapolation of learning curves. In IJCAI-24."},{"key":"e_1_3_2_1_13_1","volume-title":"Predicting sample size required for classification performance. BMC Medical Informatics and Decision Making 12, 1","author":"Figueroa L","year":"2012","unstructured":"Rosa\u00a0L Figueroa, Qing Zeng-Treitler, Sasikiran Kandula, and Long\u00a0H Ngo. 2012. Predicting sample size required for classification performance. BMC Medical Informatics and Decision Making 12, 1 (2012)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Yoav Freund H\u00a0Sebastian Seung Eli Shamir and Naftali Tishby. 1997. Selective sampling using the query by committee algorithm. Machine learning 28(1997).","DOI":"10.1023\/A:1007330508534"},{"key":"e_1_3_2_1_15_1","unstructured":"Simon Funk. 2006. Netflix update: Try this at home. https:\/\/sifter.org\/\u00a0simon\/journal\/20061211.html(2006)."},{"key":"e_1_3_2_1_16_1","unstructured":"GDPR. 2016. Regulation (EU) 2016\/679 of the European Parliament and of the Council. Official Journal of the European Union(2016)."},{"key":"e_1_3_2_1_17_1","volume-title":"Data minimization for GDPR Compliance in machine learning models. AI and Ethics","author":"Goldsteen Abigail","year":"2021","unstructured":"Abigail Goldsteen, Gilad Ezov, Ron Shmelkin, Micha Moffie, and Ariel Farkash. 2021. Data minimization for GDPR Compliance in machine learning models. AI and Ethics (2021), 1\u201315."},{"key":"e_1_3_2_1_18_1","volume-title":"Amsterdam Privacy Conference. 1\u201321","author":"G\u00fcrses Seda","year":"2015","unstructured":"Seda G\u00fcrses, Carmela Troncoso, and Claudia Diaz. 2015. Engineering privacy by design reloaded. In Amsterdam Privacy Conference. 1\u201321."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2014.02.013"},{"key":"e_1_3_2_1_20_1","first-page":"1","article-title":"The movielens datasets: History and context","volume":"5","author":"Harper F\u00a0Maxwell","year":"2015","unstructured":"F\u00a0Maxwell Harper and Joseph\u00a0A Konstan. 2015. The movielens datasets: History and context. Acm-TiiS 5, 4 (2015), 1\u201319.","journal-title":"Acm-TiiS"},{"key":"e_1_3_2_1_21_1","unstructured":"Ruining He Wang-Cheng Kang and Julian McAuley. 2017. Translation-based recommendation. In ACM11-RecSys. 161\u2013169."},{"key":"e_1_3_2_1_22_1","unstructured":"J Hestness S Narang N Ardalani G Diamos H Jun H Kianinejad M Patwary Y Yang and Y Zhou. 2017. Deep Learning Scaling is Predictable Empirically. arXiv preprint arXiv:1712.00409(2017)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Sheng-Jun Huang Miao Xu Ming-Kun Xie Masashi Sugiyama Gang Niu and Songcan Chen. 2018. Active feature acquisition with supervised matrix completion. In ACM24-SIGKDD. 1571\u20131579.","DOI":"10.1145\/3219819.3220084"},{"key":"e_1_3_2_1_24_1","volume-title":"ICO","author":"Commisioner\u2019s\u00a0Office K\u00a0Information","year":"2018","unstructured":"UK\u00a0Information Commisioner\u2019s\u00a0Office: ICO. 2018. Guide to Data Protection. Some basic concepts. Retrieved Jan 22, 2020 from https:\/\/ico.org.uk\/for-organisations\/guide-to-data-protection\/introduction-to-data-protection\/some-basic-concepts\/"},{"key":"e_1_3_2_1_25_1","volume-title":"ICO","author":"Commisioner\u2019s\u00a0Office K\u00a0Information","year":"2018","unstructured":"UK\u00a0Information Commisioner\u2019s\u00a0Office: ICO. 2018. Guide to the General Data Protection Regulation (GDPR). Principle (c): Data minimisation.Retrieved Jan 22, 2020 from https:\/\/ico.org.uk\/for-organisations\/guide-to-data-protection\/guide-to-the-general-data-protection-regulation-gdpr\/principles\/data-minimisation\/"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.1983.4767459"},{"key":"e_1_3_2_1_27_1","unstructured":"Prasanth Kolachina Nicola Cancedda Marc Dymetman and Sriram Venkatapathy. 2012. Prediction of learning curves in machine translation. In ACL \u201912."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Bert-Jaap Koops. [n.d.]. The trouble with European data protection law\u2019(2014). International Data Privacy Law 4 ([n. d.]) 250.","DOI":"10.1093\/idpl\/ipu023"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.5555\/1946417.1946431"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Mark MacCarthy. 2018. In Defense of Big Data Analytics. The Cambridge Handbook of Consumer Privacy(2018) 47\u201378.","DOI":"10.1017\/9781316831960.003"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Prem Melville Maytal Saar-Tsechansky Foster Provost and Raymond Mooney. 2005. An expected utility approach to active feature-value acquisition. In ICDM.","DOI":"10.2139\/ssrn.936445"},{"key":"e_1_3_2_1_32_1","volume-title":"Towards Robust and Reproducible Active Learning Using Neural Networks. arXiv","author":"Munjal Prateek","year":"2020","unstructured":"Prateek Munjal, Nasir Hayat, Munawar Hayat, Jamshid Sourati, and Shadab Khan. 2020. Towards Robust and Reproducible Active Learning Using Neural Networks. arXiv (2020), arXiv\u20132002."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Rajiv Pasricha and Julian McAuley. 2018. Translation-based factorization machines for sequential recommendation. In ACM-12-RecSys. 63\u201371.","DOI":"10.1145\/3240323.3240356"},{"key":"e_1_3_2_1_34_1","volume-title":"Auditing Black-Box Prediction Models for Data Minimization Compliance. Advances in Neural Information Processing Systems 34","author":"Rastegarpanah Bashir","year":"2021","unstructured":"Bashir Rastegarpanah, Krishna Gummadi, and Mark Crovella. 2021. Auditing Black-Box Prediction Models for Data Minimization Compliance. Advances in Neural Information Processing Systems 34 (2021)."},{"key":"e_1_3_2_1_35_1","unstructured":"Awanthika Senarath and Nalin Asanka\u00a0Gamagedara Arachchilage. 2018. Understanding Software Developers\u2019 Approach towards Implementing Data Minimization. arXiv preprint arXiv:1808.01479(2018)."},{"key":"e_1_3_2_1_36_1","volume-title":"Slice Tuner: A Selective Data Collection Framework for Accurate and Fair Machine Learning Models.","author":"Tae Ki\u00a0Hyun","year":"2020","unstructured":"Ki\u00a0Hyun Tae and Steven\u00a0Euijong Whang. 2020. Slice Tuner: A Selective Data Collection Framework for Accurate and Fair Machine Learning Models. (2020)."},{"key":"e_1_3_2_1_37_1","volume-title":"New Form of Collective Action Against Technology Companies. In The World Wide Web Conference. ACM.","author":"Vincent Nicholas","year":"2019","unstructured":"Nicholas Vincent, Brent Hecht, and Shilad Sen. 2019. \u201cData Strikes\u201d: Evaluating the Effectiveness of a New Form of Collective Action Against Technology Companies. In The World Wide Web Conference. ACM."},{"key":"e_1_3_2_1_38_1","unstructured":"Duy Vu Mikhail Bilenko Maytal Saar-tsechansky and Prem Melville. 2007. Intelligent Information Acquisition for Improved Clustering."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Hongyi Wen Longqi Yang Michael Sobolev and Deborah Estrin. 2018. Exploring recommendations under user-controlled data filtering. In ACM12 RecSys. 72\u201376.","DOI":"10.1145\/3240323.3240399"},{"key":"e_1_3_2_1_40_1","volume-title":"International Conference on Machine Learning. PMLR, 10303\u201310312","author":"Wistuba Martin","year":"2020","unstructured":"Martin Wistuba and Tejaswini Pedapati. 2020. Learning to Rank Learning Curves. In International Conference on Machine Learning. PMLR, 10303\u201310312."}],"event":{"name":"FAccT '22: 2022 ACM Conference on Fairness, Accountability, and Transparency","location":"Seoul Republic of Korea","acronym":"FAccT '22","sponsor":["ACM Association for Computing Machinery"]},"container-title":["2022 ACM Conference on Fairness Accountability and Transparency"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3531146.3533148","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3531146.3533148","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:08Z","timestamp":1750186928000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3531146.3533148"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,20]]},"references-count":40,"alternative-id":["10.1145\/3531146.3533148","10.1145\/3531146"],"URL":"https:\/\/doi.org\/10.1145\/3531146.3533148","relation":{},"subject":[],"published":{"date-parts":[[2022,6,20]]},"assertion":[{"value":"2022-06-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}