{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T17:01:48Z","timestamp":1776099708336,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","funder":[{"name":"FAPESP","award":["2023\/12086-9; 2024\/01210-3; 2024\/07969-1; 2024\/09375-1; 2024\/09372-2; 2022\/14690-8; 2020\/09838-0; 2013\/08293-7"],"award-info":[{"award-number":["2023\/12086-9; 2024\/01210-3; 2024\/07969-1; 2024\/09375-1; 2024\/09372-2; 2022\/14690-8; 2020\/09838-0; 2013\/08293-7"]}]},{"name":"PIND\/FAEPEX\/UNICAMP","award":["2597\/23"],"award-info":[{"award-number":["2597\/23"]}]},{"name":"H.IAAC","award":["01245.003479\/ 2024-10"],"award-info":[{"award-number":["01245.003479\/ 2024-10"]}]},{"name":"CNPq","award":["316489\/2023-9"],"award-info":[{"award-number":["316489\/2023-9"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,23]]},"DOI":"10.1145\/3715275.3732166","type":"proceedings-article","created":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T17:03:13Z","timestamp":1750698193000},"page":"2542-2553","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Neglected Risks: The Disturbing Reality of Children\u2019s Images in Datasets and the Urgent Call for Accountability"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1546-3740","authenticated-orcid":false,"given":"Carlos","family":"Caetano","sequence":"first","affiliation":[{"name":"Universidade Estadual de Campinas (UNICAMP), Instituto de Computa\u00e7\u00e3o, Campinas, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2835-1331","authenticated-orcid":false,"given":"Gabriel O. dos","family":"Santos","sequence":"additional","affiliation":[{"name":"Universidade Estadual de Campinas (UNICAMP), Instituto de Computa\u00e7\u00e3o, Campinas, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9881-208X","authenticated-orcid":false,"given":"Caio","family":"Petrucci","sequence":"additional","affiliation":[{"name":"Universidade Estadual de Campinas (UNICAMP), Instituto de Computa\u00e7\u00e3o, Campinas, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-4209-6293","authenticated-orcid":false,"given":"Artur","family":"Barros","sequence":"additional","affiliation":[{"name":"Universidade Estadual de Campinas (UNICAMP), Instituto de Computa\u00e7\u00e3o, Campinas, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0521-0432","authenticated-orcid":false,"given":"Camila","family":"Laranjeira","sequence":"additional","affiliation":[{"name":"Universidade Federal de Minas Gerais (UFMG), Departamento de Ci\u00eancia da Computa\u00e7\u00e3o, Belo Horizonte, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1781-2630","authenticated-orcid":false,"given":"Leo Sampaio Ferraz","family":"Ribeiro","sequence":"additional","affiliation":[{"name":"Universidade de S\u00e3o Paulo (USP), Instituto de Ci\u00eancias Matem\u00e1ticas e de Computa\u00e7\u00e3o, S\u00e3o Carlos, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7912-0002","authenticated-orcid":false,"given":"J\u00falia Fernandes","family":"de Mendon\u00e7a","sequence":"additional","affiliation":[{"name":"Instituto Alana, Salvador, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8889-1586","authenticated-orcid":false,"given":"Jefersson A.","family":"dos Santos","sequence":"additional","affiliation":[{"name":"University of Sheffield, School of Computer Science, Sheffield, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9068-938X","authenticated-orcid":false,"given":"Sandra","family":"Avila","sequence":"additional","affiliation":[{"name":"Universidade Estadual de Campinas (UNICAMP), Instituto de Computa\u00e7\u00e3o, Campinas, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,6,23]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"crossref","unstructured":"Sadam Al-Azani Sadiq\u00a0M. Sait and Khaled\u00a0A. Al-Utaibi. 2022. A Comprehensive Literature Review on Children\u2019s Databases for Machine Learning Applications. IEEE Access (2022).","DOI":"10.1109\/ACCESS.2022.3146008"},{"key":"e_1_3_3_2_3_2","unstructured":"Rodrigo Benenson and Vittorio Ferrari. 2022. From colouring-in to pointillism: revisiting semantic segmentation supervision. arXiv (2022). https:\/\/arxiv.org\/abs\/2210.14142"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658955"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658968"},{"key":"e_1_3_3_2_6_2","volume-title":"International Conference on Neural Information Processing Systems (NeurIPS)","author":"Birhane Abeba","year":"2024","unstructured":"Abeba Birhane, Vinay Prabhu, Sang Han, Vishnu\u00a0Naresh Boddeti, and Alexandra\u00a0Sasha Luccioni. 2024. Into the LAION\u2019s den: investigating hate in multimodal datasets. In International Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00158"},{"key":"e_1_3_3_2_8_2","unstructured":"Abeba Birhane Vinay\u00a0Uday Prabhu and Emmanuel Kahembwe. 2021. Multimodal datasets: misogyny pornography and malignant stereotypes. arXiv (2021). https:\/\/arxiv.org\/abs\/2110.01963"},{"key":"e_1_3_3_2_9_2","unstructured":"Megan\u00a0A. Brown Andrew Gruen Gabe Maldoff Solomon Messing Zeve Sanderson and Michael Zimmer. 2024. Web Scraping for Research: Legal Ethical Institutional and Scientific Considerations. arxiv:https:\/\/arXiv.org\/abs\/2410.23432"},{"key":"e_1_3_3_2_10_2","unstructured":"Wei-Lin Chiang Zhuohan Li Zi Lin Ying Sheng Zhanghao Wu Hao Zhang Lianmin Zheng Siyuan Zhuang Yonghao Zhuang Joseph\u00a0E. Gonzalez Ion Stoica and Eric\u00a0P. Xing. 2023. Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90%* ChatGPT Quality. https:\/\/lmsys.org\/blog\/2023-03-30-vicuna\/"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.11171501"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.37016\/mr-2020-15"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.mrl-1.15"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"Gabriel\u00a0O. dos Santos Esther\u00a0L. Colombini and Sandra Avila. 2022. #PraCegoVer: A Large Dataset for Image Captioning in Portuguese. Data (2022).","DOI":"10.3390\/data7020013"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-65343-8_21"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","unstructured":"Jayasankar Jayachandran and Vijay Arni. 2023. Traversing the Ethical Landscape of Data Scraping for AI. https:\/\/doi.org\/10.2139\/ssrn.4666354","DOI":"10.2139\/ssrn.4666354"},{"key":"e_1_3_3_2_17_2","unstructured":"Albert\u00a0Q. Jiang Alexandre Sablayrolles Arthur Mensch Chris Bamford Devendra\u00a0Singh Chaplot Diego de\u00a0las Casas Florian Bressand Gianna Lengyel Guillaume Lample Lucile Saulnier L\u00e9lio\u00a0Renard Lavaud Marie-Anne Lachaux Pierre Stock Teven\u00a0Le Scao Thibaut Lavril Thomas Wang Timoth\u00e9e Lacroix and William\u00a0El Sayed. 2023. Mistral 7B. https:\/\/arxiv.org\/abs\/2310.06825"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"crossref","unstructured":"Alina Kuznetsova Hassan Rom Neil Alldrin Jasper Uijlings Ivan Krasin Jordi Pont-Tuset Shahab Kamali et\u00a0al. 2020. The open images dataset v4: Unified image classification object detection and visual relationship detection at scale. International journal of computer vision (2020).","DOI":"10.1007\/s11263-020-01316-z"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3534636"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658963"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"e_1_3_3_2_22_2","unstructured":"Haotian Liu Chunyuan Li Yuheng Li Bo Li Yuanhan Zhang Sheng Shen and Yong\u00a0Jae Lee. 2024. LLaVA-NeXT: Improved reasoning OCR and world knowledge. https:\/\/llava-vl.github.io\/blog\/2024-01-30-llava-next\/"},{"key":"e_1_3_3_2_23_2","volume-title":"Advances in Neural Information Processing Systems","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong\u00a0Jae Lee. 2023. Visual Instruction Tuning. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_3_2_24_2","unstructured":"Stephen Pragasam\u00a0Singaraju Luis\u00a0Arango and Outi Niininen. 2023. Consumer Responses to AI-Generated Charitable Giving Ads. Journal of Advertising (2023)."},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/SIBGRAPI.2018.00065"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"crossref","unstructured":"Moreno Mancosu and Federico Vegetti. 2020. What You Can Scrape and What Is Right to Scrape: A Proposal for a Tool to Collect Public Facebook Data. Social Media + Society (2020).","DOI":"10.1177\/2056305120940703"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"crossref","unstructured":"Michael\u00a0M. Moore Einat Slonimsky Aaron\u00a0D. Long Raymond\u00a0W. Sze and Ramesh\u00a0S. Iyer. 2019. Machine learning concepts concerns and opportunities for a pediatric radiologist. Pediatric Radiology (2019).","DOI":"10.1007\/s00247-018-4277-7"},{"key":"e_1_3_3_2_28_2","unstructured":"NousResearch. 2024. Nous Hermes 2 - Yi-34B. https:\/\/huggingface.co\/NousResearch\/Nous-Hermes-2-Yi-34B. Accessed: January 15 2025."},{"key":"e_1_3_3_2_29_2","first-page":"8748","volume-title":"International Conference on Machine Learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning. 8748\u20138763."},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"crossref","unstructured":"Valentin Rupp and Max von Grafenstein. 2024. Clarifying \u201cpersonal data\u201d and the role of anonymisation in data protection law: Including and excluding data from the scope of the GDPR (more clearly) through refining the concept of data protection. Computer Law & Security Review (2024).","DOI":"10.2139\/ssrn.4409587"},{"key":"e_1_3_3_2_32_2","volume-title":"International Conference on Neural Information Processing Systems (NeurIPSF)","author":"Schuhmann Christoph","year":"2022","unstructured":"Christoph Schuhmann, Romain Beaumont, Richard Vencu, Cade Gordon, Ross Wightman, Mehdi Cherti, Theo Coombes, Aarush Katta, Clayton Mullis, Mitchell Wortsman, Patrick Schramowski, Srivatsa Kundurthy, Katherine Crowson, Ludwig Schmidt, Robert Kaczmarczyk, and Jenia Jitsev. 2022. LAION-5B: an open large-scale dataset for training next generation image-text models. In International Conference on Neural Information Processing Systems (NeurIPSF)."},{"key":"e_1_3_3_2_33_2","volume-title":"Data Centric AI - NeurIPS Workshop","author":"Schuhmann Christoph","year":"2021","unstructured":"Christoph Schuhmann, Richard Vencu, Romain Beaumont, Robert Kaczmarczyk, Clayton Mullis, Aarush Katta, Theo Coombes, Jenia Jitsev, and Aran Komatsuzaki. 2021. LAION-400M: Open Dataset of CLIP-Filtered 400 Million Image-Text Pairs. In Data Centric AI - NeurIPS Workshop."},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCA59364.2023.10401723"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.25740\/kh752sm9123"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.25740\/jv206yg3793"},{"key":"e_1_3_3_2_37_2","unstructured":"Ryan Webster Julien Rabin Loic Simon and Frederic Jurie. 2023. On the De-duplication of LAION-2B. https:\/\/arxiv.org\/abs\/2303.12733"},{"key":"e_1_3_3_2_38_2","unstructured":"Xiongbiao Ye Yuhong Yan Jia Li and Bo Jiang. 2024. Privacy and personal data risk governance for generative artificial intelligence: A Chinese perspective. Telecommunications Policy (2024)."},{"key":"e_1_3_3_2_39_2","volume-title":"IEEE Symposium on Security and Privacy (SP)","author":"Yu Yaman","year":"2025","unstructured":"Yaman Yu, Tanusree Sharma, Melinda Hu, Justin Wang, and Yang Wang. 2025. Exploring Parent-Child Perspectives on Safety in Generative AI: Concerns, Mitigation Strategies, and Design Implications. In IEEE Symposium on Security and Privacy (SP)."}],"event":{"name":"FAccT '25: The 2025 ACM Conference on Fairness, Accountability, and Transparency","location":"Athens Greece","acronym":"FAccT '25"},"container-title":["Proceedings of the 2025 ACM Conference on Fairness, Accountability, and Transparency"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3715275.3732166","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,24]],"date-time":"2025-06-24T11:10:38Z","timestamp":1750763438000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3715275.3732166"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,23]]},"references-count":38,"alternative-id":["10.1145\/3715275.3732166","10.1145\/3715275"],"URL":"https:\/\/doi.org\/10.1145\/3715275.3732166","relation":{},"subject":[],"published":{"date-parts":[[2025,6,23]]},"assertion":[{"value":"2025-06-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}