{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,8]],"date-time":"2026-03-08T01:55:05Z","timestamp":1772934905062,"version":"3.50.1"},"reference-count":48,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T00:00:00Z","timestamp":1765152000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T00:00:00Z","timestamp":1765152000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,8]]},"DOI":"10.1109\/bigdata66926.2025.11402472","type":"proceedings-article","created":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T20:57:57Z","timestamp":1772830677000},"page":"5400-5409","source":"Crossref","is-referenced-by-count":0,"title":["Copyright Infringement Issues and Mitigations in Data for Training Generative AI"],"prefix":"10.1109","author":[{"given":"Anna","family":"Arnaudo","sequence":"first","affiliation":[{"name":"Politecnico di Torino,Department of Control and Computer Engineering,Torino,Italy"}]},{"given":"Riccardo","family":"Coppola","sequence":"additional","affiliation":[{"name":"Politecnico di Torino,Department of Control and Computer Engineering,Torino,Italy"}]},{"given":"Maurizio","family":"Morisio","sequence":"additional","affiliation":[{"name":"Politecnico di Torino,Department of Control and Computer Engineering,Torino,Italy"}]},{"given":"Antonio","family":"Vetr\u00f2","sequence":"additional","affiliation":[{"name":"Politecnico di Torino,Department of Control and Computer Engineering,Torino,Italy"}]},{"given":"Maurizio","family":"Borghi","sequence":"additional","affiliation":[{"name":"Universit&#x00E0; di Torino,Departement of Law,Torino,Italy"}]},{"given":"Bryan","family":"Khan","sequence":"additional","affiliation":[{"name":"Universit&#x00E0; di Torino,Departement of Law,Torino,Italy"}]},{"given":"Riccardo","family":"Raso","sequence":"additional","affiliation":[{"name":"Politecnico di Torino,Department of Control and Computer Engineering,Torino,Italy"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"On the opportunities and risks of foundation models","author":"B.","year":"2022"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s40319-024-01515-y"},{"key":"ref3","article-title":"Intersection of generative artificial intelligence and copyright: an indian perspective","volume-title":"vol. ahead-of-print","author":"Vig","year":"2024"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"101","DOI":"10.1016\/j.infsof.2018.09.006","article-title":"Guidelines for including grey literature and conducting multivocal literature reviews in software engineering","volume":"106","author":"Garousi","year":"2019","journal-title":"Information and Software Technology"},{"key":"ref5","volume-title":"Guidelines for performing systematic literature reviews in software engineering","author":"Kitchenham","year":"2007"},{"issue":"8040","key":"ref6","first-page":"827","volume-title":"Why \u2019open\u2019 AI systems are actually closed, and why this matters","volume":"635","author":"Widder","year":"2024"},{"issue":"11","key":"ref7","first-page":"1","volume-title":"Explainable generative AI (GenXAI): a survey, conceptualization, and research agenda","volume":"57","author":"Schneider","year":"2024"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3659005"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3597151"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"20","DOI":"10.1109\/ICWS62655.2024.00020","article-title":"Towards collecting royalties for copyrighted data for generative models","volume-title":"2024 IEEE International Conference on Web Services (ICWS)","author":"Ludwig","year":"2024"},{"key":"ref11","volume-title":"Publishers target common crawl in fight over AI training data | WIRED","year":"2024"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref13","volume-title":"LAION-5b: An open large-scale dataset for training next generation image-text models","author":"Schuhmann","year":"2022"},{"key":"ref14","volume-title":"The data provenance initiative: A large scale audit of dataset licensing & attribution in AI","author":"Longpre","year":"2023"},{"key":"ref15","volume-title":"An image is worth one word: Personalizing text-to-image generation using textual inversion","author":"Gal","year":"2022"},{"key":"ref16","volume-title":"DreamBooth: Fine tuning text-to-image diffusion models for subject-driven generation","author":"Ruiz","year":"2023"},{"key":"ref17","volume-title":"Multiconcept customization of text-to-image diffusion","author":"Kumari","year":"2023"},{"key":"ref18","volume-title":"LoRA: Low-rank adaptation of large language models","author":"Hu","year":"2021"},{"key":"ref19","volume-title":"Extracting training data from diffusion models","author":"Carlini","year":"2023"},{"key":"ref20","volume-title":"Quantifying memorization across neural language models","author":"Carlini","year":"2023"},{"key":"ref21","volume-title":"The files are in the computer: Copyright, memorization, and generative AI","author":"Cooper","year":"2025"},{"key":"ref22","volume-title":"Diffusion art or digital forgery? investigating data replication in diffusion models","author":"Somepalli","year":"2022"},{"key":"ref23","first-page":"913","article-title":"EKILA: Synthetic media provenance and attribution for generative art","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Balan","year":"2023"},{"key":"ref24","volume-title":"The secret revealer: Generative model-inversion attacks against deep neural networks","author":"Zhang","year":"2020"},{"key":"ref25","first-page":"8600","volume-title":"Silent guardian: Protecting text from malicious exploitation by large language models","volume":"19","author":"Zhao","year":"2024"},{"key":"ref26","volume-title":"CoProtector: Protect opensource code against unauthorized training usage with data poisoning","author":"Sun","year":"2022"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"1561","DOI":"10.1145\/3611643.3616297","article-title":"CodeMark: Imperceptible watermarking for code datasets against neural code completion models","volume-title":"Proceedings of the 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering","author":"Sun","year":"2023"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3674399.3674447"},{"key":"ref29","volume-title":"Glaze: Protecting artists from style mimicry by text-to-image models","author":"Shan","year":"2025"},{"key":"ref30","volume-title":"Anti-DreamBooth: Protecting users from personalized text-to-image synthesis","author":"Le","year":"2023"},{"key":"ref31","first-page":"807825","article-title":"Nightshade: Prompt-specific poisoning attacks on text-to-image generative models","author":"Shan","year":"2024","journal-title":"IEEE Computer Society"},{"key":"ref32","volume-title":"DUAW: Data-free universal adversarial watermark against stable diffusion customization","author":"Ye","year":"2023"},{"key":"ref33","volume-title":"Unlearnable examples for diffusion models: Protect data from unauthorized exploitation","author":"Zhao","year":"2024"},{"key":"ref34","doi-asserted-by":"crossref","first-page":"24398","DOI":"10.1109\/CVPR52733.2024.02303","article-title":"Can protective perturbation safeguard personal data from being exploited by stable diffusion?","volume-title":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Zhao","year":"2024"},{"key":"ref35","doi-asserted-by":"crossref","DOI":"10.54364\/AAIML.2023.1163","volume-title":"Should ChatGPT and bard share revenue with their data providers? a new business model for the AI era","author":"Zhang","year":"2023"},{"key":"ref36","volume-title":"WASA: WAtermark-based source attribution for large language model-generated data","author":"Lu","year":"2023"},{"key":"ref37","first-page":"2357","article-title":"Efficient authorship attribution method using ensemble models built by knowledge distillation","volume-title":"2023 9th International Conference on Computer and Communications (ICCC)","author":"Sakurai"},{"key":"ref38","volume-title":"An economic solution to copyright challenges of generative AI","author":"Wang","year":"2024"},{"key":"ref39","doi-asserted-by":"crossref","first-page":"69","DOI":"10.1515\/9781400829156-011","article-title":"7. a value for n-person games. contributions to the theory of games II (1953) 307\u2013317","volume-title":"Classics in Game Theory","author":"Kuhn","year":"1997"},{"key":"ref40","volume-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"ref41","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2018.00068","volume-title":"The unreasonable effectiveness of deep features as a perceptual metric","author":"Zhang","year":"2018"},{"key":"ref42","volume-title":"SinGAN: Learning a generative model from a single natural image","author":"Shaham","year":"2019"},{"key":"ref43","first-page":"2024017","volume-title":"Advancing membership inference attacks: The present and the future","volume":"4","author":"Li","year":"2025"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.52202\/075280-0665"},{"key":"ref45","volume-title":"An update on web publisher controls","year":"2023"},{"key":"ref46","volume-title":"D\u00e9cision 24-d-03 du 15 mars","year":"2024"},{"key":"ref47","volume-title":"How we\u2019re increasing transparency for gen AI content with the c2pa","author":"Richardson","year":"2024"},{"key":"ref48","volume-title":"Sony delivers highly anticipated firmware updates including c2pa compliancy and ensuring authenticity of images","author":"Goodman","year":"2024"}],"event":{"name":"2025 IEEE International Conference on Big Data (BigData)","location":"Macau, China","start":{"date-parts":[[2025,12,8]]},"end":{"date-parts":[[2025,12,11]]}},"container-title":["2025 IEEE International Conference on Big Data (BigData)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11400704\/11400712\/11402472.pdf?arnumber=11402472","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T07:20:00Z","timestamp":1772868000000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11402472\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,8]]},"references-count":48,"URL":"https:\/\/doi.org\/10.1109\/bigdata66926.2025.11402472","relation":{},"subject":[],"published":{"date-parts":[[2025,12,8]]}}}