{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,7]],"date-time":"2026-07-07T15:17:59Z","timestamp":1783437479377,"version":"3.54.6"},"publisher-location":"New York, NY, USA","reference-count":69,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62271307,61771310"],"award-info":[{"award-number":["62271307,61771310"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755002","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:47:42Z","timestamp":1761371262000},"page":"11337-11346","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Revisiting Data Auditing in Large Vision-Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-0570-8626","authenticated-orcid":false,"given":"Hongyu","family":"Zhu","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6798-1118","authenticated-orcid":false,"given":"Sichu","family":"Liang","sequence":"additional","affiliation":[{"name":"Southeast University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9198-575X","authenticated-orcid":false,"given":"Wenwen","family":"Wang","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9921-7215","authenticated-orcid":false,"given":"Boheng","family":"Li","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8167-8311","authenticated-orcid":false,"given":"Tongxin","family":"Yuan","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7965-5170","authenticated-orcid":false,"given":"Fangqi","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1899-6750","authenticated-orcid":false,"given":"Hanyi","family":"Wang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8214-6809","authenticated-orcid":false,"given":"Shi-Lin","family":"Wang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4183-3645","authenticated-orcid":false,"given":"Zhuosheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Anthropic. 2024. Introducing Claude 3.5 Sonnet. https:\/\/www.anthropic.com\/news\/claude-3-5-sonnet"},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the 12th International Conference on Learning Representations (ICLR).","author":"Antoniades Antonis","year":"2025","unstructured":"Antonis Antoniades, Xinyi Wang, Yanai Elazar, Alfonso Amayuelas, Alon Albalak, Kexun Zhang, and William Yang Wang. 2025. Generalization v.s. Memorization: Tracing Language Models' Capabilities Back to Pretraining Data. In Proceedings of the 12th International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_3_1","volume-title":"The Pitfalls of Next-Token Prediction. In International Conference on Machine Learning. PMLR, 2296-2318","author":"Bachmann Gregor","year":"2024","unstructured":"Gregor Bachmann and Vaishnavh Nagarajan. 2024. The Pitfalls of Next-Token Prediction. In International Conference on Machine Learning. PMLR, 2296-2318."},{"key":"e_1_3_2_1_4_1","unstructured":"Shuai Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge Sibo Song Kai Dang Peng Wang Shijie Wang Jun Tang et al. 2025. Qwen2. 5-vl technical report. arXiv preprint arXiv:2502.13923 (2025)."},{"key":"e_1_3_2_1_5_1","unstructured":"Sebastian Bischoff Alana Darcher Michael Deistler Richard Gao Franziska Gerken Manuel Gloeckler Lisa Haxel Jaivardhan Kapoor Janne K Lappalainen Jakob H Macke et al. 2024. A Practical Guide to Sample-based Statistical Distances for Evaluating Generative Models in Science. Transactions on Machine Learning Research (2024)."},{"key":"e_1_3_2_1_6_1","volume-title":"Random forests. Machine learning","author":"Breiman Leo","year":"2001","unstructured":"Leo Breiman. 2001. Random forests. Machine learning, Vol. 45 (2001), 5-32."},{"key":"e_1_3_2_1_7_1","unstructured":"Nicholas Carlini Florian Tramer Eric Wallace Matthew Jagielski Ariel Herbert-Voss Katherine Lee Adam Roberts Tom Brown Dawn Song Ulfar Erlingsson et al. 2021. Extracting training data from large language models. In 30th USENIX security symposium (USENIX Security 21). 2633-2650."},{"key":"e_1_3_2_1_8_1","volume-title":"European Conference on Computer Vision. Springer, 370-387","author":"Chen Lin","year":"2024","unstructured":"Lin Chen, Jinsong Li, Xiaoyi Dong, Pan Zhang, Conghui He, Jiaqi Wang, Feng Zhao, and Dahua Lin. 2024. Sharegpt4v: Improving large multi-modal models with better captions. In European Conference on Computer Vision. Springer, 370-387."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3240194"},{"key":"e_1_3_2_1_10_1","volume-title":"Microsoft coco captions: Data collection and evaluation server. arXiv preprint arXiv:1504.00325","author":"Chen Xinlei","year":"2015","unstructured":"Xinlei Chen, Hao Fang, Tsung-Yi Lin, Ramakrishna Vedantam, Saurabh Gupta, Piotr Doll\u00e1r, and C Lawrence Zitnick. 2015. Microsoft coco captions: Data collection and evaluation server. arXiv preprint arXiv:1504.00325 (2015)."},{"key":"e_1_3_2_1_11_1","volume-title":"Blind baselines beat membership inference attacks for foundation models. arXiv preprint arXiv:2406.16201","author":"Das Debeshee","year":"2024","unstructured":"Debeshee Das, Jie Zhang, and Florian Tram\u00e8r. 2024. Blind baselines beat membership inference attacks for foundation models. arXiv preprint arXiv:2406.16201 (2024)."},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Deitke Matt","unstructured":"Matt Deitke, Christopher Clark, Sangho Lee, and et al., 2025. Molmo and PixMo: Open Weights and Open Data for State-of-the-Art Vision-Language Models. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_13_1","volume-title":"SoK: Dataset Copyright Auditing in Machine Learning Systems. In 2025 IEEE Symposium on Security and Privacy (SP). IEEE Computer Society, 25-25","author":"Du Linkang","year":"2024","unstructured":"Linkang Du, Xuanru Zhou, Min Chen, Chusong Zhang, Zhou Su, Peng Cheng, Jiming Chen, and Zhikun Zhang. 2024. SoK: Dataset Copyright Auditing in Machine Learning Systems. In 2025 IEEE Symposium on Security and Privacy (SP). IEEE Computer Society, 25-25."},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the 41st International Conference on Machine Learning. 11940-11956","author":"Duarte Andr\u00e9 V","year":"2024","unstructured":"Andr\u00e9 V Duarte, Xuandong Zhao, Arlindo L Oliveira, and Lei Li. 2024. DE-COP: detecting copyrighted content in language models training data. In Proceedings of the 41st International Conference on Machine Learning. 11940-11956."},{"key":"e_1_3_2_1_15_1","volume-title":"Computer age statistical inference, student edition: algorithms, evidence, and data science","author":"Efron Bradley","unstructured":"Bradley Efron and Trevor Hastie. 2021. Computer age statistical inference, student edition: algorithms, evidence, and data science. Vol. 6. Cambridge University Press."},{"key":"e_1_3_2_1_16_1","volume-title":"International Conference on Machine Learning. PMLR, 2922-2932","author":"Engstrom Logan","year":"2020","unstructured":"Logan Engstrom, Andrew Ilyas, Shibani Santurkar, Dimitris Tsipras, Jacob Steinhardt, and Aleksander Madry. 2020. Identifying statistical bias in dataset replication. In International Conference on Machine Learning. PMLR, 2922-2932."},{"key":"e_1_3_2_1_17_1","volume-title":"Flickr: Online photo management and sharing application. https:\/\/www.flickr.com.","year":"2025","unstructured":"Flickr. 2025. Flickr: Online photo management and sharing application. https:\/\/www.flickr.com."},{"key":"e_1_3_2_1_18_1","first-page":"27092","article-title":"Datacomp: In search of the next generation of multimodal datasets","volume":"36","author":"Gadre Samir Yitzhak","year":"2023","unstructured":"Samir Yitzhak Gadre, Gabriel Ilharco, Alex Fang, Jonathan Hayase, Georgios Smyrnis, Thao Nguyen, Ryan Marten, Mitchell Wortsman, Dhruba Ghosh, Jieyu Zhang, et al., 2023. Datacomp: In search of the next generation of multimodal datasets. Advances in Neural Information Processing Systems, Vol. 36 (2023), 27092-27112.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/34.3891"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681488"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-020-00257-z"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00550"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the 2024 on ACM SIGSAC Conference on Computer and Communications Security. 4822-4836","author":"Jeong Ha Anna Yoo","year":"2024","unstructured":"Anna Yoo Jeong Ha, Josephine Passananti, Ronik Bhaskar, Shawn Shan, Reid Southen, Haitao Zheng, and Ben Y Zhao. 2024. Organic or diffused: Can we distinguish human art from ai-generated images?. In Proceedings of the 2024 on ACM SIGSAC Conference on Computer and Communications Security. 4822-4836."},{"key":"e_1_3_2_1_24_1","volume-title":"The Elements of Statistical Learning: Data Mining, Inference, and Prediction","author":"Hastie Trevor","unstructured":"Trevor Hastie, Robert Tibshirani, and Jerome Friedman. 2009. The Elements of Statistical Learning: Data Mining, Inference, and Prediction (2nd ed.). Springer, New York, NY. https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-0-387-84858-7.pdf","edition":"2"},{"key":"e_1_3_2_1_25_1","volume-title":"LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=nZeVKeeFYf9","author":"Hu Edward J","year":"2022","unstructured":"Edward J Hu, yelong shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2022. LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=nZeVKeeFYf9"},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of the 34th USENIX Security Symposium. USENIX Association.","author":"Hu Yuke","year":"2025","unstructured":"Yuke Hu, Zheng Li, Zhihao Liu, Yang Zhang, Zhan Qin, Kui Ren, and Chun Chen. 2025. Membership Inference Attacks Against Vision-Language Models. In Proceedings of the 34th USENIX Security Symposium. USENIX Association."},{"key":"e_1_3_2_1_27_1","volume-title":"Measuring Forgetting of Memorized Training Examples. In The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=7bJizxLKrR","author":"Jagielski Matthew","year":"2023","unstructured":"Matthew Jagielski, Om Thakkar, Florian Tramer, Daphne Ippolito, Katherine Lee, Nicholas Carlini, Eric Wallace, Shuang Song, Abhradeep Guha Thakurta, Nicolas Papernot, and Chiyuan Zhang. 2023. Measuring Forgetting of Memorized Training Examples. In The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=7bJizxLKrR"},{"key":"e_1_3_2_1_28_1","volume-title":"International conference on machine learning. PMLR, 4904-4916","author":"Jia Chao","year":"2021","unstructured":"Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc Le, Yun-Hsuan Sung, Zhen Li, and Tom Duerig. 2021. Scaling up visual and vision-language representation learning with noisy text supervision. In International conference on machine learning. PMLR, 4904-4916."},{"key":"e_1_3_2_1_29_1","unstructured":"Yuri Kageyama. 2025. ChatGPT's viral Studio Ghibli-style images highlight AI copyright concerns. https:\/\/apnews.com\/article\/studio-ghibli-chatgpt-images-hayao-miyazaki-openai-0f4cb487ec3042dd5b43ad47879b91f4"},{"key":"e_1_3_2_1_30_1","first-page":"20750","article-title":"Propile: Probing privacy leakage in large language models","volume":"36","author":"Kim Siwon","year":"2023","unstructured":"Siwon Kim, Sangdoo Yun, Hwaran Lee, Martin Gubri, Sungroh Yoon, and Seong Joon Oh. 2023. Propile: Probing privacy leakage in large language models. Advances in Neural Information Processing Systems, Vol. 36 (2023), 20750-20762.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_31_1","volume-title":"LLaVA-OneVision: Easy Visual Task Transfer. Transactions on Machine Learning Research","author":"Li Bo","year":"2025","unstructured":"Bo Li, Yuanhan Zhang, Dong Guo, Renrui Zhang, Feng Li, Hao Zhang, Kaichen Zhang, Peiyuan Zhang, Yanwei Li, Ziwei Liu, and Chunyuan Li. 2025. LLaVA-OneVision: Easy Visual Task Transfer. Transactions on Machine Learning Research (2025)."},{"key":"e_1_3_2_1_32_1","first-page":"28541","article-title":"Llava-med: Training a large language-and-vision assistant for biomedicine in one day","volume":"36","author":"Li Chunyuan","year":"2023","unstructured":"Chunyuan Li, Cliff Wong, Sheng Zhang, Naoto Usuyama, Haotian Liu, Jianwei Yang, Tristan Naumann, Hoifung Poon, and Jianfeng Gao. 2023. Llava-med: Training a large language-and-vision assistant for biomedicine in one day. Advances in Neural Information Processing Systems, Vol. 36 (2023), 28541-28564.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_33_1","first-page":"98645","article-title":"Membership inference attacks against large vision-language models","volume":"37","author":"Li Zhan","year":"2024","unstructured":"Zhan Li, Yongtao Wu, Yihang Chen, Francesco Tonin, Elias Abad Rocamora, and Volkan Cevher. 2024. Membership inference attacks against large vision-language models. Advances in Neural Information Processing Systems, Vol. 37 (2024), 98645-98674.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_34_1","first-page":"740","volume-title":"Switzerland","author":"Lin Tsung-Yi","year":"2014","unstructured":"Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C Lawrence Zitnick. 2014. Microsoft coco: Common objects in context. In Computer vision-ECCV 2014: 13th European conference, zurich, Switzerland, September 6-12, 2014, proceedings, part v 13. Springer, 740-755."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"e_1_3_2_1_36_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2023. Visual instruction tuning. Advances in neural information processing systems, Vol. 36 (2023), 34892-34916."},{"key":"e_1_3_2_1_37_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR).","author":"Liu Zhuang","year":"2025","unstructured":"Zhuang Liu and Kaiming He. 2025. A Decade's Battle on Dataset Bias: Are We There Yet?. In Proceedings of the International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.86"},{"key":"e_1_3_2_1_39_1","volume-title":"Revisiting Classifier Two-Sample Tests. In International Conference on Learning Representations.","author":"Lopez-Paz David","year":"2017","unstructured":"David Lopez-Paz and Maxime Oquab. 2017. Revisiting Classifier Two-Sample Tests. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_40_1","first-page":"25435","article-title":"Seeing is not always believing: Benchmarking human and model perception of ai-generated images","volume":"36","author":"Lu Zeyu","year":"2023","unstructured":"Zeyu Lu, Di Huang, Lei Bai, Jingjing Qu, Chengyue Wu, Xihui Liu, and Wanli Ouyang. 2023. Seeing is not always believing: Benchmarking human and model perception of ai-generated images. Advances in Neural Information Processing Systems, Vol. 36 (2023), 25435-25447.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_41_1","first-page":"124069","article-title":"LLM Dataset Inference: Did you train on my dataset","volume":"37","author":"Maini Pratyush","year":"2024","unstructured":"Pratyush Maini, Hengrui Jia, Nicolas Papernot, and Adam Dziedzic. 2024. LLM Dataset Inference: Did you train on my dataset? Advances in Neural Information Processing Systems, Vol. 37 (2024), 124069-124092.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_42_1","volume-title":"Dataset Inference: Ownership Resolution in Machine Learning. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=hvdKKV2yt7T","author":"Maini Pratyush","year":"2021","unstructured":"Pratyush Maini, Mohammad Yaghini, and Nicolas Papernot. 2021. Dataset Inference: Ownership Resolution in Machine Learning. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=hvdKKV2yt7T"},{"key":"e_1_3_2_1_43_1","volume-title":"Jia Qing Tan, Shafiq Joty, and Enamul Hoque.","author":"Masry Ahmed","year":"2022","unstructured":"Ahmed Masry, Xuan Long Do, Jia Qing Tan, Shafiq Joty, and Enamul Hoque. 2022. ChartQA: A Benchmark for Question Answering about Charts with Visual and Logical Reasoning. In Findings of the Association for Computational Linguistics: ACL 2022. 2263-2279."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00225"},{"key":"e_1_3_2_1_45_1","volume-title":"Embodied large language models enable robots to complete complex tasks in unpredictable environments. Nature Machine Intelligence","author":"Mon-Williams Ruaridh","year":"2025","unstructured":"Ruaridh Mon-Williams, Gen Li, Ran Long, Wenqian Du, and Christopher G Lucas. 2025. Embodied large language models enable robots to complete complex tasks in unpredictable environments. Nature Machine Intelligence (2025), 1-10."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3489030"},{"key":"e_1_3_2_1_47_1","first-page":"50358","article-title":"Scaling data-constrained language models","volume":"36","author":"Muennighoff Niklas","year":"2023","unstructured":"Niklas Muennighoff, Alexander Rush, Boaz Barak, Teven Le Scao, Nouamane Tazi, Aleksandra Piktus, Sampo Pyysalo, Thomas Wolf, and Colin A Raffel. 2023. Scaling data-constrained language models. Advances in Neural Information Processing Systems, Vol. 36 (2023), 50358-50376.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_48_1","volume-title":"Production Language Models. In The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=vjel3nWP2a","author":"Nasr Milad","year":"2025","unstructured":"Milad Nasr, Javier Rando, Nicholas Carlini, Jonathan Hayase, Matthew Jagielski, A. Feder Cooper, Daphne Ippolito, Christopher A. Choquette-Choo, Florian Tram\u00e8r, and Katherine Lee. 2025. Scalable Extraction of Training Data from Aligned, Production Language Models. In The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=vjel3nWP2a"},{"key":"e_1_3_2_1_49_1","unstructured":"OpenAI. 2021. DALL\u00b7E: Creating Images from Text. https:\/\/openai.com\/dall-e."},{"key":"e_1_3_2_1_50_1","unstructured":"OpenAI. 2022. Introducing ChatGPT. https:\/\/openai.com\/blog\/chatgpt\/"},{"key":"e_1_3_2_1_51_1","unstructured":"OpenAI. 2023. GPT-4V(ision) System Card. https:\/\/cdn.openai.com\/papers\/GPTV_System_Card.pdf"},{"key":"e_1_3_2_1_52_1","unstructured":"OpenAI. 2024. GPT-4o System Card. arXiv preprint arXiv:2410.21276."},{"key":"e_1_3_2_1_53_1","volume-title":"Proving Test Set Contamination in Black-Box Language Models. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=KS8mIvetg2","author":"Oren Yonatan","year":"2024","unstructured":"Yonatan Oren, Nicole Meister, Niladri S. Chatterji, Faisal Ladhak, and Tatsunori Hashimoto. 2024. Proving Test Set Contamination in Black-Box Language Models. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=KS8mIvetg2"},{"key":"e_1_3_2_1_54_1","unstructured":"Kylie Robison. 2025. Meta got caught gaming AI benchmarks. https:\/\/www.theverge.com\/meta\/645012\/meta-llama-4-maverick-benchmarks-gaming"},{"key":"e_1_3_2_1_55_1","unstructured":"Christoph Schuhmann Romain Beaumont Richard Vencu Cade Gordon Ross Wightman Mehdi Cherti Theo Coombes Aarush Katta Clayton Mullis Mitchell Wortsman et al. 2022. Laion-5b: An open large-scale dataset for training next generation image-text models. Advances in neural information processing systems Vol. 35 (2022) 25278-25294."},{"key":"e_1_3_2_1_56_1","volume-title":"The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=zWqr3MQuNs","author":"Shi Weijia","year":"2024","unstructured":"Weijia Shi, Anirudh Ajith, Mengzhou Xia, Yangsibo Huang, Daogao Liu, Terra Blevins, Danqi Chen, and Luke Zettlemoyer. 2024. Detecting Pretraining Data from Large Language Models. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=zWqr3MQuNs"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2017.41"},{"key":"e_1_3_2_1_58_1","volume-title":"International conference on machine learning. PMLR, 6105-6114","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. Efficientnet: Rethinking model scaling for convolutional neural networks. In International conference on machine learning. PMLR, 6105-6114."},{"key":"e_1_3_2_1_59_1","first-page":"38274","article-title":"Memorization without overfitting: Analyzing the training dynamics of large language models","volume":"35","author":"Tirumala Kushal","year":"2022","unstructured":"Kushal Tirumala, Aram Markosyan, Luke Zettlemoyer, and Armen Aghajanyan. 2022. Memorization without overfitting: Analyzing the training dynamics of large language models. Advances in Neural Information Processing Systems, Vol. 35 (2022), 38274-38290.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_60_1","first-page":"87310","article-title":"Cambrian-1: A fully open, vision-centric exploration of multimodal llms","volume":"37","author":"Tong Peter","year":"2024","unstructured":"Peter Tong, Ellis Brown, Penghao Wu, Sanghyun Woo, Adithya Jairam Vedagiri IYER, Sai Charitha Akula, Shusheng Yang, Jihan Yang, Manoj Middepogu, Ziteng Wang, et al., 2024. Cambrian-1: A fully open, vision-centric exploration of multimodal llms. Advances in Neural Information Processing Systems, Vol. 37 (2024), 87310-87356.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_61_1","unstructured":"Ultralytics. 2024. YOLOv11: Real-Time Object Detection. https:\/\/github.com\/ultralytics\/ultralytics."},{"key":"e_1_3_2_1_62_1","first-page":"268","article-title":"Privacy risk in machine learning: Analyzing the connection to overfitting. In 2018 IEEE 31st computer security foundations symposium (CSF)","author":"Yeom Samuel","year":"2018","unstructured":"Samuel Yeom, Irene Giacomelli, Matt Fredrikson, and Somesh Jha. 2018. Privacy risk in machine learning: Analyzing the connection to overfitting. In 2018 IEEE 31st computer security foundations symposium (CSF). IEEE, 268-282.","journal-title":"IEEE"},{"key":"e_1_3_2_1_63_1","volume-title":"Berg","author":"Yu Licheng","year":"2016","unstructured":"Licheng Yu, Patrick Poirson, Shan Yang, Alexander C. Berg, and Tamara L. Berg. 2016. Modeling Context in Referring Expressions. In Computer Vision - ECCV 2016, Bastian Leibe, Jiri Matas, Nicu Sebe, and Max Welling (Eds.). Springer International Publishing, Cham, 69-85."},{"key":"e_1_3_2_1_64_1","volume-title":"Proceedings of the 38th International Conference on Neural Information Processing Systems","author":"Zeng Boya","year":"2025","unstructured":"Boya Zeng, Yida Yin, and Zhuang Liu. 2025. Understanding bias in large-scale visual datasets. In Proceedings of the 38th International Conference on Neural Information Processing Systems (Vancouver, BC, Canada) (NIPS '24). Curran Associates Inc., Red Hook, NY, USA, Article 1976, 33 pages."},{"key":"e_1_3_2_1_65_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Zhang Jingyang","year":"2025","unstructured":"Jingyang Zhang, Jingwei Sun, Eric Yeats, Yang Ouyang, Martin Kuo, Jianyi Zhang, Hao Frank Yang, and Hai Li. 2025. Min-K%: Improved Baseline for Pre-Training Data Detection from Large Language Models. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.3233\/FAIA250907"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681610"},{"key":"e_1_3_2_1_68_1","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV).","author":"Zhu Hongyu","year":"2025","unstructured":"Hongyu Zhu, Sichu Liang, Wenwen Wang, et al., 2025b. Evading Data Provenance in Deep Neural Networks. In Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.5555\/3041838.3041953"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755002","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:18:16Z","timestamp":1765307896000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755002"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":69,"alternative-id":["10.1145\/3746027.3755002","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755002","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}