{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,28]],"date-time":"2026-06-28T18:30:31Z","timestamp":1782671431305,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":26,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,5,8]]},"DOI":"10.1145\/3701716.3715306","type":"proceedings-article","created":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T14:10:32Z","timestamp":1750687832000},"page":"733-736","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["RU-AI: A Large Multimodal Dataset for Machine-Generated Content Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-5585-6363","authenticated-orcid":false,"given":"Liting","family":"Huang","sequence":"first","affiliation":[{"name":"University of Technology Sydney, Sydney, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0166-3442","authenticated-orcid":false,"given":"Zhihao","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of New South Wales, Sydney, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2156-3642","authenticated-orcid":false,"given":"Yiran","family":"Zhang","sequence":"additional","affiliation":[{"name":"Macquarie University, Sydney, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7629-3193","authenticated-orcid":false,"given":"Xiyue","family":"Zhou","sequence":"additional","affiliation":[{"name":"The University of Sydney, Sydney, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1133-9379","authenticated-orcid":false,"given":"Shoujin","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Technology Sydney, Sydney, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,5,23]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Detecting AI-Synthesized Speech Using Bispectral Analysis. In CVPR Workshops. 104--109","author":"AlBadawy Ehab A.","year":"2019","unstructured":"Ehab A. AlBadawy, Siwei Lyu, and Hany Farid. 2019. Detecting AI-Synthesized Speech Using Bispectral Analysis. In CVPR Workshops. 104--109."},{"key":"e_1_3_2_2_2_1","unstructured":"Rohan Anil and et al. 2023. PaLM2 Technical Report. CoRR abs\/2305.10403 (2023)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"crossref","unstructured":"Rowel Atienza. 2023. EfficientSpeech: An On-Device Text to Speech Model. In ICASSP. 1--5.","DOI":"10.1109\/ICASSP49357.2023.10094639"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"crossref","unstructured":"Edresson Casanova Kelly Davis and et al. 2024. XTTS: a Massively Multilingual Zero-Shot Text-to-Speech Model. In INTERSPEECH. 4978--4982.","DOI":"10.21437\/Interspeech.2024-2016"},{"key":"e_1_3_2_2_5_1","unstructured":"Edresson Casanova and et al. 2022. YourTTS: Towards Zero-Shot Multi-Speaker TTS and Zero-Shot Voice Conversion for Everyone. In ICML. 2709--2720."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"crossref","unstructured":"Rohit Girdhar Alaaeldin El-Nouby and et al. 2023. ImageBind One Embedding Space to Bind Them All. In CVPR. 15180--15190.","DOI":"10.1109\/CVPR52729.2023.01457"},{"key":"e_1_3_2_2_7_1","unstructured":"Oliver Guhr Anne-Kathrin Schumann and et al. 2021. FullStop: Multilingual Deep Models for Punctuation Prediction. In SwissText."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"crossref","unstructured":"Ganesh Jawahar Muhammad Abdul-Mageed and et al. 2020. Automatic Detection of Machine Generated Text: A Critical Survey. In COLING. 2296--2309.","DOI":"10.18653\/v1\/2020.coling-main.208"},{"key":"e_1_3_2_2_9_1","volume-title":"Jiang and et al","author":"Albert","year":"2024","unstructured":"Albert Q. Jiang and et al. 2024. Mixtral of Experts. CoRR abs\/2401.04088 (2024)."},{"key":"e_1_3_2_2_10_1","unstructured":"Glenn Jocher and et al. 2021. ultralytics\/yolov5: v6.0 - YOLOv5n 'Nano' models Roboflow integration TensorFlow export OpenCV DNN support."},{"key":"e_1_3_2_2_11_1","unstructured":"Iver Jordal Araik Tamazian and et al. 2024. iver56\/audiomentations: v0.38.0."},{"key":"e_1_3_2_2_12_1","unstructured":"Hasam Khalid Shahroz Tariq and et al. 2021. FakeAVCeleb: A Novel Audio-Video Multimodal Deepfake Dataset. In NeurIPS Datasets and Benchmarks."},{"key":"e_1_3_2_2_13_1","unstructured":"Jaehyeon Kim Jungil Kong and et al. 2021. Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech. In ICML. 5530--5540."},{"key":"e_1_3_2_2_14_1","volume-title":"EAGLE: Speculative Sampling Requires Rethinking Feature Uncertainty. In ICML. 28935--28948.","author":"Li Yuhui","year":"2024","unstructured":"Yuhui Li, Fangyun Wei, and et al. 2024. EAGLE: Speculative Sampling Requires Rethinking Feature Uncertainty. In ICML. 28935--28948."},{"key":"e_1_3_2_2_15_1","first-page":"19594","article-title":"StyleTTS 2: Towards Human-Level Text-to-Speech through Style Diffusion and Adversarial Training with Large Speech Language Models","volume":"36","author":"Li Yinghao Aaron","year":"2023","unstructured":"Yinghao Aaron Li, Cong Han, and et al. 2023. StyleTTS 2: Towards Human-Level Text-to-Speech through Style Diffusion and Adversarial Training with Large Speech Language Models. In NeurIPS, Vol. 36. 19594--19621.","journal-title":"NeurIPS"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"crossref","unstructured":"Tsung-Yi Lin Michael Maire Serge J. Belongie and et al. 2014. Microsoft COCO: Common Objects in Context. In ECCV (5). 740--755.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_2_17_1","volume-title":"SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis. In ICLR.","author":"Podell Dustin","year":"2024","unstructured":"Dustin Podell, Zion English, and et al. 2024. SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis. In ICLR."},{"key":"e_1_3_2_2_18_1","unstructured":"Yuxi Ren Xin Xia and et al. 2024. Hyper-SD: Trajectory Segmented Consistency Model for Efficient Image Synthesis. CoRR abs\/2404.13686 (2024)."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"crossref","unstructured":"Robin Rombach Andreas Blattmann and et al. 2022. High-Resolution Image Synthesis with Latent Diffusion Models. In CVPR. 10674--10685.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"crossref","unstructured":"Rui Shao Tianxing Wu and Ziwei Liu. 2023. Detecting and Grounding MultiModal Media Manipulation. In CVPR. 6904--6913.","DOI":"10.1109\/CVPR52729.2023.00667"},{"key":"e_1_3_2_2_21_1","unstructured":"Hugo Touvron Louis Martin and et al. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. CoRR abs\/2307.09288 (2023)."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.7717\/peerj.453"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"key":"e_1_3_2_2_24_1","unstructured":"Bolei Zhou \u00c0gata Lapedriza and et al. 2014. Learning Deep Features for Scene Recognition using Places Database. In NIPS. 487--495."},{"key":"e_1_3_2_2_25_1","unstructured":"Bin Zhu Bin Lin and et al. 2024. LanguageBind: Extending Video-Language Pretraining to N-modality by Language-based Semantic Alignment. In ICLR."},{"key":"e_1_3_2_2_26_1","unstructured":"Mingjian Zhu Hanting Chen and et al. 2023. GenImage: A Million-Scale Benchmark for Detecting AI-Generated Image. In NeurIPS. 77771--77782."}],"event":{"name":"WWW '25: The ACM Web Conference 2025","location":"Sydney NSW Australia","acronym":"WWW '25","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Companion Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3701716.3715306","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,7]],"date-time":"2025-10-07T18:26:09Z","timestamp":1759861569000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3715306"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,8]]},"references-count":26,"alternative-id":["10.1145\/3701716.3715306","10.1145\/3701716"],"URL":"https:\/\/doi.org\/10.1145\/3701716.3715306","relation":{},"subject":[],"published":{"date-parts":[[2025,5,8]]},"assertion":[{"value":"2025-05-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}