{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T22:46:34Z","timestamp":1776120394923,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T00:00:00Z","timestamp":1717372800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,3]]},"DOI":"10.1145\/3630106.3658911","type":"proceedings-article","created":{"date-parts":[[2024,6,5]],"date-time":"2024-06-05T13:14:21Z","timestamp":1717593261000},"page":"359-376","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":38,"title":["Not My Voice! A Taxonomy of Ethical and Safety Harms of Speech Generators"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9657-9509","authenticated-orcid":false,"given":"Wiebke","family":"Hutiri","sequence":"first","affiliation":[{"name":"Sony AI, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4680-0022","authenticated-orcid":false,"given":"Orestis","family":"Papakyriakopoulos","sequence":"additional","affiliation":[{"name":"Sony AI, Switzerland and Technical University of Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7907-9353","authenticated-orcid":false,"given":"Alice","family":"Xiang","sequence":"additional","affiliation":[{"name":"Sony AI, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,6,5]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"AIAAIC. 2024. The AI Algorithmic and Automation Incidents Database. https:\/\/www.aiaaic.org\/aiaaic-repository\/ai-algorithmic-and-automation-incidents"},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), Vol.\u00a01. Association for Computational Linguistics, 5723\u20135738","author":"Ao Junyi","year":"2022","unstructured":"Junyi Ao, Rui Wang, Long Zhou, Chengyi Wang, Shuo Ren, Yu Wu, Shujie Liu, Tom Ko, Qing Li, Yu Zhang, Zhihua Wei, Yao Qian, Jinyu Li, Furu Wei, and Peng Cheng\u00a0Laboratory. 2022. SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing. In Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), Vol.\u00a01. Association for Computational Linguistics, 5723\u20135738."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600211.3604686"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445922"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3594095"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","unstructured":"Tom B\u00e4ckstr\u00f6m Okko R\u00e4s\u00e4nen Abraham Zewoudie Pablo\u00a0P\u00e9rez Zarazaga Liisa Koivusalo Sneha Das Esteban\u00a0G\u00f3mez Mellado Marieum\u00a0Bouafif Mansali Daniel Ramos Sudarsana Kadiri and Paavo Alku. 2022. Introduction to Speech Processing (2 ed.). https:\/\/doi.org\/10.5281\/zenodo.6821775","DOI":"10.5281\/zenodo.6821775"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376789"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3359325"},{"key":"e_1_3_2_1_9_1","volume-title":"YourTTS: Towards Zero-Shot Multi-Speaker TTS and Zero-Shot Voice Conversion for everyone. (12","author":"Casanova Edresson","year":"2021","unstructured":"Edresson Casanova, Julian Weber, Christopher Shulby, Arnaldo\u00a0Candido Junior, Eren G\u00f6lge, and Moacir\u00a0Antonelli Ponti. 2021. YourTTS: Towards Zero-Shot Multi-Speaker TTS and Zero-Shot Voice Conversion for everyone. (12 2021). http:\/\/arxiv.org\/abs\/2112.02418"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00146-022-01585-x"},{"key":"e_1_3_2_1_11_1","unstructured":"Guangyu Chen Yu Wu Shujie Liu Tao Liu Xiaoyong Du and Furu Wei. 2024. WavMark: Watermarking for Audio Generation. arxiv:2308.12770\u00a0[cs.SD]"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1186\/s12936-022-04183-w"},{"key":"e_1_3_2_1_13_1","unstructured":"Joseph Cox. 2023. A Computer Generated Swatting Service Is Causing Havoc Across America. https:\/\/www.vice.com\/en\/article\/k7z8be\/torswats-computer-generated-ai-voice-swatting"},{"key":"e_1_3_2_1_14_1","unstructured":"Andrew Critch and Stuart Russell. 2023. TASRA: a Taxonomy and Analysis of Societal-Scale Risks from AI. (2023)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCA.2009.2025452"},{"key":"e_1_3_2_1_16_1","unstructured":"Fairly Trained. 2024. Fairly Trained launches certification for generative AI models that respect creators\u2019 rights. https:\/\/www.fairlytrained.org\/blog\/fairly-trained-launches-certification-for-generative-ai-models-that-respect-creators-rights"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.25300\/MISQ"},{"key":"e_1_3_2_1_18_1","volume-title":"Promptts: Controllable Text-to-Speech with Text Descriptions. In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). https:\/\/speechresearch.github.io\/prompttts\/","author":"Guo Zhifang","year":"2023","unstructured":"Zhifang Guo, Yichong Leng, Yihan Wu, Sheng Zhao, and Xu Tan. 2023. Promptts: Controllable Text-to-Speech with Text Descriptions. In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). https:\/\/speechresearch.github.io\/prompttts\/"},{"key":"e_1_3_2_1_19_1","unstructured":"Jennifer Hassan. 2023. AI is being used to give dead missing kids a voice they didn\u2019t ask for. https:\/\/www.washingtonpost.com\/technology\/2023\/08\/09\/ai-dead-children-tiktok-videos\/"},{"key":"e_1_3_2_1_20_1","unstructured":"Have I been trained?2024. About. https:\/\/haveibeentrained.com\/about"},{"key":"e_1_3_2_1_22_1","volume-title":"Systems and software engineering \u2014 Life cycle management \u2014 Part 7000: Standard model process for addressing ethical concerns during system design. Standard","author":"IEEE","unstructured":"ISO\/IEC\/IEEE 24748-7000:2022(en) 2022. Systems and software engineering \u2014 Life cycle management \u2014 Part 7000: Standard model process for addressing ethical concerns during system design. Standard. International Organization for Standardization."},{"key":"e_1_3_2_1_23_1","unstructured":"Shengpeng Ji Jialong Zuo Minghui Fang Ziyue Jiang Feiyang Chen Xinyu Duan Baoxing Huai and Zhou Zhao. 2023. TextrolSpeech: A Text Style Control Speech Corpus With Codec Language Text-to-Speech Models. (2023). https:\/\/sall-e.github.io\/."},{"key":"e_1_3_2_1_24_1","volume-title":"Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech. In International Conference on Machine Learning (ICML). https:\/\/jaywalnut310","author":"Kim Jaehyeon","year":"2021","unstructured":"Jaehyeon Kim, Jungil Kong, and Juhee Son. 2021. Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech. In International Conference on Machine Learning (ICML). https:\/\/jaywalnut310.github.io\/vits-demo\/index.html"},{"key":"e_1_3_2_1_25_1","unstructured":"Hannah\u00a0Rose Kirk Bertie Vidgen Paul R\u00f6ttger and Scott\u00a0A Hale. 2023. Personalisation within bounds: A risk taxonomy and policy framework for the alignment of large language models with personalised feedback. (2023)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.21437\/ssw.2023-7"},{"key":"e_1_3_2_1_27_1","unstructured":"Hao-ping Lee Yu-ju Yang and Thomas Serban Von\u00a0Davier. 2023. Deepfakes Phrenology Surveillance and More! A Taxonomy of AI Privacy Risks. (2023)."},{"key":"e_1_3_2_1_28_1","volume-title":"Proc. of the 23rd Int. Society for Music Information Retrieval Conf.https:\/\/osf.io\/7em95\/.","author":"Lee Kyungyun","year":"2022","unstructured":"Kyungyun Lee, Gladys Hitt, Emily Terada, Jin\u00a0Ha Lee, and Gaudio Lab. 2022. Ethics of Singing Voice Synthesis: Perceptions of Users and Developers. In Proc. of the 23rd Int. Society for Music Information Retrieval Conf.https:\/\/osf.io\/7em95\/."},{"key":"e_1_3_2_1_29_1","unstructured":"Yichong Leng Zhifang Guo Kai Shen Xu Tan Zeqian Ju Yanqing Liu Yufei Liu Dongchao Yang Leying Zhang Kaitao Song Lei He Xiang-Yang Li Sheng Zhao Tao Qin and Jiang Bian. 2023. Promptts 2: Describing and Generating Voices with Text Prompt. (2023). https:\/\/speechresearch.github.io\/prompttts2"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-1779"},{"key":"e_1_3_2_1_31_1","volume-title":"Stable Bias: Analyzing Societal Representations in Diffusion Models. (3","author":"Luccioni Alexandra\u00a0Sasha","year":"2023","unstructured":"Alexandra\u00a0Sasha Luccioni, Christopher Akiki, Margaret Mitchell, and Yacine Jernite. 2023. Stable Bias: Analyzing Societal Representations in Diffusion Models. (3 2023). http:\/\/arxiv.org\/abs\/2303.11408"},{"key":"e_1_3_2_1_32_1","volume-title":"The Thirty-Fifth AAAI Conference on Artificial Intelligence (AAAI-21)","author":"McGregor Sean","year":"2021","unstructured":"Sean McGregor. 2021. Preventing Repeated Real World AI Failures by Cataloging Incidents: The AI Incident Database. In The Thirty-Fifth AAAI Conference on Artificial Intelligence (AAAI-21). 15458\u201315463. www.aaai.org"},{"key":"e_1_3_2_1_33_1","unstructured":"Leah Nylen. 2023. FTC\u2019s Khan Says Enforcers Need to Be \u2018Vigilant Early\u2019 With AI. https:\/\/www.bloomberg.com\/news\/articles\/2023-06-02\/ftc-s-khan-says-enforcers-need-to-be-vigilant-early-with-ai"},{"key":"e_1_3_2_1_35_1","unstructured":"OECD.AI Policy Observatory. 2024. The OECD AI Incidents Monitor. https:\/\/oecd.ai\/en\/incidents"},{"key":"e_1_3_2_1_36_1","unstructured":"Partnership on AI. 2023. PAI\u2019s Responsible Practices for Synthetic Media. https:\/\/partnershiponai.org\/wp-content\/uploads\/2023\/02\/PAI_synthetic_media_framework.pdf"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533158"},{"key":"e_1_3_2_1_39_1","volume-title":"International Conference on Learning Representation (ICLR). https:\/\/speechresearch.github.io\/fastspeech2\/.","author":"Ren Yi","year":"2020","unstructured":"Yi Ren, Chenxu Hu, Xu Tan, Tao Qin, Sheng Zhao, Zhou Zhao, and Tie-Yan Liu. 2020. FastSpeech 2: Fast and High Quality End-to-End Text to Speech. In International Conference on Learning Representation (ICLR). https:\/\/speechresearch.github.io\/fastspeech2\/."},{"key":"e_1_3_2_1_40_1","unstructured":"Responsible AI Collaborative. 2024. The AI Incident Database. https:\/\/incidentdatabase.ai\/"},{"key":"e_1_3_2_1_41_1","volume-title":"Google apologizes for \u2018missing the mark","author":"Robertson Adi","year":"2024","unstructured":"Adi Robertson. 2024. Google apologizes for \u2018missing the mark\u2019 after Gemini generated racially diverse Nazis. https:\/\/www.theverge.com\/2024\/2\/21\/24079371\/google-ai-gemini-generative-inaccurate-historical"},{"key":"e_1_3_2_1_42_1","unstructured":"Robin\u00a0San Roman Pierre Fernandez Alexandre D\u00e9fossez Teddy Furon Tuan Tran and Hady Elsahar. 2024. Proactive Detection of Voice Cloning with Localized Watermarking. arxiv:2401.17264\u00a0[cs.SD]"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.21437\/eurospeech.2001-150"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376338"},{"key":"e_1_3_2_1_45_1","unstructured":"Preethi Seshadri Sameer Singh and Yanai Elazar. 2023. The Bias Amplification Paradox in Text-to-Image Generation. (2023)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600211.3604673"},{"key":"e_1_3_2_1_47_1","unstructured":"Shona Moreau Chloe Rourke. 2024. Fake porn causes real harm to women. https:\/\/policyoptions.irpp.org\/magazines\/february-2024\/fake-porn-harm\/"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.2307\/40041279"},{"key":"e_1_3_2_1_49_1","volume-title":"A Survey on Neural Speech Synthesis. (6","author":"Tan Xu","year":"2021","unstructured":"Xu Tan, Tao Qin, Frank Soong, and Tie-Yan Liu. 2021. A Survey on Neural Speech Synthesis. (6 2021). http:\/\/arxiv.org\/abs\/2106.15561"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2023.3250266"},{"key":"e_1_3_2_1_51_1","volume-title":"Biden\u2019s AI chief says \u2019voice cloning","author":"Varanasi Lakshmi","year":"2023","unstructured":"Lakshmi Varanasi. 2023. Biden\u2019s AI chief says \u2019voice cloning\u2019 is what keeps him up at night. https:\/\/www.businessinsider.com\/voice-cloning-technology-worries-biden-ai-bruce-reed-elevenlabs-scammers-2023-11"},{"key":"e_1_3_2_1_52_1","volume-title":"Introducing v0. 5 of the AI Safety Benchmark from MLCommons. arXiv preprint arXiv:2404.12241","author":"Vidgen Bertie","year":"2024","unstructured":"Bertie Vidgen, Adarsh Agrawal, Ahmed\u00a0M Ahmed, Victor Akinwande, Namir Al-Nuaimi, Najla Alfaraj, Elie Alhajjar, Lora Aroyo, Trupti Bavalatti, Borhane Blili-Hamelin, 2024. Introducing v0. 5 of the AI Safety Benchmark from MLCommons. arXiv preprint arXiv:2404.12241 (2024)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.21437\/ssw.2019-19"},{"key":"e_1_3_2_1_54_1","volume-title":"Neural Codec Language Models are Zero-Shot Text to Speech Synthesizers. (1","author":"Wang Chengyi","year":"2023","unstructured":"Chengyi Wang, Sanyuan Chen, Yu Wu, Ziqiang Zhang, Long Zhou, Shujie Liu, Zhuo Chen, Yanqing Liu, Huaming Wang, Jinyu Li, Lei He, Sheng Zhao, and Furu Wei. 2023. Neural Codec Language Models are Zero-Shot Text to Speech Synthesizers. (1 2023). http:\/\/arxiv.org\/abs\/2301.02111"},{"key":"e_1_3_2_1_55_1","unstructured":"Tianrui Wang Long Zhou Ziqiang Zhang Yu Wu Shujie Liu Yashesh Gaur Zhuo Chen Jinyu Li and Furu Wei. 2023. VioLA: Unified Codec Language Models for Speech Recognition Synthesis and Translation. (5 2023). http:\/\/arxiv.org\/abs\/2305.16107"},{"key":"e_1_3_2_1_56_1","unstructured":"Laura Weidinger Maribeth Rauh Nahema Marchal Arianna Manzini Lisa\u00a0Anne Hendricks Juan Mateos-Garcia Stevie Bergman Jackie Kay Conor Griffin Ben Bariach Iason Gabriel Verena Rieser and William Isaac. 2023. Sociotechnical Safety Evaluation of Generative AI Systems. (2023)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533088"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533779"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3576915.3623209"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2021.11.006"}],"event":{"name":"FAccT '24: The 2024 ACM Conference on Fairness, Accountability, and Transparency","location":"Rio de Janeiro Brazil","acronym":"FAccT '24"},"container-title":["The 2024 ACM Conference on Fairness, Accountability, and Transparency"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3630106.3658911","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3630106.3658911","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T22:50:56Z","timestamp":1750287056000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3630106.3658911"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,3]]},"references-count":57,"alternative-id":["10.1145\/3630106.3658911","10.1145\/3630106"],"URL":"https:\/\/doi.org\/10.1145\/3630106.3658911","relation":{},"subject":[],"published":{"date-parts":[[2024,6,3]]},"assertion":[{"value":"2024-06-05","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}