{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,24]],"date-time":"2025-08-24T00:02:36Z","timestamp":1755993756374,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":4,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,2,18]],"date-time":"2025-02-18T00:00:00Z","timestamp":1739836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,2,18]]},"DOI":"10.1145\/3715675.3715806","type":"proceedings-article","created":{"date-parts":[[2025,3,20]],"date-time":"2025-03-20T18:33:01Z","timestamp":1742495581000},"page":"71-71","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Influence of AI Technology on Speech Synthesis and Voice Cloning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-1710-3079","authenticated-orcid":false,"given":"Xiaodong","family":"Wu","sequence":"first","affiliation":[{"name":"Adeia, Dallas, TX, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,3,20]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Text-to-Speech Synthesis: New Paradigms and Advances","author":"Dutoit Thierry","year":"2014","unstructured":"Thierry Dutoit, Nadine Martin. Text-to-Speech Synthesis: New Paradigms and Advances. Prentice Hall 1\/1\/2014. ISBN-13: 978-0131456617."},{"key":"e_1_3_2_1_2_1","unstructured":"Aaron van den Oord Sander Dieleman Heiga Zen et al. 2016. WaveNet: A Generative Model For Raw Audio. https:\/\/arxiv.org\/abs\/1609.03499"},{"key":"e_1_3_2_1_3_1","volume-title":"Tacotron: Towards End-to-End Speech Synthesis. \nhttps:\/\/arxiv.org\/abs\/1703.10135","author":"Wang Yuxuan","year":"2017","unstructured":"Yuxuan Wang, RJ Skerry-Ryan, Daisy Stanton, Yonghui Wu, et al. 2017. Tacotron: Towards End-to-End Speech Synthesis. https:\/\/arxiv.org\/abs\/1703.10135"},{"key":"e_1_3_2_1_4_1","unstructured":"Yihao Chen Zhen-Hua Ling et al. 2020. DurIAN: Duration Informed Attention Network for Multimodal Synthesis. IEEE\/ACM Transactions on Audio Speech and Language Processing"}],"event":{"name":"MHV '25: Mile-High Video Conference","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Denver CO USA","acronym":"MHV '25"},"container-title":["Proceedings of the 4th Mile-High Video Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3715675.3715806","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3715675.3715806","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T02:03:16Z","timestamp":1755914596000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3715675.3715806"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,18]]},"references-count":4,"alternative-id":["10.1145\/3715675.3715806","10.1145\/3715675"],"URL":"https:\/\/doi.org\/10.1145\/3715675.3715806","relation":{},"subject":[],"published":{"date-parts":[[2025,2,18]]},"assertion":[{"value":"2025-03-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}