{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,19]],"date-time":"2026-05-19T18:42:32Z","timestamp":1779216152235,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":168,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,30]],"date-time":"2024-05-30T00:00:00Z","timestamp":1717027200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Wallenberg AI, Autonomous Systems and Software Program ? Humanities and Society (WASP-HS)","award":["Marcus and Amalia Wallenberg Foundation."],"award-info":[{"award-number":["Marcus and Amalia Wallenberg Foundation."]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,30]]},"DOI":"10.1145\/3658852.3659065","type":"proceedings-article","created":{"date-parts":[[2024,6,27]],"date-time":"2024-06-27T18:23:07Z","timestamp":1719512587000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Singing for the Missing: Bringing the Body Back to AI Voice and Speech Technologies"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2802-0244","authenticated-orcid":false,"given":"Kelsey","family":"Cotton","sequence":"first","affiliation":[{"name":"Department of Computer Science and Engineering, Data Science and AI Division, Chalmers University of Technology, Sweden"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3028-6084","authenticated-orcid":false,"given":"Katja","family":"De Vries","sequence":"additional","affiliation":[{"name":"Department of Law, Uppsala University, Sweden"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4133-8641","authenticated-orcid":false,"given":"K\u0131van\u00e7","family":"Tatar","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Data Science and AI Division, Chalmers University of Technology, Sweden"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,6,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"SAG AFTRA. 2023. Summary of 2023 Tentative Successor Agreement to the 2020 Producer-SAG-AFTRA Codified Basic Agreement (\u2018Codified Basic Agreement\u2019) and 2020 SAG-AFTRA Television Agreement (\u2018Television Agreement\u2019). https:\/\/www.sagaftra.org\/files\/sa_documents\/TV-Theatrical_23_Summary_Agreement_Final.pdf"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Shrutina Agarwal Sriram Ganapathy and Naoya Takahashi. 2022. Leveraging Symmetrical Convolutional Transformer Networks for Speech to Singing Voice Style Transfer. arxiv:2208.12410\u00a0[cs.SD]","DOI":"10.21437\/Interspeech.2022-11256"},{"key":"e_1_3_2_1_3_1","unstructured":"AIContentfy. 2023. The future of content creation for voice assistants. https:\/\/aicontentfy.com\/en\/blog\/future-of-content-creation-for-voice-assistants"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-68649-3_17"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","unstructured":"Junyi Ao Rui Wang Long Zhou Chengyi Wang Shuo Ren Yu Wu Shujie Liu Tom Ko Qing Li Yu Zhang Zhihua Wei Yao Qian Jinyu Li and Furu Wei. 2022. SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing. https:\/\/doi.org\/10.48550\/arXiv.2110.07205 arXiv:2110.07205 [cs eess].","DOI":"10.48550\/arXiv.2110.07205"},{"key":"e_1_3_2_1_6_1","unstructured":"Srishreya\u00a0(Shreya) Arunsaravanakumar. [n. d.]. Deepfake music sends ripples across the music industry. https:\/\/thewildcattribune.com\/17528\/ae\/deepfake-music-sends-ripples-across-the-music-industry\/ Section: Arts & Entertainment."},{"key":"e_1_3_2_1_7_1","volume-title":"Hollywood Writers Reached an AI Deal That Will Rewrite History. Wired","author":"Bedingfield Will","year":"2023","unstructured":"Will Bedingfield. 2023. Hollywood Writers Reached an AI Deal That Will Rewrite History. Wired (2023). https:\/\/www.wired.com\/story\/us-writers-strike-ai-provisions-precedents\/ Section: tags."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2617995.2618007"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2790994.2791010"},{"key":"e_1_3_2_1_10_1","volume-title":"Mind over matter: the thinking and speaking machine in fiction of the long nineteenth century. phd","author":"Bikker Elise\u00a0Jozefa","unstructured":"Elise\u00a0Jozefa Bikker. 2021. Mind over matter: the thinking and speaking machine in fiction of the long nineteenth century. phd. University of York. https:\/\/etheses.whiterose.ac.uk\/31783\/"},{"key":"e_1_3_2_1_11_1","volume-title":"Sonic Mediations: Body, Sound, Technology - Cambridge Scholars Publishing","author":"Birdsall Carolyn","year":"2008","unstructured":"Carolyn Birdsall and Anthony Enns. 2008. Sonic Mediations: Body, Sound, Technology - Cambridge Scholars Publishing. Cambridge Scholars Publishing. https:\/\/www.cambridgescholars.com\/product\/9781847188397"},{"key":"e_1_3_2_1_12_1","unstructured":"Phil\u00a0E. Bloomfield. 2021. Without Limits or Lyrics: The Human Voice as Instrument. https:\/\/daily.bandcamp.com\/lists\/human-voice-as-instrument-list Section: Lists."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3401956.3404249"},{"key":"e_1_3_2_1_14_1","unstructured":"Samantha Bruce. 2016. The Female Fa\u00e7ade: How Performance Artists Are Changing the Way Patriarchal Pressures Objectify\u2026. https:\/\/medium.com\/@SamanthaBruce\/the-female-fa%C3%A7ade-how-performance-artists-are-changing-the-way-patriarchal-pressures-objectify-c3b288fa35e4"},{"key":"e_1_3_2_1_15_1","unstructured":"Henry Bruce-Jones. 2020. Ash Koosha Presents: YONA Part I - (Under Your Skin). https:\/\/www.factmag.com\/2020\/11\/25\/ash-koosha-presents-yona-part-i\/"},{"key":"e_1_3_2_1_16_1","volume-title":"Explorations in Art and Technology (2 ed.)","author":"Candy Linda","unstructured":"Linda Candy, Ernest Edmonds, and Fabrizio Poltronieri. 2018. Explorations in Art and Technology (2 ed.). Springer London. https:\/\/link.springer.com\/book\/10.1007\/978-1-4471-7367-0"},{"key":"e_1_3_2_1_17_1","unstructured":"District of\u00a0Columbia) Cato Institute\u00a0(Washington (Ed.). 2023. Cato Handbook for Policymakers (9th edition ed.). Cato Institute Washington."},{"key":"e_1_3_2_1_18_1","unstructured":"Devin Coldewey. 2023. VALL-E\u2019s quickie voice deepfakes should worry you if you weren\u2019t worried already. https:\/\/techcrunch.com\/2023\/01\/12\/vall-es-quickie-voice-deepfakes-should-worry-you-if-you-werent-worried-already\/"},{"key":"e_1_3_2_1_19_1","volume-title":"Actors vs. AI: Strike brings focus to emerging use of advanced tech. NBC News (July","author":"Collier Kevin","year":"2023","unstructured":"Kevin Collier. 2023. Actors vs. AI: Strike brings focus to emerging use of advanced tech. NBC News (July 2023). https:\/\/www.nbcnews.com\/tech\/tech-news\/hollywood-actor-sag-aftra-ai-artificial-intelligence-strike-rcna94191"},{"key":"e_1_3_2_1_20_1","volume-title":"AIMC 2023 (aug 29 2023","author":"Cotton Kelsey","year":"2023","unstructured":"Kelsey Cotton and K\u0131van\u00e7 Tatar. 2023. Caring Trouble and Musical AI: Considerations towards a Feminist Musical AI. AIMC 2023 (aug 29 2023). https:\/\/aimc2023.pubpub.org\/pub\/zwjy371l."},{"key":"e_1_3_2_1_21_1","unstructured":"Trevor Cox. 2019. The uncanny valley: does it happen with voices?http:\/\/trevorcox.me\/the-uncanny-valley-does-it-happen-with-voices"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1017\/S1355771807001604"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1057\/s41300-021-00134-w"},{"key":"e_1_3_2_1_24_1","volume-title":"Matters of Care: Speculative Ethics in More Than Human Worlds","author":"de\u00a0la Bellacasa Mar\u00eda\u00a0Puig","unstructured":"Mar\u00eda\u00a0Puig de\u00a0la Bellacasa. 2017. Matters of Care: Speculative Ethics in More Than Human Worlds. University of Minnesota Press. https:\/\/libgen.li\/ads.php?md5=3dec273eb9043ae8b1a7140b1120c759"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/11863878_19"},{"key":"e_1_3_2_1_26_1","volume-title":"Listening through the noise: the aesthetics of experimental electronic music","author":"Demers Joanna\u00a0Teresa","year":"1824","unstructured":"Joanna\u00a0Teresa Demers. 2010. Listening through the noise: the aesthetics of experimental electronic music. Oxford University Press, Oxford ; New York. OCLC: ocn435918247."},{"key":"e_1_3_2_1_27_1","unstructured":"Descript. 2023. Lyrebird. https:\/\/www.descript.com\/lyrebird"},{"key":"e_1_3_2_1_28_1","volume-title":"Voice as a technology of selfhood: Towards an analysis of racialized timbre and vocal performance. Ph.\u00a0D. Dissertation","author":"Eidsheim Nina","unstructured":"Nina Eidsheim. 2008. Voice as a technology of selfhood: Towards an analysis of racialized timbre and vocal performance. Ph.\u00a0D. Dissertation. University of California, San Diego. https:\/\/www.academia.edu\/657536\/Voice_as_a_technology_of_selfhood_Towards_an_analysis_of_racialized_timbre_and_vocal_performance"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"Nina Eidsheim Katherine Meizel Nina Eidsheim and Katherine Meizel (Eds.). 2019. The Oxford Handbook of Voice Studies. Oxford University Press Oxford New York.","DOI":"10.1093\/oxfordhb\/9780199982295.001.0001"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.2752\/174589311X12961584845729"},{"key":"e_1_3_2_1_31_1","volume-title":"Sensing sound: singing & listening as vibrational practice","author":"Eidsheim Nina\u00a0Sun","unstructured":"Nina\u00a0Sun Eidsheim. 2015. Sensing sound: singing & listening as vibrational practice. Duke University Press, Durham."},{"key":"e_1_3_2_1_32_1","first-page":"805","article-title":"Remarks on Technology and Art","volume":"46","author":"Ellul Jacques","year":"1979","unstructured":"Jacques Ellul and Daniel Hofstadter. 1979. Remarks on Technology and Art. Social Research 46, 4 (1979), 805\u2013833. http:\/\/www.jstor.org\/stable\/40970814","journal-title":"Social Research"},{"key":"e_1_3_2_1_33_1","unstructured":"Thailand\u00a0Posts English. 2022. VAVA Thailand\u2019s first female artist Ai who looks like a real human. ready to go through music and reality shows. https:\/\/thailand.postsen.com\/local\/85090\/VAVA-Thailand%E2%80%99s-first-female-artist-Ai-who-looks-like-a-real-human-ready-to-go-through-music-and-reality-shows.html Section: Local."},{"key":"e_1_3_2_1_34_1","unstructured":"NVIDIA\u00a0AI Enterprise. [n. d.]. NVIDIA NeMo. https:\/\/nvidia.github.io\/NeMo\/"},{"key":"e_1_3_2_1_35_1","first-page":"149","article-title":"Study on Intelligence (AI) Detection Model about Telecommunication Finance Fraud Accident","volume":"29","author":"Lim Jeong","year":"2019","unstructured":"Jeong Eui-seok and Lim Jong-in. 2019. Study on Intelligence (AI) Detection Model about Telecommunication Finance Fraud Accident. Journal of the Korean Institute of Information Security and Cryptology 29, 1 (2019), 149\u2013164.","journal-title":"Journal of the Korean Institute of Information Security and Cryptology"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1075\/hl.28.3.02fag"},{"key":"e_1_3_2_1_37_1","volume-title":"Voice Deepfakes Are Coming for Your Bank Balance. The New York Times (Aug","author":"Flitter Emily","year":"2023","unstructured":"Emily Flitter and Stacy Cowley. 2023. Voice Deepfakes Are Coming for Your Bank Balance. The New York Times (Aug. 2023). https:\/\/www.nytimes.com\/2023\/08\/30\/business\/voice-deepfakes-bank-scams.html"},{"key":"e_1_3_2_1_38_1","unstructured":"Xue-Yong Fu Cheng Chen Md\u00a0Tahmid\u00a0Rahman Laskar Shayna Gardiner Pooja Hiranandani and Shashi\u00a0Bhushan TN. 2022. Entity-level Sentiment Analysis in Contact Center Telephone Conversations. arxiv:2210.13401\u00a0[cs.CL]"},{"key":"e_1_3_2_1_39_1","unstructured":"Gaby Gayles. 2019. Voice Assistants & the Uncanny Valley: The More Lifelike the Less \u201cReal\u201d. https:\/\/medium.com\/voice-tech-podcast\/voice-assistants-the-uncanny-valley-the-more-lifelike-the-less-real-fb0bab2755d1"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"e_1_3_2_1_41_1","volume-title":"The historical relationship between artistic activities and technology development","author":"Gir\u00e3o Lu\u00eds\u00a0Miguel","unstructured":"Lu\u00eds\u00a0Miguel Gir\u00e3o and C\u00e9u Santos, Maria. 2019. The historical relationship between artistic activities and technology development.Publications Office, LU. https:\/\/data.europa.eu\/doi\/10.2861\/961315"},{"key":"e_1_3_2_1_42_1","volume-title":"Musical Gestures","author":"God\u00f8y Rolf\u00a0Inge","unstructured":"Rolf\u00a0Inge God\u00f8y. 2009. Gestural Affordances of Musical Sound. In Musical Gestures. Routledge. Num Pages: 23."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1017\/S1355771809990264"},{"key":"e_1_3_2_1_44_1","volume-title":"Voice Analytics and Artificial Intelligence: Future Directions for a post-COVID world. White Paper Wharton AI & Analytics for Business","author":"Goorha Saurabh","unstructured":"Saurabh Goorha and Raghuram Iyengar. 2020. Voice Analytics and Artificial Intelligence: Future Directions for a post-COVID world. White Paper Wharton AI & Analytics for Business. Wharton University of Pennsylvania. https:\/\/aiab.wharton.upenn.edu\/white-paper\/voice-analytics-and-artificial-intelligence-future-directions-for-a-post-covid-world\/"},{"key":"e_1_3_2_1_45_1","unstructured":"Decrypt \/\u00a0Will Gottsegen. 2021. Holly Herndon Launches DAO-Controlled Vocal Deepfake Platform \u2019Holly+\u2019. https:\/\/decrypt.co\/75958\/holly-herndon-launches-dao-controlled-vocal-deepfake-platform-holly\/ Section: News."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.5210\/fm.v26i12.11833"},{"key":"e_1_3_2_1_47_1","unstructured":"Grimes [@Grimezsz]. 2023. I think it\u2019s cool to be fused w a machine and I like the idea of open sourcing all art and killing copyright. https:\/\/twitter.com\/Grimezsz\/status\/1650304205981089793"},{"key":"e_1_3_2_1_48_1","unstructured":"Grimes [@Grimezsz]. 2023. I\u2019ll split 50% royalties on any successful AI generated song that uses my voice. Same deal as I would with any artist i collab with. Feel free to use my voice without penalty. I have no label and no legal bindings. https:\/\/t.co\/KIY60B5uqt. https:\/\/twitter.com\/Grimezsz\/status\/1650304051718791170"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445929"},{"key":"e_1_3_2_1_50_1","first-page":"2","article-title":"The Somaesthetics of Musicians: Rethinking the Body in Musical Practice","volume":"5","author":"Han Jungmin\u00a0Grace","year":"2019","unstructured":"Jungmin\u00a0Grace Han. 2019. The Somaesthetics of Musicians: Rethinking the Body in Musical Practice. The Journal of Somaesthetics 5, 2 (Dec. 2019). https:\/\/journals.aau.dk\/index.php\/JOS\/article\/view\/2200 Number: 2.","journal-title":"The Journal of Somaesthetics"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4020-3803-74"},{"key":"e_1_3_2_1_52_1","volume-title":"Staying with the trouble: making kin in the Chthulucene","author":"Haraway J.","unstructured":"Donna\u00a0J. Haraway. 2016. Staying with the trouble: making kin in the Chthulucene. Duke University Press, Durham."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbusres.2020.12.012"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1080\/03615260801973364"},{"key":"e_1_3_2_1_55_1","unstructured":"Thomas Hobbs. 2021. \u2018It\u2019s Fan Fiction For Music\u2019: Why Deepfake Vocals of Music Legends Are on the Rise. https:\/\/www.billboard.com\/pro\/deepfake-music-imitations-history\/"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3363384.3363385"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2021.664925"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.4324\/9780203120330"},{"key":"e_1_3_2_1_59_1","volume-title":"Technology and the Lifeworld: From Garden to Earth","author":"Ihde Don","unstructured":"Don Ihde. 1990. Technology and the Lifeworld: From Garden to Earth. Indiana University Press."},{"key":"e_1_3_2_1_60_1","volume-title":"Listening and Voice","author":"Ihde Don","unstructured":"Don Ihde. 2007. Listening and Voice (2nd edition ed.). State University of New York Press, New York. https:\/\/sunypress.edu\/Books\/L\/Listening-and-Voice2","edition":"2"},{"key":"e_1_3_2_1_61_1","unstructured":"IPCV. [n. d.]. Acappella. https:\/\/ipcv.github.io\/Acappella\/acappella\/"},{"key":"e_1_3_2_1_62_1","unstructured":"IPNHK. 2019. Isabella Winthrop. https:\/\/medium.com\/@IPNHK\/isabella-winthrop-d07814ab4ba0"},{"key":"e_1_3_2_1_63_1","unstructured":"Keith Ito and Linda Johnson. 2017. The LJ Speech Dataset. https:\/\/keithito.com\/LJ-Speech-Dataset\/."},{"key":"e_1_3_2_1_64_1","volume-title":"Discovering the uncanny valley for the sound of a voice.Ph.\u00a0D. Dissertation","author":"Jansen Dennis","unstructured":"Dennis Jansen. 2019. Discovering the uncanny valley for the sound of a voice.Ph.\u00a0D. Dissertation. Tilburg University, Netherlands. http:\/\/arno.uvt.nl\/show.cgi?fid=149554"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.4324\/9780203863411"},{"key":"e_1_3_2_1_66_1","unstructured":"Alexander\u00a0Refsum Jensenius. 2007. Action-sound : developing methods and tools to study music-related body movement. Doctoral thesis. University of Oslo Norway. https:\/\/www.duo.uio.no\/handle\/10852\/27149Accepted: 2013-03-12T12:01:35Z."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.13089\/JKIISC.2019.29.1.149"},{"key":"e_1_3_2_1_68_1","volume-title":"Body Art\/Performing the Subject","author":"Jones Amelia","unstructured":"Amelia Jones. 1998. Body Art\/Performing the Subject. University of Minnesota Press. https:\/\/www.upress.umn.edu\/book-division\/books\/body-art-performing-the-subject"},{"key":"e_1_3_2_1_69_1","volume-title":"Sander Dieleman, and Koray Kavukcuoglu.","author":"Kalchbrenner Nal","year":"2018","unstructured":"Nal Kalchbrenner, Erich Elsen, Karen Simonyan, Seb Noury, Norman Casagrande, Edward Lockhart, Florian Stimberg, Aaron van\u00a0den Oord, Sander Dieleman, and Koray Kavukcuoglu. 2018. Efficient Neural Audio Synthesis. http:\/\/arxiv.org\/abs\/1802.08435 arXiv:1802.08435 [cs, eess]."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"crossref","unstructured":"Hirokazu Kameoka Takuhiro Kaneko Kou Tanaka and Nobukatsu Hojo. 2018. StarGAN-VC: Non-parallel many-to-many voice conversion with star generative adversarial networks. http:\/\/arxiv.org\/abs\/1806.02169 arXiv:1806.02169 [cs eess stat].","DOI":"10.1109\/SLT.2018.8639535"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1017\/S135577180700163X"},{"key":"e_1_3_2_1_72_1","unstructured":"Zahra Khanjani Gabrielle Watson and Vandana\u00a0P. Janeja. 2021. How Deep Are the Fakes? Focusing on Audio Deepfake: A Survey. arxiv:2111.14203\u00a0[cs.SD]"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.2993893"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1145\/2948910.2948952"},{"key":"e_1_3_2_1_75_1","volume-title":"Meet Yona, a first generation \u2019Auxiliary Human\u2019 | Facebook. https:\/\/www.facebook.com\/ashkoosha\/posts\/meet-yona-a-first-generation-auxiliary-human-who-uses-artificial-intelligence-an\/10157017594454400\/","author":"Koosha Ash","year":"2019","unstructured":"Ash Koosha. 2019. Meet Yona, a first generation \u2019Auxiliary Human\u2019 | Facebook. https:\/\/www.facebook.com\/ashkoosha\/posts\/meet-yona-a-first-generation-auxiliary-human-who-uses-artificial-intelligence-an\/10157017594454400\/"},{"key":"e_1_3_2_1_76_1","unstructured":"Ashkan Kooshanejad. [n. d.]. Yona. https:\/\/theyona.bandcamp.com"},{"key":"e_1_3_2_1_77_1","unstructured":"Kimberlee Kruesi. 2024. Tennessee just became the first state to protect musicians and other artists against AI. https:\/\/artscanvas.org\/arts-culture\/tennessee-just-became-the-first-state-to-protect-musicians-and-other-artists-against-ai"},{"key":"e_1_3_2_1_78_1","unstructured":"Kundan Kumar Rithesh Kumar Thibault de Boissiere Lucas Gestin Wei\u00a0Zhen Teoh Jose Sotelo Alexandre de Brebisson Yoshua Bengio and Aaron Courville. 2019. MelGAN: Generative Adversarial Networks for Conditional Waveform Synthesis. arxiv:1910.06711\u00a0[eess.AS]"},{"key":"e_1_3_2_1_79_1","volume-title":"Eleven Labs: Text to Speech & AI Voice Generator. https:\/\/elevenlabs.io","author":"Labs Eleven","year":"2023","unstructured":"Eleven Labs. 2023. Eleven Labs: Text to Speech & AI Voice Generator. https:\/\/elevenlabs.io"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.5749\/minnesota\/9780816679959.003.0004"},{"key":"e_1_3_2_1_81_1","volume-title":"Making Things Public","author":"Latour Bruno","unstructured":"Bruno Latour and Peter Weibel. 2005. Making Things Public. MIT Press, Cambridge, Massachusetts. https:\/\/mitpress.mit.edu\/9780262122795\/making-things-public\/"},{"key":"e_1_3_2_1_82_1","volume-title":"Embodied music cognition and mediation technology","author":"Leman Marc","unstructured":"Marc Leman. 2007. Embodied music cognition and mediation technology. MIT Press, Cambridge, Mass. OCLC: ocm74915535."},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","unstructured":"Mingkuan Liu Chi Zhang Hua Xing Chao Feng Monchu Chen Judith Bishop and Grace Ngapo. 2021. Scalable Data Annotation Pipeline for High-Quality Large Speech Datasets Development. https:\/\/doi.org\/10.48550\/arXiv.2109.01164 arXiv:2109.01164 [cs eess].","DOI":"10.48550\/arXiv.2109.01164"},{"key":"e_1_3_2_1_84_1","unstructured":"Rui Liu Berrak Sisman and Haizhou Li. 2021. StrengthNet: Deep Learning-based Emotion Strength Assessment for Emotional Speech Synthesis. http:\/\/arxiv.org\/abs\/2110.03156 arXiv:2110.03156 [cs eess]."},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.3390\/make1010030"},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1177\/20438206221102952"},{"key":"e_1_3_2_1_87_1","volume-title":"AI kidnapping scam copied teen girl\u2019s voice in $1M extortion attempt - National | Globalnews.ca. Global News (April","author":"Mannie Kathryn","year":"2023","unstructured":"Kathryn Mannie. 2023. AI kidnapping scam copied teen girl\u2019s voice in $1M extortion attempt - National | Globalnews.ca. Global News (April 2023). https:\/\/globalnews.ca\/news\/9629883\/ai-kidnapping-scam-teen-girl-voice-cloned-extortion-arizona-jennifer-destefano\/"},{"key":"e_1_3_2_1_88_1","unstructured":"Clovis McEvoy. [n. d.]. Vocal AI deepfakes of major artists are cropping up everywhere \u2013 should artists be worried?https:\/\/musictech.com\/features\/music-deepfakes-ai-drake-grimes-weeknd\/"},{"key":"e_1_3_2_1_89_1","volume-title":"Professional Communication and Network Interaction: A Rhetorical and Ethical Approach","author":"McKee A.","unstructured":"Heidi\u00a0A. McKee and James\u00a0E. Porter. 2019. Professional Communication and Network Interaction: A Rhetorical and Ethical Approach (1st edition ed.). Routledge. https:\/\/www.routledge.com\/Professional-Communication-and-Network-Interaction-A-Rhetorical-and-Ethical\/McKee-Porter\/p\/book\/9780367888398","edition":"1"},{"key":"e_1_3_2_1_90_1","unstructured":"Anna McNay. 2015. The Body as Language: Women and Performance. https:\/\/www.studiointernational.com\/the-body-as-language-women-and-performance-review-richard-saltoun"},{"key":"e_1_3_2_1_91_1","unstructured":"Edwin\u00a0F. McPherson. 2003. Voice Misappropriation In California - Bette Midler Tom Waits and Grandma Burger.https:\/\/mcpherson-llp.com\/articles\/voice-misappropriation-in-california-bette-midler-tom-waits-and-grandma-burger\/"},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijinfomgt.2022.102545"},{"key":"e_1_3_2_1_93_1","doi-asserted-by":"publisher","DOI":"10.5250\/quiparle.21.1.0107"},{"key":"e_1_3_2_1_94_1","unstructured":"Keyaan Minhas. 2023. The-Rise-of-Voice-Assistants:-Changing-the-Way-We-Interact-with-Technology. https:\/\/medium.com\/@keyaanminhas\/the-rise-of-voice-assistants-changing-the-way-we-interact-with-technology-d613a1063929"},{"key":"e_1_3_2_1_95_1","unstructured":"Mozilla. 2017. Mozilla Common Voice. https:\/\/commonvoice.mozilla.org\/"},{"key":"e_1_3_2_1_96_1","unstructured":"Madhumita Murgia and Anna Nicolaou. 2023. Google and Universal Music negotiate deal over AI \u2018deepfakes\u2019. https:\/\/www.ft.com\/content\/6f022306-2f83-4da7-8066-51386e8fe63b"},{"key":"e_1_3_2_1_97_1","unstructured":"MUTEK. [n. d.]. YONA featuring Ash Koosha. https:\/\/mutek.org\/en\/artists\/yona-featuring-ash-koosha"},{"key":"e_1_3_2_1_98_1","doi-asserted-by":"crossref","unstructured":"A. Nagrani J.\u00a0S. Chung and A. Zisserman. 2017. VoxCeleb: a large-scale speaker identification dataset. In INTERSPEECH.","DOI":"10.21437\/Interspeech.2017-950"},{"key":"e_1_3_2_1_99_1","doi-asserted-by":"publisher","DOI":"10.1145\/2465780.2465783"},{"key":"e_1_3_2_1_100_1","volume-title":"Midler v","author":"Appeals S\u00a0Court","year":"1988","unstructured":"US\u00a0Court of Appeals. 1988. Midler v. Ford Motor Co., 849 F.2d 460 (9th Cir. 1988). https:\/\/law.justia.com\/cases\/federal\/appellate-courts\/F2\/849\/460\/37485\/"},{"key":"e_1_3_2_1_101_1","volume-title":"The Body in Sound, Music and Performance: Studies in Audio and Sonic Arts","author":"O\u2019Keefe Linda","year":"1944","unstructured":"Linda O\u2019Keefe and Nogueira. 2022. The Body in Sound, Music and Performance: Studies in Audio and Sonic Arts (1st ed.). Focal Press. https:\/\/www.routledge.com\/The-Body-in-Sound-Music-and-Performance-Studies-in-Audio-and-Sonic-Arts\/O-Keeffe-Nogueira\/p\/book\/9780367441944","edition":"1"},{"key":"e_1_3_2_1_102_1","unstructured":"Arlen Olsen. 2023. Voice Cloning Technology and its Legal Implications: An IP Law Perspective - Schmeiser Olsen & Watts LLP. https:\/\/iplawusa.com\/voice-cloning-technology-and-its-legal-implications-an-ip-law-perspective\/"},{"key":"e_1_3_2_1_103_1","unstructured":"Aaron van\u00a0den Oord Sander Dieleman Heiga Zen Karen Simonyan Oriol Vinyals Alex Graves Nal Kalchbrenner Andrew Senior and Koray Kavukcuoglu. 2016. WaveNet: A Generative Model for Raw Audio. http:\/\/arxiv.org\/abs\/1609.03499 arXiv:1609.03499 [cs]."},{"key":"e_1_3_2_1_104_1","unstructured":"Vassil Panayotov Guoguo Chen Daniel Povey and Sanjeev Khudanpur. 2015. LibriSpeech ASR. http:\/\/www.openslr.org\/12"},{"key":"e_1_3_2_1_105_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-90-481-2816-7_2"},{"key":"e_1_3_2_1_106_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"e_1_3_2_1_107_1","volume-title":"The Guardian (Aug","author":"Parkinson Hannah\u00a0Jane","year":"2015","unstructured":"Hannah\u00a0Jane Parkinson. 2015. Hey, Siri! Meet the real people behind Apple\u2019s voice-activated assistant. The Guardian (Aug. 2015). https:\/\/www.theguardian.com\/technology\/2015\/aug\/12\/siri-real-voices-apple-ios-assistant-jon-briggs-susan-bennett-karen-jacobsen"},{"key":"e_1_3_2_1_108_1","doi-asserted-by":"publisher","DOI":"10.1145\/2948910.2948914"},{"key":"e_1_3_2_1_109_1","doi-asserted-by":"publisher","DOI":"10.1145\/2948910.2948920"},{"key":"e_1_3_2_1_110_1","volume-title":"The Voice in the Machine: Building Computers That Understand Speech","author":"Pieraccini Roberto","unstructured":"Roberto Pieraccini. 2012. The Voice in the Machine: Building Computers That Understand Speech. MIT Press. Google-Books-ID: 3NjxCwAAQBAJ."},{"key":"e_1_3_2_1_111_1","unstructured":"Carlos Pinheiro. 2023. Voice Cloning Technology: The Benefits Risks and Ethical Considerations. https:\/\/medium.com\/@ocarlospinheiro\/voice-cloning-technology-the-benefits-risks-and-ethical-considerations-2e1f737a4722"},{"key":"e_1_3_2_1_112_1","doi-asserted-by":"publisher","DOI":"10.1002\/mar.21457"},{"key":"e_1_3_2_1_113_1","unstructured":"Rhett Power. [n. d.]. No Black Boxes: Keep Humans Involved In Artificial Intelligence. https:\/\/www.forbes.com\/sites\/rhettpower\/2023\/01\/15\/no-black-boxes-keep-humans-involved-in-artificial-intelligence\/ Section: Entrepreneurs."},{"key":"e_1_3_2_1_114_1","doi-asserted-by":"publisher","DOI":"10.5555\/3489212.3489235"},{"key":"e_1_3_2_1_115_1","doi-asserted-by":"publisher","unstructured":"Ryan Prenger Rafael Valle and Bryan Catanzaro. 2018. WaveGlow: A Flow-based Generative Network for Speech Synthesis. https:\/\/doi.org\/10.48550\/arXiv.1811.00002 arXiv:1811.00002 [cs eess stat].","DOI":"10.48550\/arXiv.1811.00002"},{"key":"e_1_3_2_1_116_1","volume-title":"Proceedings of the First MiniCon Conference. 3. https:\/\/ismir2023program.ismir.net\/lbd_354","author":"McBride M.","year":"2023","unstructured":"Polina* Proutskova, John\u00a0M. McBride, Yuto Ozaki, Gakuto Chiba, Yukun Li, Yu Zhaoxin, Wei Yue, Miranda Crowdus, Gabriel Zuckerberg, Olga Velichkina, Yulia Nikolaenko, Yannick Wey, Lawrence Shuster, Patrick\u00a0E. Savage, Elizabeth Phillips, and Andrew Killick. 2023. The VocalNotes Dataset. In Proceedings of the First MiniCon Conference. 3. https:\/\/ismir2023program.ismir.net\/lbd_354.html Conference Name: Ismir 2023 Hybrid Conference."},{"key":"e_1_3_2_1_117_1","unstructured":"Polina* Proutskova John\u00a0M. McBride Yuto Ozaki Gakuto Chiba Yukun Li Yu Zhaoxin Wei Yue Miranda Crowdus Gabriel Zuckerberg Olga Velichkina Yulia Nikolaenko Yannick Wey Lawrence Shuster Patrick\u00a0E. Savage Elizabeth Phillips and Andrew Killick. 2023. VocalNotes Dataset Access Form. https:\/\/docs.google.com\/forms\/d\/e\/1FAIpQLSfWn7fh2pTUnrpwlURzwyCxrxeWDpdTQIq7unLKVE1td_KKsg\/viewform"},{"key":"e_1_3_2_1_118_1","volume-title":"AUTOVC: Zero-Shot Voice Style Transfer with Only Autoencoder Loss","author":"Qian Kaizhi","year":"2019","unstructured":"Kaizhi Qian, Yang Zhang, Shiyu Chang, Xuesong Yang, and Mark Hasegawa-Johnson. 2019. AUTOVC: Zero-Shot Voice Style Transfer with Only Autoencoder Loss. http:\/\/arxiv.org\/abs\/1905.05879 arXiv:1905.05879 [cs, eess, stat]."},{"key":"e_1_3_2_1_119_1","volume-title":"\u2019I\u2019m the original voice of Siri","author":"Ravitz Jessica","year":"2013","unstructured":"Jessica Ravitz. 2013. \u2019I\u2019m the original voice of Siri\u2019 | CNN Business. https:\/\/www.cnn.com\/2013\/10\/04\/tech\/mobile\/bennett-siri-iphone-voice\/index.html"},{"key":"e_1_3_2_1_120_1","unstructured":"Yi Ren Chenxu Hu Xu Tan Tao Qin Sheng Zhao Zhou Zhao and Tie-Yan Liu. 2022. FastSpeech 2: Fast and High-Quality End-to-End Text to Speech. http:\/\/arxiv.org\/abs\/2006.04558 arXiv:2006.04558 [cs eess]."},{"key":"e_1_3_2_1_121_1","unstructured":"ResembleAI. 2023. ResembleAI: AI Voice Generator with Text to Speech and Speech to Speech. https:\/\/www.resemble.ai\/"},{"key":"e_1_3_2_1_122_1","volume-title":"Virtual singer Yona joins Ash Koosha live at Rewire","year":"2019","unstructured":"rewire. 2019. Virtual singer Yona joins Ash Koosha live at Rewire 2019. https:\/\/www.rewirefestival.nl\/artist\/https:\/www.rewirefestival.nl\/artist\/yona"},{"key":"e_1_3_2_1_123_1","volume-title":"\u00a0C. Verbeek","author":"Rosenberger Robert","year":"2015","unstructured":"Robert Rosenberger and Peter P. C.\u00a0C. Verbeek. 2015. A field guide to postphenomenology. Postphenomenological Investigations: Essays on Human-Technology Relations (2015), 9\u201341. https:\/\/research.utwente.nl\/en\/publications\/a-field-guide-to-postphenomenology Publisher: Lexington Books."},{"key":"e_1_3_2_1_124_1","volume-title":"The right of publicity: privacy reimagined for a public world","author":"Rothman E.","unstructured":"Jennifer\u00a0E. Rothman. 2018. The right of publicity: privacy reimagined for a public world. Harvard University Press, Cambridge, Massachusetts."},{"key":"e_1_3_2_1_125_1","unstructured":"Legacy Russell. 2020. Glitch Feminism. Verso. https:\/\/www.penguinrandomhouse.com\/books\/646946\/glitch-feminism-by-legacy-russell\/"},{"key":"e_1_3_2_1_126_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2005.1415612"},{"key":"e_1_3_2_1_127_1","unstructured":"Vlad Savov. 2011. British voice of Siri only found out about it when he heard himself on TV. https:\/\/www.theverge.com\/2011\/11\/10\/2551519\/british-voice-of-siri-only-found-out-about-it-when-he-heard-himself"},{"key":"e_1_3_2_1_128_1","volume-title":"Pierre Sc...\u00c9ditions du Seuil.","author":"Schaeffer Pierre","year":"2020","unstructured":"Pierre Schaeffer. 1966. Trait\u00e9 des objets musicaux, Pierre Sc...\u00c9ditions du Seuil., Paris, France. https:\/\/www.seuil.com\/ouvrage\/traite-des-objets-musicaux-pierre-schaeffer\/9782020026086"},{"key":"e_1_3_2_1_129_1","volume-title":"Robot Voices in Daily Life: Vocal Human-Likeness and Application Context as Determinants of User Acceptance. Frontiers in Psychology 13","author":"Schreibelmayr Simon","year":"2022","unstructured":"Simon Schreibelmayr and Martina Mara. 2022. Robot Voices in Daily Life: Vocal Human-Likeness and Application Context as Determinants of User Acceptance. Frontiers in Psychology 13 (2022). https:\/\/www.frontiersin.org\/articles\/10.3389\/fpsyg.2022.787499"},{"key":"e_1_3_2_1_130_1","unstructured":"Hardik Shah. 2023. Exploring the Pros and Cons of AI Voice Cloning. https:\/\/medium.com\/@shahhardik2905\/exploring-the-pros-and-cons-of-ai-voice-cloning-f4bb15514284"},{"key":"e_1_3_2_1_131_1","volume-title":"The Primacy of Movement","author":"Sheets-Johnstone Maxine","unstructured":"Maxine Sheets-Johnstone. 2011. The Primacy of Movement. John Benjamins Publishing. Google-Books-ID: 2EDgXzWMfuwC."},{"key":"e_1_3_2_1_132_1","volume-title":"The Corporeal Turn: An Interdisciplinary Reader. Andrews UK Limited","author":"Sheets-Johnstone Maxine","unstructured":"Maxine Sheets-Johnstone. 2015. The Corporeal Turn: An Interdisciplinary Reader. Andrews UK Limited. Google-Books-ID: RXPZCgAAQBAJ."},{"key":"e_1_3_2_1_133_1","doi-asserted-by":"crossref","unstructured":"Jonathan Shen Ruoming Pang Ron\u00a0J. Weiss Mike Schuster Navdeep Jaitly Zongheng Yang Zhifeng Chen Yu Zhang Yuxuan Wang RJ Skerry-Ryan Rif\u00a0A. Saurous Yannis Agiomyrgiannakis and Yonghui Wu. 2018. Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions. arxiv:1712.05884\u00a0[cs.CL]","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"e_1_3_2_1_134_1","volume-title":"Proc. of the 1st International Symposion on Companion Technology (ISCT","author":"Ohnemus Siegert","year":"2015","unstructured":"Siegert and Ohnemus. 2015. A new Dataset of Telephone-Based Human-Human Call-Center Interaction with Emotional Evaluation. In Proc. of the 1st International Symposion on Companion Technology (ISCT 2015). Ulm, Germany, 143\u2013148."},{"key":"e_1_3_2_1_135_1","volume-title":"The Uncanny Valley Nobody\u2019s Talking About: Eerie Robot Voices. Wired","author":"Simon Matt","year":"2019","unstructured":"Matt Simon. 2019. The Uncanny Valley Nobody\u2019s Talking About: Eerie Robot Voices. Wired (2019). https:\/\/www.wired.com\/story\/uncanny-valley-robot-voices\/ Section: tags."},{"key":"e_1_3_2_1_136_1","doi-asserted-by":"publisher","DOI":"10.1017\/S1355771897009059"},{"key":"e_1_3_2_1_137_1","doi-asserted-by":"publisher","DOI":"10.5278\/ojs.jos.v5i2.3344"},{"key":"e_1_3_2_1_138_1","unstructured":"soerenab. 2024. AudioMNIST. https:\/\/github.com\/soerenab\/AudioMNIST original-date: 2018-06-29T16:31:21Z."},{"key":"e_1_3_2_1_139_1","volume-title":"Interactions between voice-activated AI assistants and human speakers and their implications for second-language acquisition. Frontiers in Communication 7","author":"Song Jae\u00a0Yung","year":"2022","unstructured":"Jae\u00a0Yung Song, Anne Pycha, and Tessa Culleton. 2022. Interactions between voice-activated AI assistants and human speakers and their implications for second-language acquisition. Frontiers in Communication 7 (2022). https:\/\/www.frontiersin.org\/articles\/10.3389\/fcomm.2022.995475"},{"key":"e_1_3_2_1_140_1","unstructured":"SpeechBrain. [n. d.]. SpeechBrain: A PyTorch Speech Toolkit. https:\/\/speechbrain.github.io\/"},{"key":"e_1_3_2_1_141_1","unstructured":"SpeechColab. 2024. GigaSpeech. https:\/\/github.com\/SpeechColab\/GigaSpeech original-date: 2021-03-03T06:36:25Z."},{"key":"e_1_3_2_1_142_1","volume-title":"Science of the Singing Voice","author":"Sundberg Johan","unstructured":"Johan Sundberg. 1989. Science of the Singing Voice. Northern Illinois University Press, Dekalb, Ill."},{"key":"e_1_3_2_1_143_1","unstructured":"Eric\u00a0E. Surbano. 2023. VAVA Thai pop\u2019s first AI artist has dropped her first single. https:\/\/www.lifestyleasia.com\/bk\/tech\/vava-ai-artist\/"},{"key":"e_1_3_2_1_144_1","volume-title":"The Stanford Encyclopedia of Philosophy (Spring 2021 ed.), Edward\u00a0N","author":"Thomson-Jones Katherine","unstructured":"Katherine Thomson-Jones and Shelby Moser. 2021. The Philosophy of Digital Art. In The Stanford Encyclopedia of Philosophy (Spring 2021 ed.), Edward\u00a0N. Zalta (Ed.). Metaphysics Research Lab, Stanford University, N\/A."},{"key":"e_1_3_2_1_145_1","volume-title":"China\u2019s court hears nation\u2019s first AI voice rights case. The Straits Times (Dec","author":"Times The\u00a0Strait","year":"2023","unstructured":"The\u00a0Strait Times. 2023. China\u2019s court hears nation\u2019s first AI voice rights case. The Straits Times (Dec. 2023). https:\/\/www.straitstimes.com\/asia\/east-asia\/china-s-court-hears-nation-s-first-ai-voice-rights-case"},{"key":"e_1_3_2_1_146_1","doi-asserted-by":"crossref","unstructured":"No\u00e9 Tits Kevin\u00a0El Haddad and Thierry Dutoit. 2019. Exploring Transfer Learning for Low Resource Emotional TTS. http:\/\/arxiv.org\/abs\/1901.04276 arXiv:1901.04276 [cs eess].","DOI":"10.1007\/978-3-030-29516-5_5"},{"key":"e_1_3_2_1_147_1","volume-title":"Laughter Synthesis: Combining Seq2seq modeling with Transfer Learning","author":"Tits No\u00e9","year":"2020","unstructured":"No\u00e9 Tits, Kevin\u00a0El Haddad, and Thierry Dutoit. 2020. Laughter Synthesis: Combining Seq2seq modeling with Transfer Learning. http:\/\/arxiv.org\/abs\/2008.09483 arXiv:2008.09483 [cs, eess]."},{"key":"e_1_3_2_1_148_1","doi-asserted-by":"publisher","DOI":"10.1080\/09298215.2011.614951"},{"key":"e_1_3_2_1_149_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11097-008-9099-x"},{"key":"e_1_3_2_1_150_1","volume-title":"They thought loved ones were calling for help. It was an AI scam.Washington Post (March","author":"Verma Pranshu","year":"2023","unstructured":"Pranshu Verma. 2023. They thought loved ones were calling for help. It was an AI scam.Washington Post (March 2023). https:\/\/www.washingtonpost.com\/technology\/2023\/03\/05\/ai-voice-scam\/"},{"key":"e_1_3_2_1_151_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2004.825882"},{"key":"e_1_3_2_1_152_1","doi-asserted-by":"publisher","DOI":"10.1080\/09298210500124208"},{"key":"e_1_3_2_1_153_1","doi-asserted-by":"publisher","DOI":"10.4236\/adr.2021.93019"},{"key":"e_1_3_2_1_154_1","volume-title":"Tacotron: Towards End-to-End Speech Synthesis","author":"Wang Yuxuan","year":"2017","unstructured":"Yuxuan Wang, R.\u00a0J. Skerry-Ryan, Daisy Stanton, Yonghui Wu, Ron\u00a0J. Weiss, Navdeep Jaitly, Zongheng Yang, Ying Xiao, Zhifeng Chen, Samy Bengio, Quoc Le, Yannis Agiomyrgiannakis, Rob Clark, and Rif\u00a0A. Saurous. 2017. Tacotron: Towards End-to-End Speech Synthesis. http:\/\/arxiv.org\/abs\/1703.10135 arXiv:1703.10135 [cs]."},{"key":"e_1_3_2_1_155_1","volume-title":"Speech Commands: A Dataset for Limited-Vocabulary Speech Recognition","author":"Warden Pete","year":"2018","unstructured":"Pete Warden. 2018. Speech Commands: A Dataset for Limited-Vocabulary Speech Recognition. http:\/\/arxiv.org\/abs\/1804.03209 arXiv:1804.03209 [cs]."},{"key":"e_1_3_2_1_156_1","unstructured":"Matt Warman. 2011. The voice behind Siri breaks his silence. https:\/\/www.telegraph.co.uk\/technology\/apple\/8879705\/The-voice-behind-Siri-breaks-his-silence.html"},{"key":"e_1_3_2_1_157_1","volume-title":"Hollywood Actors Strike Ends With a Deal That Will Impact AI and Streaming for Decades. Wired","author":"Watercutter Angela","year":"2023","unstructured":"Angela Watercutter. 2023. Hollywood Actors Strike Ends With a Deal That Will Impact AI and Streaming for Decades. Wired (2023). https:\/\/www.wired.com\/story\/hollywood-actors-strike-ends-ai-streaming\/ Section: tags."},{"key":"e_1_3_2_1_158_1","unstructured":"Oskar\u00a0M. Wiklund. 2023. Unveiling the Future: The Power of Voice in AI Interactions. https:\/\/www.multiply.co\/multiply-blog\/unveiling-the-future-the-power-of-voice-in-ai-interactions"},{"key":"e_1_3_2_1_159_1","unstructured":"Chloe Xiang. 2022. Scientists Increasingly Can\u2019t Explain How AI Works. https:\/\/www.vice.com\/en\/article\/y3pezm\/scientists-increasingly-cant-explain-how-ai-works"},{"key":"e_1_3_2_1_160_1","volume-title":"NNSVS: A Neural Network-Based Singing Voice Synthesis Toolkit","author":"Yamamoto Ryuichi","year":"2023","unstructured":"Ryuichi Yamamoto, Reo Yoneyama, and Tomoki Toda. 2023. NNSVS: A Neural Network-Based Singing Voice Synthesis Toolkit. http:\/\/arxiv.org\/abs\/2210.15987 arXiv:2210.15987 [cs, eess]."},{"key":"e_1_3_2_1_161_1","unstructured":"Cao Yin. 2023. Chinese court hears nation\u2019s first AI voice rights case. https:\/\/asianews.network\/chinese-court-hears-nations-first-ai-voice-rights-case\/"},{"key":"e_1_3_2_1_162_1","doi-asserted-by":"publisher","DOI":"10.4324\/9781315609164"},{"key":"e_1_3_2_1_163_1","volume-title":"China mulls legality of AI-generated voice used in audiobooks. ZDNET (Dec","author":"Eileen Yu.","year":"2023","unstructured":"Eileen Yu. 2023. China mulls legality of AI-generated voice used in audiobooks. ZDNET (Dec. 2023). https:\/\/www.zdnet.com\/article\/china-mulls-legality-of-ai-generated-voice-used-in-audiobooks\/"},{"key":"e_1_3_2_1_164_1","doi-asserted-by":"crossref","unstructured":"Heiga Zen Viet Dang Rob Clark Yu Zhang Ron\u00a0J. Weiss Ye Jia Zhifeng Chen and Yonghui Wu. 2019. LibriTTS: A Corpus Derived from LibriSpeech for Text-to-Speech. arxiv:1904.02882\u00a0[cs.SD]","DOI":"10.21437\/Interspeech.2019-2441"},{"key":"e_1_3_2_1_165_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2892235"},{"key":"e_1_3_2_1_166_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3066047"},{"key":"e_1_3_2_1_167_1","doi-asserted-by":"publisher","unstructured":"Shikun Zhang Omid Jafari and Parth Nagarkar. 2021. A Survey on Machine Learning Techniques for Auto Labeling of Video Audio and Text Data. https:\/\/doi.org\/10.48550\/arXiv.2109.03784 arXiv:2109.03784 [cs].","DOI":"10.48550\/arXiv.2109.03784"},{"key":"e_1_3_2_1_168_1","doi-asserted-by":"publisher","DOI":"10.1121\/1.4964509"}],"event":{"name":"MOCO '24: 9th International Conference on Movement and Computing","location":"Utrecht Netherlands","acronym":"MOCO '24"},"container-title":["Proceedings of the 9th International Conference on Movement and Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3658852.3659065","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3658852.3659065","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T23:26:01Z","timestamp":1755905161000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3658852.3659065"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,30]]},"references-count":168,"alternative-id":["10.1145\/3658852.3659065","10.1145\/3658852"],"URL":"https:\/\/doi.org\/10.1145\/3658852.3659065","relation":{},"subject":[],"published":{"date-parts":[[2024,5,30]]},"assertion":[{"value":"2024-06-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}