{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T19:55:23Z","timestamp":1776887723983,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,3,18]],"date-time":"2024-03-18T00:00:00Z","timestamp":1710720000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62102354, 62032021, 62172359, 62372406, 62202099, 62372402"],"award-info":[{"award-number":["62102354, 62032021, 62172359, 62372406, 62202099, 62372402"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Zhejiang Provincial Natural Science Foundation","award":["LY24F020007"],"award-info":[{"award-number":["LY24F020007"]}]},{"name":"Hangzhou Leading Innovation and Entrepreneurship Team","award":["TD2020003"],"award-info":[{"award-number":["TD2020003"]}]},{"name":"Jiangsu Provincial Natural Science Foundation of China","award":["BK20220806"],"award-info":[{"award-number":["BK20220806"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,18]]},"DOI":"10.1145\/3640543.3645146","type":"proceedings-article","created":{"date-parts":[[2024,4,5]],"date-time":"2024-04-05T18:23:12Z","timestamp":1712341392000},"page":"35-50","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Conan's Bow Tie: A Streaming Voice Conversion for Real-Time VTuber Livestreaming"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3495-4369","authenticated-orcid":false,"given":"Qianniu","family":"Chen","sequence":"first","affiliation":[{"name":"The State Key Laboratory of Blockchain and Data Security, Zhejiang University, China and School of Cyber Science and Technology, Zhejiang University, China and College of Computer Science and Technology, Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0109-7238","authenticated-orcid":false,"given":"Zhehan","family":"Gu","sequence":"additional","affiliation":[{"name":"Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5230-3749","authenticated-orcid":false,"given":"Li","family":"Lu","sequence":"additional","affiliation":[{"name":"The State Key Laboratory of Blockchain and Data Security, Zhejiang University, China and School of Cyber Science and Technology, Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1628-906X","authenticated-orcid":false,"given":"Xiangyu","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Southeast University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0921-8869","authenticated-orcid":false,"given":"Zhongjie","family":"Ba","sequence":"additional","affiliation":[{"name":"Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5240-5200","authenticated-orcid":false,"given":"Feng","family":"Lin","sequence":"additional","affiliation":[{"name":"Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7981-9873","authenticated-orcid":false,"given":"Zhenguang","family":"Liu","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3441-6277","authenticated-orcid":false,"given":"Kui","family":"Ren","sequence":"additional","affiliation":[{"name":"School of Cyber Science and Technology, Zhejiang University, China"}]}],"member":"320","published-online":{"date-parts":[[2024,4,5]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.21437\/SSW.2019-17"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3332167.3357106"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341162.3346274"},{"key":"e_1_3_2_1_4_1","first-page":"1","article-title":"Firefox Voice: An Open and Extensible Voice Assistant Built Upon the Web. In Proceedings of ACM CHI, Yoshifumi Kitamura, Aaron Quigley, Katherine Isbister, Takeo Igarashi, Pernille Bj\u00f8rn, and Steven\u00a0Mark Drucker (Eds.). Virtual Event \/ Yokohama","volume":"250","author":"Cambre Julia","year":"2021","unstructured":"Julia Cambre, Alex\u00a0C. Williams, Afsaneh Razi, Ian Bicking, Abraham Wallin, Janice\u00a0Y. Tsai, Chinmay Kulkarni, and Jofish Kaye. 2021. Firefox Voice: An Open and Extensible Voice Assistant Built Upon the Web. In Proceedings of ACM CHI, Yoshifumi Kitamura, Aaron Quigley, Katherine Isbister, Takeo Igarashi, Pernille Bj\u00f8rn, and Steven\u00a0Mark Drucker (Eds.). Virtual Event \/ Yokohama, Japan, 250:1\u2013250:18.","journal-title":"Japan"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-022-13363-4"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2353991"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3596266"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3560905.3568518"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00916"},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of ACM IUI","author":"Long\u00a0Hin Chong Toby","year":"2023","unstructured":"Toby Long\u00a0Hin Chong, Hijung\u00a0Valentina Shin, Deepali Aneja, and Takeo Igarashi. 2023. SoundToons: Exemplar-Based Authoring of Interactive Audio-Driven Animation Sprites. In Proceedings of ACM IUI. Sydney, NSW, Australia, 710\u2013722."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581281"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2009.4960478"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2650"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.3390\/make1010031"},{"key":"e_1_3_2_1_15_1","volume-title":"Voice Games: Investigation Into the Use of Non-speech Voice Input for Making Computer Games More Accessible. In Proceedings of INTERACT(Lecture Notes in Computer Science, Vol.\u00a06946)","author":"Harada Susumu","year":"2011","unstructured":"Susumu Harada, Jacob\u00a0O. Wobbrock, and James\u00a0A. Landay. 2011. Voice Games: Investigation Into the Use of Non-speech Voice Input for Making Computer Games More Accessible. In Proceedings of INTERACT(Lecture Notes in Computer Science, Vol.\u00a06946). Springer, Lisbon, Portugal, 11\u201329."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682336"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2165944"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581641.3584083"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639535"},{"key":"e_1_3_2_1_20_1","volume-title":"FastS2S-VC: Streaming Non-Autoregressive Sequence-to-Sequence Voice Conversion. CoRR abs\/2104.06900","author":"Kameoka Hirokazu","year":"2021","unstructured":"Hirokazu Kameoka, Kou Tanaka, and Takuhiro Kaneko. 2021. FastS2S-VC: Streaming Non-Autoregressive Sequence-to-Sequence Voice Conversion. CoRR abs\/2104.06900 (2021). arXiv:2104.06900"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3001456"},{"key":"e_1_3_2_1_22_1","volume-title":"CycleGAN-VC: Non-parallel Voice Conversion Using Cycle-Consistent Adversarial Networks","author":"Kaneko Takuhiro","unstructured":"Takuhiro Kaneko and Hirokazu Kameoka. 2018. CycleGAN-VC: Non-parallel Voice Conversion Using Cycle-Consistent Adversarial Networks. In Proceeding of IEEE EUSIPCO. Roma, Italy, 2100\u20132104."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682897"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2280"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3563657.3595970"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-77626-8_20"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACRIM.1993.407206"},{"key":"e_1_3_2_1_28_1","volume-title":"Joint Detection and Classification of Singing Voice Melody Using Convolutional Recurrent Neural Networks. Applied Sciences 9, 7","author":"Kum Sangeun","year":"2019","unstructured":"Sangeun Kum and Juhan Nam. 2019. Joint Detection and Classification of Singing Voice Melody Using Convolutional Recurrent Neural Networks. Applied Sciences 9, 7 (2019)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413699"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853897"},{"key":"e_1_3_2_1_31_1","volume-title":"Our World in Data","author":"Mathieu Edouard","year":"2020","unstructured":"Edouard Mathieu, Hannah Ritchie, Lucas Rod\u00e9s-Guirao, Cameron Appel, Charlie Giattino, Joe Hasell, Bobbie Macdonald, Saloni Dattani, Diana Beltekian, Esteban Ortiz-Ospina, and Max Roser. 2020. Coronavirus Pandemic (COVID-19). Our World in Data (2020). https:\/\/ourworldindata.org\/coronavirus."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-1053"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2014.7078543"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1587\/transinf.2015EDP7457"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2013-102"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2014-447"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747020"},{"key":"e_1_3_2_1_38_1","unstructured":"PLAYBOARD. 2022. Most Super Chatted. https:\/\/playboard.co\/en\/youtube-ranking\/most-superchatted-all-channels-in-worldwide-yearend."},{"key":"e_1_3_2_1_39_1","volume-title":"Proceedings of ICLR. OpenReview.net, Virtual Event.","author":"Popov Vadim","year":"2022","unstructured":"Vadim Popov, Ivan Vovk, Vladimir Gogoryan, Tasnima Sadekova, Mikhail\u00a0Sergeevich Kudinov, and Jiansheng Wei. 2022. Diffusion-Based Voice Conversion with Fast Maximum Likelihood Sampling Scheme. In Proceedings of ICLR. OpenReview.net, Virtual Event."},{"key":"e_1_3_2_1_40_1","volume-title":"Proceedings of ICML(Proceedings of Machine Learning Research, Vol.\u00a097)","author":"Qian Kaizhi","year":"2019","unstructured":"Kaizhi Qian, Yang Zhang, Shiyu Chang, Xuesong Yang, and Mark Hasegawa-Johnson. 2019. AutoVC: Zero-Shot Voice Style Transfer with Only Autoencoder Loss. In Proceedings of ICML(Proceedings of Machine Learning Research, Vol.\u00a097). PMLR, Long Beach, California, USA, 5210\u20135219."},{"key":"e_1_3_2_1_41_1","volume-title":"Proceedings of ISCA INTERSPEECH. ISCA, Virtual Event","author":"Saeki Takaaki","year":"2020","unstructured":"Takaaki Saeki, Yuki Saito, Shinnosuke Takamichi, and Hiroshi Saruwatari. 2020. Real-Time, Full-Band, Online DNN-Based Voice Conversion System Using a Single CPU. In Proceedings of ISCA INTERSPEECH. ISCA, Virtual Event, Shanghai, China, 1021\u20131022."},{"key":"e_1_3_2_1_42_1","first-page":"1","article-title":"Can Voice Assistants Sound Cute? Towards a Model of Kawaii Vocalics. In Proceedings of ACM CHI EA. Hamburg","volume":"63","author":"Seaborn Katie","year":"2023","unstructured":"Katie Seaborn, Somang Nam, Julia Keckeis, and Tatsuya Itagaki. 2023. Can Voice Assistants Sound Cute? Towards a Model of Kawaii Vocalics. In Proceedings of ACM CHI EA. Hamburg, Germany, 63:1\u201363:7.","journal-title":"Germany"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3038524"},{"key":"e_1_3_2_1_44_1","unstructured":"SpeechBrain. 2021. Transformer for LibriSpeech (with Transformer LM). https:\/\/huggingface.co\/speechbrain\/asr-transformer-transformerlm-librispeech."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/89.661472"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178896"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683282"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3369811"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543873.3584655"},{"key":"e_1_3_2_1_50_1","unstructured":"Virtual\u00a0YouTuber Wiki. 2023. Kizuna AI. https:\/\/virtualyoutuber.fandom.com\/wiki\/Kizuna_Ai."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2014-448"},{"key":"e_1_3_2_1_52_1","volume-title":"Proceedings of the International Conference on Language Resources and Evaluation (LREC)","author":"Yamagishi Junichi","year":"2019","unstructured":"Junichi Yamagishi, Christophe Veaux, and Kirsten MacDonald. 2019. CSTR VCTK Corpus: English Multi-speaker Corpus for CSTR Voice Cloning Toolkit. Proceedings of the International Conference on Language Resources and Evaluation (LREC) (2019)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682380"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2892235"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3092555"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"}],"event":{"name":"IUI '24: 29th International Conference on Intelligent User Interfaces","location":"Greenville SC USA","acronym":"IUI '24","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 29th International Conference on Intelligent User Interfaces"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640543.3645146","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3640543.3645146","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:56:32Z","timestamp":1764550592000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640543.3645146"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,18]]},"references-count":56,"alternative-id":["10.1145\/3640543.3645146","10.1145\/3640543"],"URL":"https:\/\/doi.org\/10.1145\/3640543.3645146","relation":{},"subject":[],"published":{"date-parts":[[2024,3,18]]},"assertion":[{"value":"2024-04-05","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}