{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T05:25:56Z","timestamp":1755926756299,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,3,27]],"date-time":"2023-03-27T00:00:00Z","timestamp":1679875200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"JST CREST","award":["JPMJCR17A1"],"award-info":[{"award-number":["JPMJCR17A1"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,3,27]]},"DOI":"10.1145\/3581641.3584047","type":"proceedings-article","created":{"date-parts":[[2023,3,27]],"date-time":"2023-03-27T16:16:52Z","timestamp":1679933812000},"page":"710-722","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["SoundToons: Exemplar-Based Authoring of Interactive Audio-Driven Animation Sprites"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5992-5647","authenticated-orcid":false,"given":"Toby","family":"Chong","sequence":"first","affiliation":[{"name":"The University of Tokyo, Japan and The University of Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8798-4580","authenticated-orcid":false,"given":"Hijung Valentina","family":"Shin","sequence":"additional","affiliation":[{"name":"Adobe Research, United States and Adobe Research, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9610-5648","authenticated-orcid":false,"given":"Deepali","family":"Aneja","sequence":"additional","affiliation":[{"name":"Adobe Research, United States and Adobe Research, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5495-6441","authenticated-orcid":false,"given":"Takeo","family":"Igarashi","sequence":"additional","affiliation":[{"name":"The University of Tokyo, Japan and The University of Tokyo, Japan"}]}],"member":"320","published-online":{"date-parts":[[2023,3,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2008. Princess Fairy Tale Maker. https:\/\/www.duckduckmoose.com\/educational-iphone-itouch-apps-for-kids\/princess-fairy-tale-maker"},{"key":"e_1_3_2_1_2_1","unstructured":"2016. The Simpsons."},{"key":"e_1_3_2_1_3_1","volume-title":"Real-Time Lip Sync for Live 2D Animation. CoRR abs\/1910.08685","author":"Aneja Deepali","year":"2019","unstructured":"Deepali Aneja and Wilmot Li. 2019. Real-Time Lip Sync for Live 2D Animation. CoRR abs\/1910.08685 (2019). arxiv:1910.08685http:\/\/arxiv.org\/abs\/1910.08685"},{"key":"e_1_3_2_1_4_1","volume-title":"Real-time lip sync for live 2d animation. arXiv preprint arXiv:1910.08685","author":"Aneja Deepali","year":"2019","unstructured":"Deepali Aneja and Wilmot Li. 2019. Real-time lip sync for live 2d animation. arXiv preprint arXiv:1910.08685 (2019)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/1409060.1409077"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1111\/cgf.13128"},{"key":"e_1_3_2_1_7_1","unstructured":"Charlotte Church. 2000. Ave Maria (Dormition Abbey 2000). https:\/\/www.youtube.com\/watch?v=Uch0FlNo3Go"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Daniel Cudeiro Timo Bolkart Cassidy Laidlaw Anurag Ranjan and Michael\u00a0J. Black. 2019. Capture Learning and Synthesis of 3D Speaking Styles. arxiv:1905.03079\u00a0[cs.CV]","DOI":"10.1109\/CVPR.2019.01034"},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of The Eleventh International Society for Music Information Retrieval Conference (ISMIR 2010)","author":"Fiebrink Rebecca","year":"2010","unstructured":"Rebecca Fiebrink and Perry\u00a0R Cook. 2010. The Wekinator: a system for real-time, interactive machine learning in music. In Proceedings of The Eleventh International Society for Music Information Retrieval Conference (ISMIR 2010)(Utrecht), Vol.\u00a03."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2668064.2668096"},{"key":"e_1_3_2_1_11_1","volume-title":"International Conference on Advances in Computer Entertainment. Springer, 153\u2013171","author":"Furukawa Shoichi","year":"2017","unstructured":"Shoichi Furukawa, Tsukasa Fukusato, Shugo Yamaguchi, and Shigeo Morishima. 2017. Voice Animator: Automatic Lip-Synching in Limited Animation by Audio. In International Conference on Advances in Computer Entertainment. Springer, 153\u2013171."},{"key":"e_1_3_2_1_12_1","volume-title":"Proc. of HCI International","author":"Goto Masataka","year":"2001","unstructured":"Masataka Goto, Katunobu Itou, Tomoyosi Akiba, and Satoru Hayamizu. 2001. Speech completion: New speech interface with on-demand completion assistance. In Proc. of HCI International 2001. Citeseer."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1296843.1296850"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/1240624.1240646"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/502348.502372"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2642918.2647375"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3196709.3196736"},{"key":"e_1_3_2_1_18_1","volume-title":"CREPE: A Convolutional Representation for Pitch Estimation. arxiv:1802.06182\u00a0[eess.AS]","author":"Kim Jong\u00a0Wook","year":"2018","unstructured":"Jong\u00a0Wook Kim, Justin Salamon, Peter Li, and Juan\u00a0Pablo Bello. 2018. CREPE: A Convolutional Representation for Pitch Estimation. arxiv:1802.06182\u00a0[eess.AS]"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Sergey Levine Christian Theobalt and Vladlen Koltun. 2009. Real-time prosody-driven synthesis of body language. In ACM SIGGRAPH Asia 2009 papers. 1\u201310.","DOI":"10.1145\/1661412.1618518"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3130800.3130813"},{"key":"e_1_3_2_1_21_1","unstructured":"mamoworld tools. [n. d.]. BeatEdit for After Effects. https:\/\/aescripts.com\/beatedit-for-after-effects\/"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.3955228"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/1185657.1185842"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300852"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/545261.545281"},{"key":"e_1_3_2_1_26_1","volume-title":"First Order Motion Model for Image Animation. In Conference on Neural Information Processing Systems (NeurIPS).","author":"Siarohin Aliaksandr","year":"2019","unstructured":"Aliaksandr Siarohin, St\u00e9phane Lathuili\u00e8re, Sergey Tulyakov, Elisa Ricci, and Nicu Sebe. 2019. First Order Motion Model for Image Animation. In Conference on Neural Information Processing Systems (NeurIPS)."},{"volume-title":"Proc. Computer Vision and Pattern Recognition (CVPR), IEEE.","author":"Thies J.","key":"e_1_3_2_1_27_1","unstructured":"J. Thies, M. Zollh\u00f6fer, M. Stamminger, C. Theobalt, and M. Nie\u00dfner. 2016. Face2Face: Real-time Face Capture and Reenactment of RGB Videos. In Proc. Computer Vision and Pattern Recognition (CVPR), IEEE."},{"key":"e_1_3_2_1_28_1","unstructured":"F. Thomas and O. Johnston. 1981. The Illusion of Life: Disney Animation. Disney Editions. https:\/\/books.google.co.jp\/books?id=k5TMoAEACAAJ"},{"key":"e_1_3_2_1_29_1","unstructured":"Ting-Chun Wang Ming-Yu Liu Jun-Yan Zhu Guilin Liu Andrew Tao Jan Kautz and Bryan Catanzaro. 2018. Video-to-Video Synthesis. In Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054708"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3126594.3126596"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3377325.3377505"},{"key":"e_1_3_2_1_33_1","volume-title":"Fast Bi-layer Neural Synthesis of One-Shot Realistic Head Avatars. In European Conference on Computer Vision. Springer, 524\u2013540","author":"Zakharov Egor","year":"2020","unstructured":"Egor Zakharov, Aleksei Ivakhnenko, Aliaksandra Shysheya, and Victor Lempitsky. 2020. Fast Bi-layer Neural Synthesis of One-Shot Realistic Head Avatars. In European Conference on Computer Vision. Springer, 524\u2013540."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3414685.3417774"}],"event":{"name":"IUI '23: 28th International Conference on Intelligent User Interfaces","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction"],"location":"Sydney NSW Australia","acronym":"IUI '23"},"container-title":["Proceedings of the 28th International Conference on Intelligent User Interfaces"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581641.3584047","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581641.3584047","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:20Z","timestamp":1750178180000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581641.3584047"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3,27]]},"references-count":34,"alternative-id":["10.1145\/3581641.3584047","10.1145\/3581641"],"URL":"https:\/\/doi.org\/10.1145\/3581641.3584047","relation":{},"subject":[],"published":{"date-parts":[[2023,3,27]]},"assertion":[{"value":"2023-03-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}