{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,15]],"date-time":"2025-10-15T18:13:43Z","timestamp":1760552023066,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,9]],"date-time":"2023-10-09T00:00:00Z","timestamp":1696809600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"This work was supported by the Development of cognitive\/response advancement technology for AI avatar commercialization project funded by the Brand Engagement Network(BEN)","award":["Q2312881"],"award-info":[{"award-number":["Q2312881"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,9]]},"DOI":"10.1145\/3610661.3616551","type":"proceedings-article","created":{"date-parts":[[2023,10,9]],"date-time":"2023-10-09T16:51:22Z","timestamp":1696870282000},"page":"220-227","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["The KU-ISPL entry to the GENEA Challenge 2023-A Diffusion Model for Co-speech Gesture generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3239-1865","authenticated-orcid":false,"given":"Gwantae","family":"Kim","sequence":"first","affiliation":[{"name":"Korea University, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2560-9261","authenticated-orcid":false,"given":"Yuanming","family":"Li","sequence":"additional","affiliation":[{"name":"Korea University, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8744-4514","authenticated-orcid":false,"given":"Hanseok","family":"Ko","sequence":"additional","affiliation":[{"name":"Korea University, Republic of 
Korea"}]}],"member":"320","published-online":{"date-parts":[[2023,10,9]]},"reference":[{"volume-title":"Computer Graphics Forum, Vol.\u00a039","author":"Alexanderson Simon","key":"e_1_3_2_1_1_1","unstructured":"Simon Alexanderson, Gustav\u00a0Eje Henter, Taras Kucherenko, and Jonas Beskow. 2020. Style-controllable speech-driven gesture synthesis using normalising flows. In Computer Graphics Forum, Vol.\u00a039. Wiley Online Library, 487\u2013496."},{"key":"e_1_3_2_1_2_1","volume-title":"denoise, action! audio-driven motion synthesis with diffusion models. arXiv preprint arXiv:2211.09707","author":"Alexanderson Simon","year":"2022","unstructured":"Simon Alexanderson, Rajmund Nagy, Jonas Beskow, and Gustav\u00a0Eje Henter. 2022. Listen, denoise, action! audio-driven motion synthesis with diffusion models. arXiv preprint arXiv:2211.09707 (2022)."},{"key":"e_1_3_2_1_3_1","volume-title":"GestureDiffuCLIP: Gesture diffusion model with CLIP latents. arXiv preprint arXiv:2303.14613","author":"Ao Tenglong","year":"2023","unstructured":"Tenglong Ao, Zeyi Zhang, and Libin Liu. 2023. GestureDiffuCLIP: Gesture diffusion model with CLIP latents. arXiv preprint arXiv:2303.14613 (2023)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3536221.3558060"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01726"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"e_1_3_2_1_7_1","volume-title":"Diffusion models beat gans on image synthesis. Advances in neural information processing systems 34","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. 
Advances in neural information processing systems 34 (2021), 8780\u20138794."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3422622"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1080\/10867651.1998.10487493"},{"key":"e_1_3_2_1_10_1","volume-title":"Conformer: Convolution-augmented transformer for speech recognition. arXiv preprint arXiv:2005.08100","author":"Gulati Anmol","year":"2020","unstructured":"Anmol Gulati, James Qin, Chung-Cheng Chiu, Niki Parmar, Yu Zhang, Jiahui Yu, Wei Han, Shibo Wang, Zhengdong Zhang, Yonghui Wu, 2020. Conformer: Convolution-augmented transformer for speech recognition. arXiv preprint arXiv:2005.08100 (2020)."},{"key":"e_1_3_2_1_11_1","volume-title":"Gaussian error linear units (gelus). arXiv preprint arXiv:1606.08415","author":"Hendrycks Dan","year":"2016","unstructured":"Dan Hendrycks and Kevin Gimpel. 2016. Gaussian error linear units (gelus). arXiv preprint arXiv:1606.08415 (2016)."},{"key":"e_1_3_2_1_12_1","volume-title":"Denoising diffusion probabilistic models. Advances in neural information processing systems 33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems 33 (2020), 6840\u20136851."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.wocn.2018.07.001"},{"key":"e_1_3_2_1_14_1","volume-title":"Spatial-temporal Transformer-guided Diffusion based Data Augmentation for Efficient Skeleton-based Action Recognition. arXiv preprint arXiv:2302.13434","author":"Jiang Yifan","year":"2023","unstructured":"Yifan Jiang, Han Chen, and Hanseok Ko. 2023. Spatial-temporal Transformer-guided Diffusion based Data Augmentation for Efficient Skeleton-based Action Recognition. 
arXiv preprint arXiv:2302.13434 (2023)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095344"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP46576.2022.9897884"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i7.25996"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308532.3329472"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3382507.3418815"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3577190.3616120"},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision. 763\u2013772","author":"Lee Gilwoo","year":"2019","unstructured":"Gilwoo Lee, Zhiwei Deng, Shugao Ma, Takaaki Shiratori, Siddhartha\u00a0S Srinivasa, and Yaser Sheikh. 2019. Talking with hands 16.2 m: A large-scale dataset of synchronized body-finger motion and audio for conversational motion analysis and synthesis. In Proceedings of the IEEE\/CVF International Conference on Computer Vision. 763\u2013772."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01110"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01022"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU51503.2021.9688219"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01021"},{"key":"e_1_3_2_1_26_1","volume-title":"Savitzky\u2013Golay smoothing and differentiation filter for even number data. Signal processing 85, 7","author":"Luo Jianwen","year":"2005","unstructured":"Jianwen Luo, Kui Ying, and Jing Bai. 2005. Savitzky\u2013Golay smoothing and differentiation filter for even number data. 
Signal processing 85, 7 (2005), 1429\u20131434."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_29_1","volume-title":"Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research 15, 1","author":"Srivastava Nitish","year":"2014","unstructured":"Nitish Srivastava, Geoffrey Hinton, Alex Krizhevsky, Ilya Sutskever, and Ruslan Salakhutdinov. 2014. Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research 15, 1 (2014), 1929\u20131958."},{"key":"e_1_3_2_1_30_1","volume-title":"Human Motion Diffusion Model. In The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SJ1kSyO2jwu","author":"Tevet Guy","year":"2023","unstructured":"Guy Tevet, Sigal Raab, Brian Gordon, Yoni Shafir, Daniel Cohen-or, and Amit\u00a0Haim Bermano. 2023. Human Motion Diffusion Model. In The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SJ1kSyO2jwu"},{"key":"e_1_3_2_1_31_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_32_1","volume-title":"Text embeddings by weakly-supervised contrastive pre-training. arXiv preprint arXiv:2212.03533","author":"Wang Liang","year":"2022","unstructured":"Liang Wang, Nan Yang, Xiaolong Huang, Binxing Jiao, Linjun Yang, Daxin Jiang, Rangan Majumder, and Furu Wei. 2022. Text embeddings by weakly-supervised contrastive pre-training. 
arXiv preprint arXiv:2212.03533 (2022)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3414685.3417838"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793720"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3536221.3558058"},{"key":"e_1_3_2_1_36_1","volume-title":"MotionDiffuse: Text-Driven Human Motion Generation with Diffusion Model. arXiv preprint arXiv:2208.15001","author":"Zhang Mingyuan","year":"2022","unstructured":"Mingyuan Zhang, Zhongang Cai, Liang Pan, Fangzhou Hong, Xinying Guo, Lei Yang, and Ziwei Liu. 2022. MotionDiffuse: Text-Driven Human Motion Generation with Diffusion Model. arXiv preprint arXiv:2208.15001 (2022)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3536221.3558063"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01016"}],"event":{"name":"ICMI '23: INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"],"location":"Paris France","acronym":"ICMI '23"},"container-title":["International Conference on Multimodal 
Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3610661.3616551","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3610661.3616551","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T19:34:23Z","timestamp":1755891263000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3610661.3616551"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,9]]},"references-count":38,"alternative-id":["10.1145\/3610661.3616551","10.1145\/3610661"],"URL":"https:\/\/doi.org\/10.1145\/3610661.3616551","relation":{},"subject":[],"published":{"date-parts":[[2023,10,9]]},"assertion":[{"value":"2023-10-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}