{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:58:00Z","timestamp":1781539080450,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810842","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"1842-1850","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["NeRAG: Neuro-Explicit Retrieval-Augmented Generation for Real-Time Interaction in Digital Humans"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1613-0665","authenticated-orcid":false,"given":"Yueqian","family":"Guo","sequence":"first","affiliation":[{"name":"Jiangxi University of Finance and Economics, Nanchang, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8023-4821","authenticated-orcid":false,"given":"Tianzhao","family":"Li","sequence":"additional","affiliation":[{"name":"Communication University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1055-6334","authenticated-orcid":false,"given":"Xin","family":"Lv","sequence":"additional","affiliation":[{"name":"Communication University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8424-8872","authenticated-orcid":false,"given":"Jiehaolin","family":"Chen","sequence":"additional","affiliation":[{"name":"Communication University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-4783-6213","authenticated-orcid":false,"given":"Zhaohan","family":"Wang","sequence":"additional","affiliation":[{"name":"Communication University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6969-2189","authenticated-orcid":false,"given":"Yurun","family":"Chen","sequence":"additional","affiliation":[{"name":"Communication University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0969-7151","authenticated-orcid":false,"given":"Sirui","family":"Xiao","sequence":"additional","affiliation":[{"name":"Communication University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7568-5413","authenticated-orcid":false,"given":"Yezi","family":"He","sequence":"additional","affiliation":[{"name":"Communication University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0650-3346","authenticated-orcid":false,"given":"Helin","family":"Li","sequence":"additional","affiliation":[{"name":"Communication University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9534-1777","authenticated-orcid":false,"given":"Fan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Communication University of Zhejiang, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"crossref","unstructured":"Okan Arikan and David\u00a0A Forsyth. 2002. Interactive motion generation from examples. ACM Transactions on Graphics (TOG) 21 3 (2002) 483\u2013490.","DOI":"10.1145\/566654.566606"},{"key":"e_1_3_3_2_3_2","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Bengio Samy","year":"2015","unstructured":"Samy Bengio, Oriol Vinyals, Navdeep Jaitly, and Noam Shazeer. 2015. Scheduled sampling for sequence prediction with recurrent neural networks. In Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_2_4_2","volume-title":"Proc. of GDC","author":"Bollo David","year":"2018","unstructured":"David Bollo. 2018. Inertialization: High-performance animation transitions in\u2019gears of war\u2019. In Proc. of GDC , Vol.\u00a02016."},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00702"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","unstructured":"Kang Chen Zhipeng Tan Jin Lei Song-Hai Zhang Yuan-Chen Guo Weidong Zhang and Shi-Min Hu. 2021. ChoreoMaster: choreography-oriented music-driven dance synthesis. ACM Transactions on Graphics 40 4 (Aug. 2021) 1\u201313. 10.1145\/3450626.3459932","DOI":"10.1145\/3450626.3459932"},{"key":"e_1_3_3_2_7_2","unstructured":"Simon Clavet. 2016. Motion Matching and The Road to Next-Gen Animation. GDC 2016 Presentation (2016). https:\/\/archive.org\/details\/GDC2016Clavet"},{"key":"e_1_3_3_2_8_2","first-page":"390","volume-title":"European Conference on Computer Vision","author":"Dai Wenxun","year":"2024","unstructured":"Wenxun Dai, Ling-Hao Chen, Jingbo Wang, Jinpeng Liu, Bo Dai, and Yansong Tang. 2024. Motionlcm: Real-time controllable motion generation via latent consistency model. In European Conference on Computer Vision. Springer, 390\u2013408."},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"crossref","unstructured":"Ikhsanul Habibie Mohamed Elgharib Christian Theobalt et\u00a0al. 2022. A motion matching-based framework for controllable gesture generation from speech. ACM Transactions on Graphics (TOG) 41 4 (2022) 1\u201315.","DOI":"10.1145\/3528233.3530750"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"crossref","unstructured":"Daniel Holden Oussama Kanoun Maksym Perepichka and Tiberiu Popa. 2020. Learned motion matching. ACM Transactions on Graphics (ToG) 39 4 (2020) 53\u20131.","DOI":"10.1145\/3386569.3392440"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"crossref","unstructured":"Daniel Holden Jun Saito and Taku Komura. 2016. Deep phase manifolds for changing character stylization. ACM Transactions on Graphics (TOG) 35 4 (2016).","DOI":"10.1145\/2897824.2925975"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00133"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00205"},{"key":"e_1_3_3_2_14_2","volume-title":"ACM Transactions on Graphics (TOG)","author":"Kou Qilong","year":"2023","unstructured":"Qilong Kou, Sida Peng, Robinson Yang, Song-Hai Zhang, Gaofeng Xu, Zhipeng Tan, and Xiaowei Zhou. 2023. BodyFormer: Semantics-guided 3D Body Gesture Generation with Transformer. In ACM Transactions on Graphics (TOG)."},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Lucas Kovar Michael Gleicher and Fr\u00e9d\u00e9ric Pighin. 2002. Motion graphs. ACM Transactions on Graphics (TOG) 21 3 (2002) 473\u2013482.","DOI":"10.1145\/566654.566605"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3596711.3596788"},{"key":"e_1_3_3_2_17_2","first-page":"9459","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, Tim Rockt\u00e4schel, Sebastian Riedel, and Douwe Kiela. 2020. Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. In Advances in Neural Information Processing Systems (NeurIPS) , Vol.\u00a033. 9459\u20139474."},{"key":"e_1_3_3_2_18_2","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Li Jing","year":"2021","unstructured":"Jing Li, Zongwei Di, YANYI Xu, et\u00a0al. 2021. Audio-driven robot upper body gesture synthesis. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_3_2_19_2","volume-title":"IEEE Conference on Virtual Reality and 3D User Interfaces (VR)","author":"Li Jing","year":"2021","unstructured":"Jing Li, Di Kang, Wenjie Pei, et\u00a0al. 2021. Audio2Gestures: Generating Diverse Gestures from Audio. In IEEE Conference on Virtual Reality and 3D User Interfaces (VR)."},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00115"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20071-7_36"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51701.2025.01296"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.497"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/1476589.1476628"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01545"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.5555\/2821575"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01080"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20047-2_28"},{"key":"e_1_3_3_2_29_2","unstructured":"Sinan Sonlu Bennie Bendiksen Funda Durupinar and U\u011fur G\u00fcd\u00fckbay. 2024. The Effects of Embodiment and Personality Expression on Learning in LLM-based Educational Agents. http:\/\/arxiv.org\/abs\/2407.10993 arXiv:https:\/\/arXiv.org\/abs\/2407.10993 [cs]."},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"crossref","unstructured":"Xiangjun Tang He Wang Bo Hu Xu Gong Ruifan Yi Qilong Kou and Xiaogang Jin. 2022. Real-time controllable motion transition for characters. ACM Transactions on Graphics (TOG) 41 4 (2022) 1\u201310.","DOI":"10.1145\/3528223.3530090"},{"key":"e_1_3_3_2_31_2","volume-title":"International Conference on Learning Representations (ICLR)","author":"Tevet Guy","year":"2023","unstructured":"Guy Tevet, Sigal Raab, Brian Gordon, et\u00a0al. 2023. Human motion diffusion model. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613905.3651026"},{"key":"e_1_3_3_2_33_2","volume-title":"The Thirteenth International Conference on Learning Representations","author":"Wu Qi","year":"2025","unstructured":"Qi Wu, Yubo Zhao, Yifan Wang, Xinhang Liu, Yu-Wing Tai, and Chi-Keung Tang. 2025. Motion-Agent: A Conversational Framework for Human Motion Generation with LLMs. In The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=AvOhBgsE5R"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","unstructured":"Lixing Xiao Shunlin Lu Huaijin Pi Ke Fan Liang Pan Yueer Zhou Ziyong Feng Xiaowei Zhou Sida Peng and Jingbo Wang. 2025. MotionStreamer: Streaming Motion Generation via Diffusion-based Autoregressive Model in Causal Latent Space. 10.48550\/arXiv.2503.15451arXiv:https:\/\/arXiv.org\/abs\/2503.15451 [cs] version: 1.","DOI":"10.48550\/arXiv.2503.15451"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51701.2025.01172"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/650"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","unstructured":"Heyuan Yao Zhenhua Song Baoquan Chen and Libin Liu. 2022. ControlVAE: Model-Based Learning of Generative Controllers for Physics-Based Characters. ACM Transactions on Graphics 41 6 (Dec. 2022) 1\u201316. 10.1145\/3550454.3555434arXiv:https:\/\/arXiv.org\/abs\/2210.06063 [cs].","DOI":"10.1145\/3550454.3555434"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"crossref","unstructured":"Youngwoo Yoon Bok Cha Joo-Haeng Lee Minsu Jang Jaeyeon Lee Jaehong Kim and Geehyuk Lee. 2020. Speech gesture generation from the trimodal context of text audio and speaker identity. ACM Transactions on Graphics (TOG) 39 6 (2020) 1\u201316.","DOI":"10.1145\/3414685.3417838"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","unstructured":"Fan Zhang Zhaohan Wang Xin Lyu Siyuan Zhao Mengjian Li Weidong Geng Naye Ji Hui Du Fuxing Gao Hao Wu and Shunman Li. 2024. Speech-Driven Personalized Gesture Synthetics: Harnessing Automatic Fuzzy Feature Inference. IEEE Transactions on Visualization and Computer Graphics 30 10 (Oct. 2024) 6984\u20136996. 10.1109\/TVCG.2024.3393236","DOI":"10.1109\/TVCG.2024.3393236"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"crossref","unstructured":"Mingyuan Zhang Zhongang Cai Liang Pan Fangzhou Hong Xinying Guo Lei Yang and Ziwei Liu. 2024. Motiondiffuse: Text-driven human motion generation with diffusion model. IEEE transactions on pattern analysis and machine intelligence 46 6 (2024) 4115\u20134128.","DOI":"10.1109\/TPAMI.2024.3355414"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00040"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:51:27Z","timestamp":1781538687000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810842"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":40,"alternative-id":["10.1145\/3805622.3810842","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810842","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}