{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T16:09:19Z","timestamp":1777565359402,"version":"3.51.4"},"reference-count":52,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/100020595","name":"NSTC","doi-asserted-by":"publisher","award":["113-2221-E-001-017-MY3"],"award-info":[{"award-number":["113-2221-E-001-017-MY3"]}],"id":[{"id":"10.13039\/100020595","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Computers &amp; Graphics"],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1016\/j.cag.2026.104594","type":"journal-article","created":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T06:52:30Z","timestamp":1775544750000},"page":"104594","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"special_numbering":"C","title":["Tai-Chi: Text-to-motion generation with locality-aware bipartite body-part motion prior"],"prefix":"10.1016","volume":"136","author":[{"given":"Jian-Kai","family":"Zhu","sequence":"first","affiliation":[]},{"given":"Wen-Li","family":"Wei","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9237-4119","authenticated-orcid":false,"given":"Jen-Chun","family":"Lin","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.cag.2026.104594_b1","doi-asserted-by":"crossref","unstructured":"Zhang Jianrong, Zhang Yangsong, Cun Xiaodong, Huang Shaoli, Zhang Yong, Zhao Hongwei, Lu Hongtao, Shen Xi. T2M-GPT: Generating Human Motion from Textual Descriptions with Discrete Representations. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2023.","DOI":"10.1109\/CVPR52729.2023.01415"},{"key":"10.1016\/j.cag.2026.104594_b2","doi-asserted-by":"crossref","unstructured":"Zhong Chongyang, Hu Lei, Zhang Zihao, Xia Shihong. AttT2M: Text-driven Human Motion Generation with Multi-Perspective Attention Mechanism. In: International conference on computer vision. 2023.","DOI":"10.1109\/ICCV51070.2023.00053"},{"key":"10.1016\/j.cag.2026.104594_b3","doi-asserted-by":"crossref","unstructured":"Guo Chuan, Mu Yuxuan, Javed Muhammad Gohar, Wang Sen, Cheng Li. MoMask: Generative Masked Modeling of 3D Human Motions. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2024.","DOI":"10.1109\/CVPR52733.2024.00186"},{"key":"10.1016\/j.cag.2026.104594_b4","doi-asserted-by":"crossref","unstructured":"Pinyoanuntapong Ekkasit, Wang Pu, Lee Minwoo, Chen Chen. MMM: Generative Masked Motion Model. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2024.","DOI":"10.1109\/CVPR52733.2024.00153"},{"key":"10.1016\/j.cag.2026.104594_b5","unstructured":"Jiang Biao, Chen Xin, Liu Wen, Yu Jingyi, Yu Gang, Chen Tao. MotionGPT: Human motion as a foreign language. In: Proceedings of the international conference on neural information processing systems. 2023."},{"key":"10.1016\/j.cag.2026.104594_b6","doi-asserted-by":"crossref","unstructured":"Zou Qiran, Yuan Shangyuan, Du Shian, Wang Yu, Liu Chang, Xu Yi, Chen Jie, Ji Xiangyang. ParCo: Part-Coordinating Text-to-Motion Synthesis. In: European conference on computer vision. 2024.","DOI":"10.1007\/978-3-031-72992-8_8"},{"key":"10.1016\/j.cag.2026.104594_b7","unstructured":"Oord Aaron van den, Vinyals Oriol, Kavukcuoglu Koray. Neural Discrete Representation Learning. In: Proceedings of the international conference on neural information processing systems. 2017."},{"key":"10.1016\/j.cag.2026.104594_b8","doi-asserted-by":"crossref","unstructured":"Wan Weilin, Dou Zhiyang, Komura Taku, Wang Wenping, Jayaraman Dinesh, Liu Lingjie. TLControl: Trajectory and Language Control for Human Motion Synthesis. In: European conference on computer vision. 2024.","DOI":"10.1007\/978-3-031-72913-3_3"},{"key":"10.1016\/j.cag.2026.104594_b9","doi-asserted-by":"crossref","unstructured":"Wei Wen-Li, Lin Jen-Chun, Liu Tyng-Luh, Liao Hong-Yuan Mark. Capturing Humans in Motion: Temporal-Attentive 3D Human Pose and Shape Estimation from Monocular Video. In: IEEE\/CVF conference on computer vision and pattern recognition. 2022.","DOI":"10.1109\/CVPR52688.2022.01286"},{"issue":"11","key":"10.1016\/j.cag.2026.104594_b10","doi-asserted-by":"crossref","first-page":"7232","DOI":"10.1109\/TPAMI.2024.3388042","article-title":"Bridging actions: Generate 3D poses and shapes in-between photos","volume":"46","author":"Wei","year":"2024","journal-title":"IEEE Trans Pattern Anal Mach Intell (TPAMI)"},{"key":"10.1016\/j.cag.2026.104594_b11","doi-asserted-by":"crossref","unstructured":"Mao Wei, Liu Miaomiao, Salzemann Mathieu, Li Hongdong. Learning Trajectory Dependencies for Human Motion Prediction. In: International conference on computer vision. 2019.","DOI":"10.1109\/ICCV.2019.00958"},{"issue":"01","key":"10.1016\/j.cag.2026.104594_b12","doi-asserted-by":"crossref","first-page":"681","DOI":"10.1109\/TPAMI.2021.3139918","article-title":"Investigating pose representations and motion contexts modeling for 3D motion prediction","volume":"45","author":"Liu","year":"2023","journal-title":"IEEE Trans Pattern Anal Mach Intell (TPAMI)"},{"key":"10.1016\/j.cag.2026.104594_b13","doi-asserted-by":"crossref","unstructured":"Petrovich Mathis, Black Michael J, Varol G\u00fcl. Action-conditioned 3D Human Motion Synthesis with Transformer VAE. In: International conference on computer vision. 2021.","DOI":"10.1109\/ICCV48922.2021.01080"},{"key":"10.1016\/j.cag.2026.104594_b14","doi-asserted-by":"crossref","unstructured":"Guo Chuan, Zuo Xinxin, Wang Sen, Zou Shihao, Sun Qingyao, Deng Annan, Gong Minglun, Cheng Li. Action2Motion: Conditioned Generation of 3D Human Motions. In: Proceedings of the ACM international conference on multimedia. 2020.","DOI":"10.1145\/3394171.3413635"},{"key":"10.1016\/j.cag.2026.104594_b15","doi-asserted-by":"crossref","unstructured":"Siyao Li, Yu Weijiang, Gu Tianpei, Lin Chunze, Wang Quan, Qian Chen, Loy Chen Change, Liu Ziwei. Bailando: 3D Dance Generation by Actor-Critic GPT with Choreographic Memory. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2022.","DOI":"10.1109\/CVPR52688.2022.01077"},{"key":"10.1016\/j.cag.2026.104594_b16","doi-asserted-by":"crossref","unstructured":"Tseng Jonathan, Castellon Rodrigo, Liu C Karen. EDGE: Editable Dance Generation From Music. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2023.","DOI":"10.1109\/CVPR52729.2023.00051"},{"key":"10.1016\/j.cag.2026.104594_b17","doi-asserted-by":"crossref","unstructured":"Ahuja Chaitanya, Morency Louis-Philippe. Language2Pose: Natural Language Grounded Pose Forecasting. In: International conference on 3D vision. 2019.","DOI":"10.1109\/3DV.2019.00084"},{"key":"10.1016\/j.cag.2026.104594_b18","doi-asserted-by":"crossref","unstructured":"Guo Chuan, Zou Shihao, Zuo Xinxin, Wang Sen, Ji Wei, Li Xingyu, Cheng Li. Generating Diverse and Natural 3D Human Motions from Text. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2022.","DOI":"10.1109\/CVPR52688.2022.00509"},{"key":"10.1016\/j.cag.2026.104594_b19","doi-asserted-by":"crossref","unstructured":"Ghosh Anindita, Cheema Noshaba, Oguz Cennet, Theobalt Christian, Slusallek Philipp. Synthesis of Compositional Animations from Textual Descriptions. In: International conference on computer vision. 2021.","DOI":"10.1109\/ICCV48922.2021.00143"},{"key":"10.1016\/j.cag.2026.104594_b20","unstructured":"Tevet Guy, Raab Sigal, Gordon Brian, Shafir Yoni, Cohen-or Daniel, Bermano Amit Haim. Human Motion Diffusion Model. In: International conference on learning representations. 2023."},{"key":"10.1016\/j.cag.2026.104594_b21","doi-asserted-by":"crossref","unstructured":"Chen Xin, Jiang Biao, Liu Wen, Huang Zilong, Fu Bin, Chen Tao, Yu Gang. Executing your Commands via Motion Diffusion in Latent Space. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2023.","DOI":"10.1109\/CVPR52729.2023.01726"},{"key":"10.1016\/j.cag.2026.104594_b22","doi-asserted-by":"crossref","unstructured":"Kim Jihoon, Kim Jiseob, Choi Sungjoon. FLAME: Free-form language-based motion synthesis & editing. In: Proceedings of the AAAI conference on artificial intelligence. 2023.","DOI":"10.1609\/aaai.v37i7.25996"},{"key":"10.1016\/j.cag.2026.104594_b23","doi-asserted-by":"crossref","unstructured":"Kong Hanyang, Gong Kehong, Lian Dongze, Mi Michael Bi, Wang Xinchao. Priority-centric Human Motion Generation in Discrete Latent Space. In: International conference on computer vision. 2023.","DOI":"10.1109\/ICCV51070.2023.01360"},{"key":"10.1016\/j.cag.2026.104594_b24","doi-asserted-by":"crossref","unstructured":"Zhang Mingyuan, Guo Xinying, Pan Liang, Cai Zhongang, Hong Fangzhou, Li Huirong, Yang Lei, Liu Ziwei. ReMoDiffuse: Retrieval-augmented Motion Diffusion Model. In: International conference on computer vision. 2023.","DOI":"10.1109\/ICCV51070.2023.00040"},{"issue":"6","key":"10.1016\/j.cag.2026.104594_b25","doi-asserted-by":"crossref","first-page":"4115","DOI":"10.1109\/TPAMI.2024.3355414","article-title":"MotionDiffuse: Text-driven human motion generation with diffusion model","volume":"46","author":"Zhang","year":"2024","journal-title":"IEEE Trans Pattern Anal Mach Intell (TPAMI)"},{"key":"10.1016\/j.cag.2026.104594_b26","series-title":"DiverseMotion: Towards diverse human motion generation via discrete diffusion","author":"Lou","year":"2023"},{"key":"10.1016\/j.cag.2026.104594_b27","doi-asserted-by":"crossref","unstructured":"Guo Chuan, Zuo Xinxin, Wang Sen, Cheng Li. TM2T: Stochastic and Tokenized Modeling for the Reciprocal Generation of 3D Human Motions and Texts. In: European conference on computer vision. 2022.","DOI":"10.1007\/978-3-031-19833-5_34"},{"key":"10.1016\/j.cag.2026.104594_b28","unstructured":"Lu Shunlin, Chen Ling-Hao, Zeng Ailing, Lin Jing, Zhang Ruimao, Zhang Lei, Shum Heung-Yeung. HumanTOMATO: Text-aligned Whole-body Motion Generation. In: Proceedings of the international conference on machine learning. 2024."},{"key":"10.1016\/j.cag.2026.104594_b29","doi-asserted-by":"crossref","unstructured":"Tevet Guy, Gordon Brian, Hertz Amir, Bermano Amit H, Cohen-Or Daniel. Motionclip: Exposing human motion generation to clip space. In: European conference on computer vision. 2022.","DOI":"10.1007\/978-3-031-20047-2_21"},{"key":"10.1016\/j.cag.2026.104594_b30","unstructured":"Cho Jungbin, Kim Junwan, Kim Jisoo, Kim Minseo, Kang Mingu, Hong Sungeun, Oh Tae-Hyun, Yu Youngjae. DisCoRD: Discrete Tokens to Continuous Motion via Rectified Flow Decoding. In: International conference on computer vision. 2025."},{"key":"10.1016\/j.cag.2026.104594_b31","unstructured":"Lin Angela S, Wu Lemeng, Corona Rodolfo, Tai Kevin, Huang Qixing, Mooney Raymond J. Generating Animated Videos of Human Activities from Natural Language Descriptions. In: Proceedings of the visually grounded interaction and language workshop at neurIPS. 2018."},{"key":"10.1016\/j.cag.2026.104594_b32","doi-asserted-by":"crossref","first-page":"13","DOI":"10.1016\/j.robot.2018.07.006","article-title":"Learning a bidirectional mapping between human whole-body motion and natural language using deep recurrent neural networks","volume":"109","author":"Plappert","year":"2018","journal-title":"Robot Auton Syst"},{"key":"10.1016\/j.cag.2026.104594_b33","doi-asserted-by":"crossref","unstructured":"Petrovich Mathis, Black Michael J, Varol G\u00fcl. TEMOS: Generating diverse human motions from textual descriptions. In: European conference on computer vision. 2022.","DOI":"10.1007\/978-3-031-20047-2_28"},{"key":"10.1016\/j.cag.2026.104594_b34","doi-asserted-by":"crossref","unstructured":"Athanasiou Nikos, Petrovich Mathis, Black Michael J, Varol G\u00fcl. TEACH: Temporal Action Compositions for 3D Humans. In: International conference on 3D vision. 2022.","DOI":"10.1109\/3DV57658.2022.00053"},{"key":"10.1016\/j.cag.2026.104594_b35","unstructured":"Devlin Jacob, Chang Ming-Wei, Lee Kenton, Toutanova Kristina. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In: Proceedings of the conference of the North American chapter of the association for computational linguistics: human language technologies. 2019."},{"key":"10.1016\/j.cag.2026.104594_b36","doi-asserted-by":"crossref","unstructured":"Pinyoanuntapong Ekkasit, Saleem Muhammad Usama, Wang Pu, Lee Minwoo, Das Srijan, Chen Chen. BAMM: Bidirectional Autoregressive Motion Model. In: European conference on computer vision. 2024.","DOI":"10.1007\/978-3-031-72633-0_10"},{"key":"10.1016\/j.cag.2026.104594_b37","unstructured":"Zhang Mingyuan, Li Huirong, Cai Zhongang, Ren Jiawei, Yang Lei, Liu Ziwei. FineMoGen: Fine-Grained Spatio-Temporal Motion Generation and Editing. In: Proceedings of the international conference on neural information processing systems. 2023."},{"key":"10.1016\/j.cag.2026.104594_b38","doi-asserted-by":"crossref","unstructured":"Sun Haowen, Zheng Ruikun, Huang Haibin, Ma Chongyang, Huang Hui, Hu Ruizhen. LGTM: Local-to-Global Text-Driven Human Motion Diffusion Model. In: ACM SIGGRAPH Conference Papers. 2024.","DOI":"10.1145\/3641519.3657422"},{"key":"10.1016\/j.cag.2026.104594_b39","doi-asserted-by":"crossref","unstructured":"Jin Peng, Li Hao, Cheng Zesen, Li Kehan, Yu Runyi, Liu Chang, Ji Xiangyang, Yuan Li, Chen Jie. Local Action-Guided Motion Diffusion Model for Text-to-Motion Generation. In: European conference on computer vision. 2024.","DOI":"10.1007\/978-3-031-72698-9_23"},{"issue":"7","key":"10.1016\/j.cag.2026.104594_b40","doi-asserted-by":"crossref","first-page":"4277","DOI":"10.1007\/s11263-025-02392-9","article-title":"Fg-T2M++: LLMs-augmented fine-grained text driven human motion generation","volume":"133","author":"Wang","year":"2025","journal-title":"Int J Comput Vis (IJCV)"},{"key":"10.1016\/j.cag.2026.104594_b41","unstructured":"Yuan Weihao, He Yisheng, Shen Weichao, Dong Yuan, Gu Xiaodong, Dong Zilong, Bo Liefeng, Huang Qixing. MoGenTS: Motion Generation based on Spatial-Temporal Joint Modeling. In: Proceedings of the international conference on neural information processing systems. 2024."},{"key":"10.1016\/j.cag.2026.104594_b42","unstructured":"Radford Alec, Kim Jong Wook, Hallacy Chris, Ramesh Aditya, Goh Gabriel, Agarwal Sandhini, Sastry Girish, Askell Amanda, Mishkin Pamela, Clark Jack, Krueger Gretchen, Sutskever Ilya. Learning Transferable Visual Models From Natural Language Supervision. In: Proceedings of the international conference on machine learning. 2021."},{"key":"10.1016\/j.cag.2026.104594_b43","unstructured":"Vaswani Ashish, Shazeer Noam, Parmar Niki, Uszkoreit Jakob, Jones Llion, Gomez Aidan N, Kaiser \u0141ukasz, Polosukhin Illia. Attention is All you Need. In: Proceedings of the international conference on neural information processing systems. 2017."},{"key":"10.1016\/j.cag.2026.104594_b44","unstructured":"Holtzman Ari, Buys Jan, Du Li, Forbes Maxwell, Choi Yejin. The Curious Case of Neural Text Degeneration. In: International conference on learning representations. 2020."},{"key":"10.1016\/j.cag.2026.104594_b45","unstructured":"Ho Jonathan, Salimans Tim. Classifier-free Diffusion Guidance. In: NeurIPS workshop on deep generative models and downstream applications. 2021."},{"issue":"4","key":"10.1016\/j.cag.2026.104594_b46","doi-asserted-by":"crossref","first-page":"236","DOI":"10.1089\/big.2016.0028","article-title":"The KIT motion-language dataset","volume":"4","author":"Plappert","year":"2016","journal-title":"Big Data"},{"key":"10.1016\/j.cag.2026.104594_b47","doi-asserted-by":"crossref","unstructured":"Mahmood Naureen, Ghorbani Nima, Troje Nikolaus F, Pons-Moll Gerard, Black Michael. AMASS: Archive of Motion Capture As Surface Shapes. In: International conference on computer vision. 2019.","DOI":"10.1109\/ICCV.2019.00554"},{"issue":"3","key":"10.1016\/j.cag.2026.104594_b48","first-page":"3072","article-title":"Sequence labeling with meta-learning","volume":"35","author":"Li","year":"2023","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"9","key":"10.1016\/j.cag.2026.104594_b49","doi-asserted-by":"crossref","first-page":"4245","DOI":"10.1109\/TKDE.2020.3038670","article-title":"Few-shot named entity recognition via meta-learning","volume":"34","author":"Li","year":"2022","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"9","key":"10.1016\/j.cag.2026.104594_b50","doi-asserted-by":"crossref","first-page":"3819","DOI":"10.1109\/TNNLS.2020.3015912","article-title":"Domain generalization for named entity boundary detection via metalearning","volume":"32","author":"Li","year":"2021","journal-title":"IEEE Trans Neural Networks Learn Syst"},{"issue":"10","key":"10.1016\/j.cag.2026.104594_b51","doi-asserted-by":"crossref","first-page":"14396","DOI":"10.1109\/TNNLS.2023.3278938","article-title":"Few-shot relation extraction with dual graph neural network interaction","volume":"35","author":"Li","year":"2024","journal-title":"IEEE Trans Neural Networks Learn Syst"},{"issue":"2","key":"10.1016\/j.cag.2026.104594_b52","doi-asserted-by":"crossref","first-page":"828","DOI":"10.1109\/TKDE.2020.2983360","article-title":"Neural text segmentation and its application to sentiment analysis","volume":"34","author":"Li","year":"2022","journal-title":"IEEE Trans Knowl Data Eng"}],"container-title":["Computers &amp; Graphics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0097849326000658?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0097849326000658?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T16:57:56Z","timestamp":1776185876000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0097849326000658"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5]]},"references-count":52,"alternative-id":["S0097849326000658"],"URL":"https:\/\/doi.org\/10.1016\/j.cag.2026.104594","relation":{},"ISSN":["0097-8493"],"issn-type":[{"value":"0097-8493","type":"print"}],"subject":[],"published":{"date-parts":[[2026,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Tai-Chi: Text-to-motion generation with locality-aware bipartite body-part motion prior","name":"articletitle","label":"Article Title"},{"value":"Computers & Graphics","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.cag.2026.104594","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"104594"}}