{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T08:07:22Z","timestamp":1761898042618,"version":"3.40.3"},"publisher-location":"Cham","reference-count":48,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031781032"},{"type":"electronic","value":"9783031781049"}],"license":[{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-78104-9_30","type":"book-chapter","created":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T21:44:39Z","timestamp":1733089479000},"page":"446-461","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["FG-MDM: Towards Zero-Shot Human Motion Generation via ChatGPT-Refined Descriptions"],"prefix":"10.1007","author":[{"given":"Xu","family":"Shi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5857-5531","authenticated-orcid":false,"given":"Wei","family":"Yao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4360-7035","authenticated-orcid":false,"given":"Chuanchen","family":"Luo","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5276-0114","authenticated-orcid":false,"given":"Junran","family":"Peng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8633-4551","authenticated-orcid":false,"given":"Hongwen","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4696-8848","authenticated-orcid":false,"given":"Yunlian","family":"Sun","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,2]]},"reference":[{"key":"30_CR1","doi-asserted-by":"crossref","unstructured":"Ahn, H., Ha, T., Choi, Y., Yoo, H., Oh, S.: Text2action: generative adversarial synthesis from language to action. In: IEEE International Conference on Robotics and Automation, pp. 5915\u20135920. IEEE (2018)","DOI":"10.1109\/ICRA.2018.8460608"},{"key":"30_CR2","doi-asserted-by":"crossref","unstructured":"Athanasiou, N., Petrovich, M., Black, M.J., Varol, G.: SINC: spatial composition of 3d human motions for simultaneous action generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9984\u20139995 (2023)","DOI":"10.1109\/ICCV51070.2023.00916"},{"key":"30_CR3","doi-asserted-by":"crossref","unstructured":"Azadi, S., Shah, A., Hayes, T., Parikh, D., Gupta, S.: Make-an-animation: large-scale text-conditional 3D human motion generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2023)","DOI":"10.1109\/ICCV51070.2023.01381"},{"key":"30_CR4","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"557","DOI":"10.1007\/978-3-031-20071-7_33","volume-title":"ECCV 2022","author":"Z Cai","year":"2022","unstructured":"Cai, Z., et al.: HuMMan: multi-modal 4D human dataset for versatile sensing and modeling. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13667, pp. 557\u2013577. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20071-7_33"},{"key":"30_CR5","doi-asserted-by":"crossref","unstructured":"Chen, X., et al.: Executing your commands via motion diffusion in latent space. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18000\u201318010 (2023)","DOI":"10.1109\/CVPR52729.2023.01726"},{"key":"30_CR6","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. Adv. Neural. Inf. Process. Syst. 34, 8780\u20138794 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"30","key":"30_CR7","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2305016120","volume":"120","author":"F Gilardi","year":"2023","unstructured":"Gilardi, F., Alizadeh, M., Kubli, M.: ChatGPT outperforms crowd workers for text-annotation tasks. Proc. Natl. Acad. Sci. 120(30), e2305016120 (2023)","journal-title":"Proc. Natl. Acad. Sci."},{"key":"30_CR8","doi-asserted-by":"crossref","unstructured":"Guo, C., et al.: Generating diverse and natural 3D human motions from text. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5152\u20135161 (2022)","DOI":"10.1109\/CVPR52688.2022.00509"},{"key":"30_CR9","doi-asserted-by":"crossref","unstructured":"Guo, C., et al.: Action2motion: conditioned generation of 3D human motions. In: Proceedings of the ACM International Conference on Multimedia, pp. 2021\u20132029 (2020)","DOI":"10.1145\/3394171.3413635"},{"key":"30_CR10","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"30_CR11","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv:2207.12598 (2022)"},{"issue":"4","key":"30_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530094","volume":"41","author":"F Hong","year":"2022","unstructured":"Hong, F., Zhang, M., Pan, L., Cai, Z., Yang, L., Liu, Z.: AvatarClip: zero-shot text-driven generation and animation of 3D avatars. ACM Trans. Graph. 41(4), 1\u201319 (2022)","journal-title":"ACM Trans. Graph."},{"key":"30_CR13","unstructured":"Jiang, B., Chen, X., Liu, W., Yu, J., Yu, G., Chen, T.: MotionGPT: human motion as a foreign language. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"30_CR14","doi-asserted-by":"crossref","unstructured":"Kalakonda, S.S., Maheshwari, S., Sarvadevabhatla, R.K.: Action-GPT: leveraging large-scale language models for improved and generalized action generation. In: IEEE International Conference on Multimedia and Expo (2023)","DOI":"10.1109\/ICME55011.2023.00014"},{"key":"30_CR15","doi-asserted-by":"crossref","unstructured":"Kim, J., Kim, J., Choi, S.: Flame: free-form language-based motion synthesis & editing. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 8255\u20138263 (2023)","DOI":"10.1609\/aaai.v37i7.25996"},{"key":"30_CR16","doi-asserted-by":"crossref","unstructured":"Li, R., Yang, S., Ross, D.A., Kanazawa, A.: AI choreographer: music conditioned 3D dance generation with AIST++. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 13401\u201313412 (2021)","DOI":"10.1109\/ICCV48922.2021.01315"},{"key":"30_CR17","doi-asserted-by":"crossref","unstructured":"Liang, H., et al.: OMG: towards open-vocabulary motion generation via mixture of controllers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2024)","DOI":"10.1109\/CVPR52733.2024.00053"},{"key":"30_CR18","unstructured":"Lin, A.S., Wu, L., Corona, R., Tai, K., Huang, Q., Mooney, R.J.: Generating animated videos of human activities from natural language descriptions. Learning 2018(1) (2018)"},{"key":"30_CR19","unstructured":"Lin, J., et al.: Motion-X: a large-scale 3D expressive whole-body human motion dataset. Adv. Neural Inf. Process. Syst. (2023)"},{"key":"30_CR20","doi-asserted-by":"crossref","unstructured":"Lin, J., et al.: Being comes from not-being: open-vocabulary text-to-motion generation with wordless training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 23222\u201323231 (2023)","DOI":"10.1109\/CVPR52729.2023.02224"},{"key":"30_CR21","unstructured":"Lin, X., Amer, M.R.: Human motion modeling using DVGANs. arXiv:1804.10652 (2018)"},{"key":"30_CR22","unstructured":"Lu, S., et al.: HumanTOMATO: text-aligned whole-body motion generation. arXiv:2310.12978 (2023)"},{"key":"30_CR23","doi-asserted-by":"crossref","unstructured":"Mahmood, N., Ghorbani, N., Troje, N.F., Pons-Moll, G., Black, M.J.: AMASS: archive of motion capture as surface shapes. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5442\u20135451 (2019)","DOI":"10.1109\/ICCV.2019.00554"},{"key":"30_CR24","doi-asserted-by":"crossref","unstructured":"Martinez, J., Black, M.J., Romero, J.: On human motion prediction using recurrent neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2891\u20132900 (2017)","DOI":"10.1109\/CVPR.2017.497"},{"key":"30_CR25","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. Adv. Neural. Inf. Process. Syst. 35, 27730\u201327744 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"30_CR26","doi-asserted-by":"crossref","unstructured":"Petrovich, M., Black, M.J., Varol, G.: Action-conditioned 3D human motion synthesis with transformer VAE. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10985\u201310995 (2021)","DOI":"10.1109\/ICCV48922.2021.01080"},{"key":"30_CR27","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"480","DOI":"10.1007\/978-3-031-20047-2_28","volume-title":"ECCV 2022","author":"M Petrovich","year":"2022","unstructured":"Petrovich, M., Black, M.J., Varol, G.: TEMOS: generating diverse human motions from textual descriptions. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13682, pp. 480\u2013497. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20047-2_28"},{"key":"30_CR28","doi-asserted-by":"crossref","unstructured":"Petrovich, M., Black, M.J., Varol, G.: TMR: text-to-motion retrieval using contrastive 3D human motion synthesis. In: International Conference on Computer Vision (2023)","DOI":"10.1109\/ICCV51070.2023.00870"},{"issue":"4","key":"30_CR29","doi-asserted-by":"publisher","first-page":"236","DOI":"10.1089\/big.2016.0028","volume":"4","author":"M Plappert","year":"2016","unstructured":"Plappert, M., Mandery, C., Asfour, T.: The kit motion-language dataset. Big data 4(4), 236\u2013252 (2016)","journal-title":"Big data"},{"key":"30_CR30","doi-asserted-by":"crossref","unstructured":"Raab, S., et al: MoDI: unconditional motion synthesis from diverse data. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13873\u201313883 (2023)","DOI":"10.1109\/CVPR52729.2023.01333"},{"key":"30_CR31","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"30_CR32","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. 1(2), 3 (2022). arXiv:2204.06125"},{"key":"30_CR33","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"issue":"1","key":"30_CR34","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3407659","volume":"40","author":"M Shi","year":"2020","unstructured":"Shi, M., et al.: MotioNet: 3D human motion reconstruction from monocular video with skeleton consistency. ACM Trans. Graph. 40(1), 1\u201315 (2020)","journal-title":"ACM Trans. Graph."},{"key":"30_CR35","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: International Conference on Machine Learning, pp. 2256\u20132265. PMLR (2015)"},{"key":"30_CR36","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"358","DOI":"10.1007\/978-3-031-20047-2_21","volume-title":"ECCV 2022","author":"G Tevet","year":"2022","unstructured":"Tevet, G., Gordon, B., Hertz, A., Bermano, A.H., Cohen-Or, D.: MotioncLIP: exposing human motion generation to clip space. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13682, pp. 358\u2013374. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20047-2_21"},{"key":"30_CR37","unstructured":"Tevet, G., Raab, S., Gordon, B., Shafir, Y., Cohen-or, D., Bermano, A.H.: Human motion diffusion model. In: The Eleventh International Conference on Learning Representations (2023)"},{"key":"30_CR38","doi-asserted-by":"publisher","first-page":"15406","DOI":"10.1109\/TPAMI.2023.3298850","volume":"45","author":"Y Tian","year":"2023","unstructured":"Tian, Y., Zhang, H., Liu, Y., Wang, L.: Recovering 3D human mesh from monocular images: a survey. IEEE Trans. Pattern Anal. Mach. Intell. 45, 15406\u201315425 (2023)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"30_CR39","doi-asserted-by":"crossref","unstructured":"Tseng, J., Castellon, R., Liu, K.: EDGE: editable dance generation from music. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 448\u2013458 (2023)","DOI":"10.1109\/CVPR52729.2023.00051"},{"key":"30_CR40","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"30_CR41","doi-asserted-by":"publisher","unstructured":"Yao, W., Zhang, H., Sun, Y., Tang, J.: STAF: 3D human mesh recovery from video with spatio-temporal alignment fusion. IEEE Trans. Circ. Syst. Video Technol., 1 (2024). https:\/\/doi.org\/10.1109\/TCSVT.2024.3410400","DOI":"10.1109\/TCSVT.2024.3410400"},{"key":"30_CR42","doi-asserted-by":"publisher","first-page":"12287","DOI":"10.1109\/TPAMI.2023.3271691","volume":"45","author":"H Zhang","year":"2023","unstructured":"Zhang, H., et al.: PyMAF-X: towards well-aligned full-body model regression from monocular images. IEEE Trans. Pattern Anal. Mach. Intell. 45, 12287\u201312303 (2023)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"30_CR43","doi-asserted-by":"crossref","unstructured":"Zhang, H., et al.: PyMAF: 3D human pose and shape regression with pyramidal mesh alignment feedback loop. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11446\u201311456 (2021)","DOI":"10.1109\/ICCV48922.2021.01125"},{"key":"30_CR44","doi-asserted-by":"crossref","unstructured":"Zhang, J., et al.: T2M-GPT: generating human motion from textual descriptions with discrete representations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.01415"},{"key":"30_CR45","doi-asserted-by":"publisher","first-page":"4115","DOI":"10.1109\/TPAMI.2024.3355414","volume":"46","author":"M Zhang","year":"2024","unstructured":"Zhang, M., et al.: MotionDiffuse: text-driven human motion generation with diffusion model. IEEE Trans. Pattern Anal. Mach. Intell. 46, 4115\u20134128 (2024)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"30_CR46","doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: MotionGPT: finetuned LLMs are general-purpose motion generators. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 7368\u20137376 (2024)","DOI":"10.1609\/aaai.v38i7.28567"},{"key":"30_CR47","doi-asserted-by":"crossref","unstructured":"Zhao, R., Su, H., Ji, Q.: Bayesian adversarial human motion synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6225\u20136234 (2020)","DOI":"10.1109\/CVPR42600.2020.00626"},{"key":"30_CR48","doi-asserted-by":"publisher","first-page":"2430","DOI":"10.1109\/TPAMI.2023.3330935","volume":"46","author":"W Zhu","year":"2023","unstructured":"Zhu, W., et al.: Human motion generation: a survey. IEEE Trans. Pattern Anal. Mach. Intell. 46, 2430\u20132449 (2023)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-78104-9_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T23:30:58Z","timestamp":1733095858000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-78104-9_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,2]]},"ISBN":["9783031781032","9783031781049"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-78104-9_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,2]]},"assertion":[{"value":"2 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kolkata","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpr2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icpr2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}