{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:16:10Z","timestamp":1777655770361,"version":"3.51.4"},"publisher-location":"Cham","reference-count":61,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729799","type":"print"},{"value":"9783031729805","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-72980-5_16","type":"book-chapter","created":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T09:15:43Z","timestamp":1730106943000},"page":"270-287","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["AnimateMe: 4D Facial Expressions via\u00a0Diffusion Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-8886-4625","authenticated-orcid":false,"given":"Dimitrios","family":"Gerogiannis","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-7203-8866","authenticated-orcid":false,"given":"Foivos Paraperas","family":"Papantoniou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0049-2589","authenticated-orcid":false,"given":"Rolandos Alexandros","family":"Potamias","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9964-6105","authenticated-orcid":false,"given":"Alexandros","family":"Lattas","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7421-1335","authenticated-orcid":false,"given":"Stylianos","family":"Moschoglou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4836-1513","authenticated-orcid":false,"given":"Stylianos","family":"Ploumpis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5222-1740","authenticated-orcid":false,"given":"Stefanos","family":"Zafeiriou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,10,29]]},"reference":[{"key":"16_CR1","doi-asserted-by":"crossref","unstructured":"Aneja, S., Thies, J., Dai, A., Nie\u00dfner, M.: FaceTalk: audio-driven motion diffusion for neural parametric head models (2023)","DOI":"10.1109\/CVPR52733.2024.02009"},{"key":"16_CR2","doi-asserted-by":"crossref","unstructured":"Azadi, S., Shah, A., Hayes, T., Parikh, D., Gupta, S.: Make-an-animation: large-scale text-conditional 3D human motion generation. arXiv preprint arXiv:2305.09662 (2023)","DOI":"10.1109\/ICCV51070.2023.01381"},{"key":"16_CR3","doi-asserted-by":"crossref","unstructured":"Baltatzis, V., Potamias, R.A., Ververas, E., Sun, G., Deng, J., Zafeiriou, S.: Neural sign actors: a diffusion model for 3D sign language production from text. arXiv preprint arXiv:2312.02702 (2023)","DOI":"10.1109\/CVPR52733.2024.00194"},{"key":"16_CR4","unstructured":"Blanz, V., Vetter, T.: A morphable model for the synthesis of 3D faces. Seminal Graphics Papers: Pushing the Boundaries, Volume 2 (1999). https:\/\/api.semanticscholar.org\/CorpusID:203705211"},{"issue":"9","key":"16_CR5","doi-asserted-by":"publisher","first-page":"1063","DOI":"10.1109\/TPAMI.2003.1227983","volume":"25","author":"V Blanz","year":"2003","unstructured":"Blanz, V., Vetter, T.: Face recognition based on fitting a 3D morphable model. IEEE Trans. Pattern Anal. Mach. Intell. 25(9), 1063\u20131074 (2003)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"16_CR6","doi-asserted-by":"crossref","unstructured":"Blattmann, A., et al.: Align your latents: high-resolution video synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22563\u201322575 (2023)","DOI":"10.1109\/CVPR52729.2023.02161"},{"key":"16_CR7","doi-asserted-by":"crossref","unstructured":"Bouritsas, G., Bokhnyak, S., Ploumpis, S., Bronstein, M., Zafeiriou, S.: Neural 3D morphable models: spiral convolutional networks for 3D shape representation learning and generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7213\u20137222 (2019)","DOI":"10.1109\/ICCV.2019.00731"},{"key":"16_CR8","doi-asserted-by":"publisher","DOI":"10.1016\/j.iswa.2022.200139","volume":"16","author":"H Bouzid","year":"2022","unstructured":"Bouzid, H., Ballihi, L.: Facial expression video generation based-on spatio-temporal convolutional GAN: FEV-GAN. Intell. Syst. Appl. 16, 200139 (2022). https:\/\/doi.org\/10.1016\/j.iswa.2022.200139","journal-title":"Intell. Syst. Appl."},{"issue":"3","key":"16_CR9","doi-asserted-by":"publisher","first-page":"413","DOI":"10.1109\/TVCG.2013.249","volume":"20","author":"C Cao","year":"2014","unstructured":"Cao, C., Weng, Y., Zhou, S., Tong, Y., Zhou, K.: FaceWarehouse: a 3D facial expression database for visual computing. IEEE Trans. Vis. Comput. Graph. 20(3), 413\u2013425 (2014). https:\/\/doi.org\/10.1109\/TVCG.2013.249","journal-title":"IEEE Trans. Vis. Comput. Graph."},{"key":"16_CR10","doi-asserted-by":"crossref","unstructured":"Chen, X., et al.: Executing your commands via motion diffusion in latent space. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18000\u201318010 (2023)","DOI":"10.1109\/CVPR52729.2023.01726"},{"key":"16_CR11","doi-asserted-by":"crossref","unstructured":"Cheng, S., Kotsia, I., Pantic, M., Zafeiriou, S.: 4DFAB: a large scale 4D database for facial expression analysis and biometric applications. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00537"},{"key":"16_CR12","doi-asserted-by":"crossref","unstructured":"Cudeiro, D., Bolkart, T., Laidlaw, C., Ranjan, A., Black, M.J.: Capture, learning, and synthesis of 3D speaking styles. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10101\u201310111 (2019)","DOI":"10.1109\/CVPR.2019.01034"},{"key":"16_CR13","doi-asserted-by":"crossref","unstructured":"Dabral, R., Mughal, M.H., Golyanik, V., Theobalt, C.: MoFusion: a framework for denoising-diffusion-based motion synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9760\u20139770 (2023)","DOI":"10.1109\/CVPR52729.2023.00941"},{"key":"16_CR14","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. In: Advances in Neural Information Processing Systems, vol. 34, pp. 8780\u20138794 (2021)"},{"key":"16_CR15","doi-asserted-by":"crossref","unstructured":"Du, Y., Kips, R., Pumarola, A., Starke, S., Thabet, A., Sanakoyeu, A.: Avatars grow legs: generating smooth human motion from sparse tracking inputs with diffusion model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 481\u2013490 (2023)","DOI":"10.1109\/CVPR52729.2023.00054"},{"issue":"5","key":"16_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3395208","volume":"39","author":"B Egger","year":"2020","unstructured":"Egger, B., et al.: 3D morphable face models-past, present, and future. ACM Trans. Graph. (ToG) 39(5), 1\u201338 (2020)","journal-title":"ACM Trans. Graph. (ToG)"},{"key":"16_CR17","doi-asserted-by":"publisher","unstructured":"Fan, L., Huang, W., Gan, C., Huang, J., Gong, B.: Controllable image-to-video translation: a case study on facial expression generation. In: Proceedings of the Thirty-Third AAAI Conference on Artificial Intelligence and Thirty-First Innovative Applications of Artificial Intelligence Conference and Ninth AAAI Symposium on Educational Advances in Artificial Intelligence, AAAI 2019\/IAAI 2019\/EAAI 2019. AAAI Press (2019). https:\/\/doi.org\/10.1609\/aaai.v33i01.33013510","DOI":"10.1609\/aaai.v33i01.33013510"},{"key":"16_CR18","doi-asserted-by":"crossref","unstructured":"Fan, Y., Lin, Z., Saito, J., Wang, W., Komura, T.: FaceFormer: speech-driven 3D facial animation with transformers (2022)","DOI":"10.1109\/CVPR52688.2022.01821"},{"key":"16_CR19","doi-asserted-by":"crossref","unstructured":"Gong, S., Chen, L., Bronstein, M., Zafeiriou, S.: SpiralNet++: a fast and highly efficient mesh convolution operator. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) Workshops (2019)","DOI":"10.1109\/ICCVW.2019.00509"},{"key":"16_CR20","unstructured":"He, Y., Yang, T., Zhang, Y., Shan, Y., Chen, Q.: Latent video diffusion models for high-fidelity long video generation (2023)"},{"key":"16_CR21","unstructured":"Ho, J., et al.: Imagen video: high definition video generation with diffusion models (2022)"},{"key":"16_CR22","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Advances in Neural Information Processing Systems, vol. 33, pp. 6840\u20136851 (2020)"},{"key":"16_CR23","unstructured":"Ho, J., Salimans, T., Gritsenko, A., Chan, W., Norouzi, M., Fleet, D.J.: Video diffusion models (2022)"},{"issue":"4","key":"16_CR24","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073658","volume":"36","author":"T Karras","year":"2017","unstructured":"Karras, T., Aila, T., Laine, S., Herva, A., Lehtinen, J.: Audio-driven facial animation by joint end-to-end learning of pose and emotion. ACM Trans. Graph. (TOG) 36(4), 1\u201312 (2017)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"16_CR25","doi-asserted-by":"crossref","unstructured":"Khachatryan, L., et al.: Text2Video-Zero: text-to-image diffusion models are zero-shot video generators. arXiv preprint arXiv:2303.13439 (2023)","DOI":"10.1109\/ICCV51070.2023.01462"},{"issue":"6","key":"16_CR26","first-page":"1","volume":"42","author":"J Li","year":"2023","unstructured":"Li, J., Wu, J., Liu, C.K.: Object motion guided human motion synthesis. ACM Trans. Graph. (TOG) 42(6), 1\u201311 (2023)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"16_CR27","doi-asserted-by":"crossref","unstructured":"Luo, S., Hu, W.: Diffusion probabilistic models for 3D point cloud generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2837\u20132845 (2021)","DOI":"10.1109\/CVPR46437.2021.00286"},{"key":"16_CR28","doi-asserted-by":"crossref","unstructured":"Luo, Z., et al.: VideoFusion: decomposed diffusion models for high-quality video generation (2023)","DOI":"10.1109\/CVPR52729.2023.00984"},{"key":"16_CR29","doi-asserted-by":"crossref","unstructured":"Lyu, Z., Wang, J., An, Y., Zhang, Y., Lin, D., Dai, B.: Controllable mesh generation through sparse latent point diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 271\u2013280 (2023)","DOI":"10.1109\/CVPR52729.2023.00034"},{"key":"16_CR30","unstructured":"Ma, Z., Zhu, X., Qi, G., Qian, C., Zhang, Z., Lei, Z.: DiffSpeaker: speech-driven 3D facial animation with diffusion transformer. arXiv preprint arXiv:2402.05712 (2024)"},{"issue":"2","key":"16_CR31","doi-asserted-by":"publisher","first-page":"848","DOI":"10.1109\/TPAMI.2020.3002500","volume":"44","author":"N Otberdout","year":"2022","unstructured":"Otberdout, N., Daoudi, M., Kacem, A., Ballihi, L., Berretti, S.: Dynamic facial expression generation on Hilbert hypersphere with conditional Wasserstein generative adversarial nets. IEEE Trans. Pattern Anal. Mach. Intell. 44(2), 848\u2013863 (2022). https:\/\/doi.org\/10.1109\/TPAMI.2020.3002500","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"16_CR32","doi-asserted-by":"crossref","unstructured":"Otberdout, N., Ferrari, C., Daoudi, M., Berretti, S., Bimbo, A.D.: Sparse to dense dynamic 3D facial expression generation (2022)","DOI":"10.1109\/CVPR52688.2022.01974"},{"key":"16_CR33","doi-asserted-by":"publisher","first-page":"467","DOI":"10.1007\/978-3-031-20074-8_27","volume-title":"European Conference on Computer Vision 2022","author":"A Papaioannou","year":"2022","unstructured":"Papaioannou, A., et al.: MimicME: a large scale diverse 4D database for facial expression analysis. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13668, pp. 467\u2013484. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20074-8_27"},{"key":"16_CR34","unstructured":"Park, I., Cho, J.: SAiD: speech-driven blendshape facial animation with diffusion. arXiv preprint arXiv:2401.08655 (2023)"},{"key":"16_CR35","doi-asserted-by":"crossref","unstructured":"Peng, Z., et al.: EmoTalk: speech-driven emotional disentanglement for 3D face animation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 20687\u201320697 (2023)","DOI":"10.1109\/ICCV51070.2023.01891"},{"key":"16_CR36","doi-asserted-by":"crossref","unstructured":"Pham, H.X., Cheung, S., Pavlovic, V.: Speech-driven 3D facial animation with implicit emotional awareness: a deep learning approach. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 80\u201388 (2017)","DOI":"10.1109\/CVPRW.2017.287"},{"issue":"11","key":"16_CR37","doi-asserted-by":"publisher","first-page":"4142","DOI":"10.1109\/TPAMI.2020.2991150","volume":"43","author":"S Ploumpis","year":"2020","unstructured":"Ploumpis, S., et al.: Towards a complete 3D morphable model of the human head. IEEE Trans. Pattern Anal. Mach. Intell. 43(11), 4142\u20134160 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"16_CR38","doi-asserted-by":"crossref","unstructured":"Ploumpis, S., Wang, H., Pears, N., Smith, W.A., Zafeiriou, S.: Combining 3D morphable models: a large scale face-and-head model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10934\u201310943 (2019)","DOI":"10.1109\/CVPR.2019.01119"},{"key":"16_CR39","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"278","DOI":"10.1007\/978-3-030-58526-6_17","volume-title":"Computer Vision \u2013 ECCV 2020","author":"RA Potamias","year":"2020","unstructured":"Potamias, R.A., Zheng, J., Ploumpis, S., Bouritsas, G., Ververas, E., Zafeiriou, S.: Learning to generate customized dynamic 3D facial expressions. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12374, pp. 278\u2013294. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58526-6_17"},{"key":"16_CR40","doi-asserted-by":"crossref","unstructured":"Ranjan, A., Bolkart, T., Sanyal, S., Black, M.J.: Generating 3D faces using convolutional mesh autoencoders. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 704\u2013720 (2018)","DOI":"10.1007\/978-3-030-01219-9_43"},{"key":"16_CR41","doi-asserted-by":"crossref","unstructured":"Richard, A., Zollh\u00f6fer, M., Wen, Y., de\u00a0la Torre, F., Sheikh, Y.: MeshTalk: 3D face animation from speech using cross-modality disentanglement. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 1173\u20131182 (2021)","DOI":"10.1109\/ICCV48922.2021.00121"},{"key":"16_CR42","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"16_CR43","unstructured":"Shafir, Y., Tevet, G., Kapon, R., Bermano, A.H.: Human motion diffusion as a generative prior. arXiv preprint arXiv:2303.01418 (2023)"},{"key":"16_CR44","unstructured":"Singer, U., et al.: Make-a-Video: text-to-video generation without text-video data. In: The Eleventh International Conference on Learning Representations (2023). https:\/\/openreview.net\/forum?id=nJfylDvgzlq"},{"key":"16_CR45","unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)"},{"key":"16_CR46","doi-asserted-by":"crossref","unstructured":"Stan, S., Haque, K.I., Yumak, Z.: FaceDiffuser: speech-driven 3D facial animation synthesis using diffusion. In: Proceedings of the 16th ACM SIGGRAPH Conference on Motion, Interaction and Games, pp. 1\u201311 (2023)","DOI":"10.1145\/3623264.3624447"},{"key":"16_CR47","unstructured":"Tevet, G., Raab, S., Gordon, B., Shafir, Y., Cohen-Or, D., Bermano, A.H.: Human motion diffusion model. arXiv preprint arXiv:2209.14916 (2022)"},{"key":"16_CR48","doi-asserted-by":"crossref","unstructured":"Thambiraja, B., Aliakbarian, S., Cosker, D., Thies, J.: 3DiFACE: diffusion-based speech-driven 3D facial animation and editing. arXiv preprint arXiv:2312.00870 (2023)","DOI":"10.1109\/ICCV51070.2023.01885"},{"key":"16_CR49","doi-asserted-by":"crossref","unstructured":"Thambiraja, B., Habibie, I., Aliakbarian, S., Cosker, D., Theobalt, C., Thies, J.: Imitator: personalized speech-driven 3D facial animation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 20621\u201320631 (2023)","DOI":"10.1109\/ICCV51070.2023.01885"},{"key":"16_CR50","doi-asserted-by":"crossref","unstructured":"Tran, L., Liu, X.: Nonlinear 3D face morphable model. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7346\u20137355 (2018)","DOI":"10.1109\/CVPR.2018.00767"},{"key":"16_CR51","doi-asserted-by":"crossref","unstructured":"Tulyakov, S., Liu, M.Y., Yang, X., Kautz, J.: MoCoGAN: decomposing motion and content for video generation (2017)","DOI":"10.1109\/CVPR.2018.00165"},{"key":"16_CR52","doi-asserted-by":"crossref","unstructured":"Tzirakis, P., Papaioannou, A., Lattas, A., Tarasiou, M., Schuller, B., Zafeiriou, S.: Synthesising 3D facial motion from \u201cin-the-wild\u201d speech. In: 2020 15th IEEE International Conference on Automatic Face and Gesture Recognition (FG 2020), pp. 265\u2013272 (2020)","DOI":"10.1109\/FG47880.2020.00100"},{"key":"16_CR53","unstructured":"Vahdat, A., et al.: LION: latent point diffusion models for 3D shape generation. In: Advances in Neural Information Processing Systems, vol. 35, pp. 10021\u201310039 (2022)"},{"key":"16_CR54","doi-asserted-by":"publisher","unstructured":"Wang, Y., Bilinski, P., Bremond, F., Dantcheva, A.: G3AN: disentangling appearance and motion for video generation. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5263\u20135272 (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.00531","DOI":"10.1109\/CVPR42600.2020.00531"},{"key":"16_CR55","doi-asserted-by":"crossref","unstructured":"Wu, C.H., De\u00a0la Torre, F.: A latent space of stochastic diffusion models for zero-shot image editing and guidance. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7378\u20137387 (2023)","DOI":"10.1109\/ICCV51070.2023.00678"},{"key":"16_CR56","doi-asserted-by":"publisher","first-page":"8658","DOI":"10.1109\/tip.2021.3112059","volume":"30","author":"X Wu","year":"2021","unstructured":"Wu, X., et al.: F3A-GAN: facial flow for face animation with generative adversarial networks. IEEE Trans. Image Process. 30, 8658\u20138670 (2021). https:\/\/doi.org\/10.1109\/tip.2021.3112059","journal-title":"IEEE Trans. Image Process."},{"key":"16_CR57","doi-asserted-by":"crossref","unstructured":"Xing, J., Xia, M., Zhang, Y., Cun, X., Wang, J., Wong, T.T.: CodeTalker: speech-driven 3D facial animation with discrete motion prior. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12780\u201312790 (2023)","DOI":"10.1109\/CVPR52729.2023.01229"},{"key":"16_CR58","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1007\/978-3-031-27077-2_18","volume-title":"Multimedia Modeling","author":"F Zhang","year":"2023","unstructured":"Zhang, F., Ji, N., Gao, F., Li, Y.: DiffMotion: speech-driven gesture synthesis using denoising diffusion model. In: Dang-Nguyen, D.T., et al. (eds.) MMM 2023. LNCS, vol. 13833, pp. 231\u2013242. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-27077-2_18"},{"issue":"6","key":"16_CR59","doi-asserted-by":"publisher","first-page":"4115","DOI":"10.1109\/TPAMI.2024.3355414","volume":"46","author":"M Zhang","year":"2024","unstructured":"Zhang, M., et al.: MotionDiffuse: text-driven human motion generation with diffusion model. IEEE Trans. Pattern Anal. Mach. Intell. 46(6), 4115\u20134128 (2024)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"16_CR60","unstructured":"Zhou, D., Wang, W., Yan, H., Lv, W., Zhu, Y., Feng, J.: MagicVideo: efficient video generation with latent diffusion models (2023)"},{"key":"16_CR61","doi-asserted-by":"crossref","unstructured":"Zhou, L., Du, Y., Wu, J.: 3D shape generation and completion through point-voxel diffusion. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5826\u20135835 (2021)","DOI":"10.1109\/ICCV48922.2021.00577"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72980-5_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T09:20:36Z","timestamp":1730107236000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72980-5_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031729799","9783031729805"],"references-count":61,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72980-5_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"29 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}