{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T08:10:14Z","timestamp":1761811814400,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032043382","type":"print"},{"value":"9783032043399","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-032-04339-9_7","type":"book-chapter","created":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T08:06:55Z","timestamp":1761811615000},"page":"96-111","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Rhythm Fusion: Synchronizing Audio and\u00a0Motion Features for\u00a0Music-Driven Dance Generation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6068-2945","authenticated-orcid":false,"given":"Nuha","family":"Aldausari","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8087-2241","authenticated-orcid":false,"given":"Gelareh","family":"Mohammadi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9805-8943","authenticated-orcid":false,"given":"David","family":"Cooper","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,31]]},"reference":[{"key":"7_CR1","doi-asserted-by":"publisher","first-page":"3501","DOI":"10.1109\/LRA.2020.2977333","volume":"5","author":"H Ahn","year":"2020","unstructured":"Ahn, H., Kim, J., Kim, K., Oh, S.: Generative autoregressive networks for 3D dancing move synthesis from music. IEEE Robot. Autom. Lett. 5, 3501\u20133508 (2020)","journal-title":"IEEE Robot. Autom. Lett."},{"key":"7_CR2","doi-asserted-by":"crossref","unstructured":"Aldausari, N., Sowmya, A., Marcus, N., Mohammadi, G.: Video generative adversarial networks: a review. ACM Comput. Surv. (CSUR) 1\u201325 (2022)","DOI":"10.1145\/3487891"},{"key":"7_CR3","doi-asserted-by":"crossref","unstructured":"Aldausari, N., Sowmya, A., Marcus, N., Mohammadi, G.: Diverse audio-to-video gan using multiscale image fusion. In: Australasian Joint Conference on Artificial Intelligence, pp. 29\u201342. Springer (2022)","DOI":"10.1007\/978-3-031-22695-3_3"},{"key":"7_CR4","doi-asserted-by":"crossref","unstructured":"Aldausari, N., Sowmya, A., Marcus, N., Mohammadi, G.: Cascaded siamese self-supervised audio to video GAN. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4691\u20134700 (2022)","DOI":"10.1109\/CVPRW56347.2022.00515"},{"key":"7_CR5","first-page":"26","volume":"8","author":"O Alemi","year":"2017","unstructured":"Alemi, O., Fran\u00e7oise, J., Pasquier, P.: GrooveNet: real-time music-driven dance movement generation using artificial neural networks. Networks 8, 26 (2017)","journal-title":"Networks"},{"key":"7_CR6","doi-asserted-by":"crossref","unstructured":"Chan, C., Ginosar, S., Zhou, T., Efros, A.: Everybody dance now. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5933\u20135942 (2019)","DOI":"10.1109\/ICCV.2019.00603"},{"key":"7_CR7","doi-asserted-by":"crossref","unstructured":"Chen, L., Cao, C., Torre, F., Saragih, J., Xu, C., Sheikh, Y.: High-fidelity face tracking for AR\/VR via deep lighting adaptation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13059\u201313069 (2021)","DOI":"10.1109\/CVPR46437.2021.01286"},{"key":"7_CR8","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. Adv. Neural. Inf. Process. Syst. 34, 8780\u20138794 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"7_CR9","doi-asserted-by":"crossref","unstructured":"Di, S., et al.: Video background music generation with controllable music transformer. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 2037\u20132045 (2021)","DOI":"10.1145\/3474085.3475195"},{"key":"7_CR10","doi-asserted-by":"crossref","unstructured":"Doukas, M., Zafeiriou, S., Sharmanska, V.: Headgan: one-shot neural head synthesis and editing. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 14398\u201314407 (2021)","DOI":"10.1109\/ICCV48922.2021.01413"},{"key":"7_CR11","unstructured":"Duan, Y., et al.: Semi-supervised learning for in-game expert-level music-to-dance translation. arXiv Preprint arXiv:2009.12763 (2020)"},{"key":"7_CR12","unstructured":"Foundation, B. Blender (software). https:\/\/www.blender.org\/"},{"key":"7_CR13","unstructured":"Goodfellow, I., Pouget-Abadie, J., et al.: Generative adversarial nets. In: Advances in Neural Information Processing Systems, pp. 2672\u20132680 (2014)"},{"key":"7_CR14","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"7_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2897824.2925975","volume":"35","author":"D Holden","year":"2016","unstructured":"Holden, D., Saito, J., Komura, T.: A deep learning framework for character motion synthesis and editing. ACM Trans. Graph. (TOG) 35, 1\u201311 (2016)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"7_CR16","unstructured":"Huang, R., Hu, H., Wu, W., Sawada, K., Zhang, M., Jiang, D.: Dance revolution: long-term dance generation with music via curriculum learning. arXiv Preprint arXiv:2006.06119 (2020)"},{"key":"7_CR17","doi-asserted-by":"crossref","unstructured":"Huang, P., Yang, F., Wang, Y.: Learning identity-invariant motion representations for cross-id face reenactment. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7084\u20137092 (2020)","DOI":"10.1109\/CVPR42600.2020.00711"},{"key":"7_CR18","first-page":"1","volume":"38","author":"H Kim","year":"2019","unstructured":"Kim, H., et al.: Neural style-preserving visual dubbing. ACM Trans. Graph. (TOG) 38, 1\u201313 (2019)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"7_CR19","unstructured":"Kingma, D., Welling, M.: Auto-encoding variational bayes. arXiv Preprint arXiv:1312.6114 (2013)"},{"key":"7_CR20","doi-asserted-by":"crossref","unstructured":"KR, P., Mukhopadhyay, R., Philip, J., Jha, A., Namboodiri, V., Jawahar, C.: Towards automatic face-to-face translation. In: Proceedings of the 27th ACM International Conference on Multimedia, pp. 1428\u20131436 (2019)","DOI":"10.1145\/3343031.3351066"},{"key":"7_CR21","unstructured":"Lee, H., et al.: Dancing to music. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"7_CR22","doi-asserted-by":"crossref","unstructured":"Li, B., Zhao, Y., Zhelun, S., Sheng, L.: Danceformer: music conditioned 3D dance generation with parametric motion transformer. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36, pp. 1272\u20131279 (2022)","DOI":"10.1609\/aaai.v36i2.20014"},{"key":"7_CR23","doi-asserted-by":"crossref","unstructured":"Li, R., Yang, S., Ross, D., Kanazawa, A.: AI choreographer: music conditioned 3D dance generation with AIST++. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 13401\u201313412 (2021)","DOI":"10.1109\/ICCV48922.2021.01315"},{"key":"7_CR24","unstructured":"Li, J., et al.: Learning to generate diverse dance motions with transformer. arXiv Preprint arXiv:2008.08171 (2020)"},{"key":"7_CR25","doi-asserted-by":"crossref","unstructured":"Loper, M., Mahmood, N., Romero, J., Pons-Moll, G., Black, M.J.: SMPL: a skinned multi-person linear model. In: Seminal Graphics Papers: Pushing the Boundaries, vol. 2, pp. 851\u2013866 (2023)","DOI":"10.1145\/3596711.3596800"},{"key":"7_CR26","doi-asserted-by":"crossref","unstructured":"M\u00fcller, M., R\u00f6der, T., Clausen, M.: Efficient content-based retrieval of motion capture data. In: ACM SIGGRAPH 2005 Papers, pp. 677\u2013685 (2005)","DOI":"10.1145\/1186822.1073247"},{"key":"7_CR27","unstructured":"Oord, A., Dieleman, S., Zen, H., et al.: Wavenet: a generative model for raw audio. arXiv Preprint arXiv:1609.03499 (2016)"},{"key":"7_CR28","unstructured":"Onuma, K., Faloutsos, C., Hodgins, J.: FMDistance: a fast and effective distance function for motion capture data. In: Eurographics (Short Papers), pp. 83\u201386 (2008)"},{"key":"7_CR29","doi-asserted-by":"crossref","unstructured":"Prajwal, K.R., Mukhopadhyay, R., Namboodiri, V.P., Jawahar, C.V.: A lip sync expert is all you need for speech to lip generation in the wild. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 484\u2013492 (2020)","DOI":"10.1145\/3394171.3413532"},{"key":"7_CR30","unstructured":"Rezende, D., Mohamed, S.: Variational inference with normalizing flows. In: International Conference on Machine Learning, pp. 1530\u20131538 (2015)"},{"key":"7_CR31","doi-asserted-by":"crossref","unstructured":"Siyao, L., et al.: Bailando: 3D dance generation by actor-critic GPT with choreographic memory. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11050\u201311059 (2022)","DOI":"10.1109\/CVPR52688.2022.01077"},{"key":"7_CR32","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: International Conference on Machine Learning, pp. 2256\u20132265 (2015)"},{"key":"7_CR33","unstructured":"Song, Y., Ermon, S.: Generative modeling by estimating gradients of the data distribution. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"7_CR34","doi-asserted-by":"publisher","first-page":"497","DOI":"10.1109\/TMM.2020.2981989","volume":"23","author":"G Sun","year":"2020","unstructured":"Sun, G., Wong, Y., Cheng, Z., Kankanhalli, M., Geng, W., Li, X.: DeepDance: music-to-dance motion choreography with adversarial learning. IEEE Trans. Multimedia 23, 497\u2013509 (2020)","journal-title":"IEEE Trans. Multimedia"},{"key":"7_CR35","doi-asserted-by":"crossref","unstructured":"Tang, T., Jia, J., Mao, H.: Dance with melody: an LSTM-autoencoder approach to music-oriented dance synthesis. In: Proceedings of the 26th ACM International Conference on Multimedia, pp. 1598\u20131606 (2018)","DOI":"10.1145\/3240508.3240526"},{"key":"7_CR36","doi-asserted-by":"crossref","unstructured":"Tang, T., Jia, J., Mao, H.: Dance with melody: an LSTM autoencoder approach to music oriented dance synthesis. In: Proceedings of the 26th ACM International Conference on Multimedia, pp. 1598\u20131606 (2018)","DOI":"10.1145\/3240508.3240526"},{"key":"7_CR37","unstructured":"Tevet, G., Raab, S., Gordon, B., Shafir, Y., Cohen-Or, D., Bermano, A.: Human motion diffusion model. arXiv Preprint arXiv:2209.14916 (2022)"},{"key":"7_CR38","doi-asserted-by":"crossref","unstructured":"Tseng, J., Castellon, R., Liu, K.: Edge: editable dance generation from music. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 448\u2013458 (2023)","DOI":"10.1109\/CVPR52729.2023.00051"},{"key":"7_CR39","doi-asserted-by":"crossref","unstructured":"Wu, W., Zhang, Y., Li, C., Qian, C., Loy, C.: Reenactgan: learning to reenact faces via boundary transfer. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 603\u2013619 (2018)","DOI":"10.1007\/978-3-030-01246-5_37"},{"key":"7_CR40","unstructured":"Yang, S., Yang, Z., Wang, Z.: LongDanceDiff: Long-term Dance Generation with Conditional Diffusion Model. arXiv Preprint arXiv:2308.11945 (2023)"},{"key":"7_CR41","doi-asserted-by":"crossref","unstructured":"Yang, Z., Zhu, W., et al.: TransMoMo: invariance-driven unsupervised video motion retargeting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5306\u20135315 (2020)","DOI":"10.1109\/CVPR42600.2020.00535"},{"key":"7_CR42","doi-asserted-by":"crossref","unstructured":"Zakharov, E., Shysheya, A., Burkov, E., Lempitsky, V.: Few-shot adversarial learning of realistic neural talking head models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9459\u20139468 (2019)","DOI":"10.1109\/ICCV.2019.00955"},{"key":"7_CR43","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Wang, Z., Fang, C., Bui, T., Berg, T.: Dance dance generation: motion transfer for internet videos. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops (2019)","DOI":"10.1109\/ICCVW.2019.00153"},{"key":"7_CR44","doi-asserted-by":"crossref","unstructured":"Zhou, H., Liu, J., Liu, Z., Liu, Y., Wang, X.: Rotate-and-render: unsupervised photorealistic face rotation from single-view images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5911\u20135920 (2020)","DOI":"10.1109\/CVPR42600.2020.00595"},{"key":"7_CR45","doi-asserted-by":"crossref","unstructured":"Zhang, J., et al.: Freenet: multi-identity face reenactment. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5326\u20135335 (2020)","DOI":"10.1109\/CVPR42600.2020.00537"},{"key":"7_CR46","unstructured":"Zhuang, W., Wang, C., et al.: Music2dance: music-driven dance generation using wavenet. arXiv Preprint arXiv:2002.03761 (2020)"}],"container-title":["Communications in Computer and Information Science","Deep Learning Theory and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-04339-9_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T08:07:16Z","timestamp":1761811636000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-04339-9_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783032043382","9783032043399"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-04339-9_7","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"value":"1865-0929","type":"print"},{"value":"1865-0937","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"31 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DeLTA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Deep Learning Theory and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bilbao","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 June 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 June 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"delta2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/delta.scitevents.org\/?y=2025","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}