{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,15]],"date-time":"2025-08-15T00:59:50Z","timestamp":1755219590024,"version":"3.43.0"},"reference-count":29,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T00:00:00Z","timestamp":1748217600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T00:00:00Z","timestamp":1748217600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,26]]},"DOI":"10.1109\/fg61629.2025.11099188","type":"proceedings-article","created":{"date-parts":[[2025,8,6]],"date-time":"2025-08-06T17:55:00Z","timestamp":1754502900000},"page":"1-9","source":"Crossref","is-referenced-by-count":1,"title":["CDCGM: Composition-specified Dance Choreography Generation from Music"],"prefix":"10.1109","author":[{"given":"Ryo","family":"Ishii","sequence":"first","affiliation":[{"name":"Human Informatics Laboratories, NTT Corporation,Kanagawa,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shinichiro","family":"Eitoku","sequence":"additional","affiliation":[{"name":"Human Informatics Laboratories, NTT Corporation,Kanagawa,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Louis-Philippe","family":"Morency","sequence":"additional","affiliation":[{"name":"Language Technologies Institute Carnegie Mellon University,PA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547797"},{"key":"ref2","article-title":"Codified audio language modeling learns useful representations for music information retrieval","author":"Castellon","year":"2021","journal-title":"arXiv preprint arXiv:2107.05677"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3450626.3459940"},{"key":"ref4","article-title":"Jukebox: A generative model for music","author":"Dhariwal","year":"2020","journal-title":"arXiv preprint arXiv:2005.00341"},{"article-title":"Sheet sage: Lead sheets from music audio","volume-title":"Proc. ISMIR Late-Breaking and Demo","author":"Donahue","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.compeleceng.2022.108310"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2011.73"},{"key":"ref8","article-title":"Imagen video: High definition video generation with diffusion models","author":"Ho","year":"2022","journal-title":"arXiv preprint arXiv:2210.02303"},{"key":"ref9","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747838"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i7.25996"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-012-1288-5"},{"key":"ref13","article-title":"Learning to generate diverse dance motions with transformer","author":"Li","year":"2020","journal-title":"arXiv preprint arXiv:2008.08171"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01315"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3596711.3596800"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2011.2181492"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11671"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01080"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20047-2_28"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref21","first-page":"36479","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","volume":"35","author":"Saharia","year":"2022","journal-title":"Advances in neural information processing systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2014.131"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3407659"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01077"},{"key":"ref25","first-page":"2256","article-title":"Deep unsupervised learning using nonequilibrium thermodynamics","volume-title":"In International conference on machine learning","author":"Sohl-Dickstein"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3528223.3530090"},{"key":"ref27","article-title":"Human motion diffusion model","author":"Tevet","year":"2022","journal-title":"arXiv preprint arXiv:2209.14916"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00051"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3355414"}],"event":{"name":"2025 IEEE 19th International Conference on Automatic Face and Gesture Recognition (FG)","start":{"date-parts":[[2025,5,26]]},"location":"Tampa\/Clearwater, FL, USA","end":{"date-parts":[[2025,5,30]]}},"container-title":["2025 IEEE 19th International Conference on Automatic Face and Gesture Recognition (FG)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11099084\/11099070\/11099188.pdf?arnumber=11099188","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,7]],"date-time":"2025-08-07T05:21:05Z","timestamp":1754544065000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11099188\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,26]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/fg61629.2025.11099188","relation":{},"subject":[],"published":{"date-parts":[[2025,5,26]]}}}