{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T16:23:33Z","timestamp":1771950213840,"version":"3.50.1"},"reference-count":43,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,1,10]],"date-time":"2021-01-10T00:00:00Z","timestamp":1610236800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,1,10]],"date-time":"2021-01-10T00:00:00Z","timestamp":1610236800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,1,10]],"date-time":"2021-01-10T00:00:00Z","timestamp":1610236800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100004826","name":"Beijing Natural Science Foundation","doi-asserted-by":"publisher","award":["JQ18017"],"award-info":[{"award-number":["JQ18017"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61976002"],"award-info":[{"award-number":["61976002"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003995","name":"Natural Science Foundation of Anhui","doi-asserted-by":"publisher","award":["KJ2019A0033"],"award-info":[{"award-number":["KJ2019A0033"]}],"id":[{"id":"10.13039\/501100003995","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,1,10]]},"DOI":"10.1109\/icpr48806.2021.9412698","type":"proceedings-article","created":{"date-parts":[[2021,5,6]],"date-time":"2021-05-06T02:15:54Z","timestamp":1620267354000},"page":"3574-3581","source":"Crossref","is-referenced-by-count":5,"title":["Let's Play Music: Audio-Driven Performance Video Generation"],"prefix":"10.1109","author":[{"given":"Hao","family":"Zhu","sequence":"first","affiliation":[]},{"given":"Yi","family":"Li","sequence":"additional","affiliation":[]},{"given":"Feixia","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Aihua","family":"Zheng","sequence":"additional","affiliation":[]},{"given":"Ran","family":"He","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","first-page":"349","article-title":"Deep cross-modal audiovisual generation","author":"chen","year":"0","journal-title":"Proceedings of the on Thematic Workshops of ACM Multimedia 2017"},{"key":"ref38","first-page":"694","article-title":"Perceptual losses for realtime style transfer and super-resolution","author":"johnson","year":"0","journal-title":"European Conference on Computer Vision"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00359"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073640"},{"key":"ref31","article-title":"Dancing to music","author":"lee","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref30","author":"oord","year":"2016","journal-title":"WaveNet A Generative Model for Raw Audio"},{"key":"ref37","author":"cao","year":"2018","journal-title":"OpenPose Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields"},{"key":"ref36","author":"chung","year":"2014","journal-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling"},{"key":"ref35","author":"simonyan","year":"2014","journal-title":"Very Deep Convolutional Networks for Large-scale Image Recognition"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00899"},{"key":"ref10","article-title":"Video-to-video synthesis","author":"wang","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref40","first-page":"3","article-title":"Constant-q transform toolbox for music processing","author":"sch\u00f6rkhuber","year":"0","journal-title":"Sound and Music Computing Conference"},{"key":"ref11","first-page":"234","article-title":"U-net: Convolutional networks for biomedical image segmentation","author":"ronneberger","year":"0","journal-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00870"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref15","first-page":"2672","article-title":"Generative adversarial nets","author":"goodfellow","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2017.2763119"},{"key":"ref17","first-page":"7355","article-title":"Adversarial discriminative heterogeneous face recognition","author":"song","year":"0","journal-title":"Conference on Artificial Intelligence"},{"key":"ref18","author":"zhu","year":"2020","journal-title":"Deep audio-visual learning A survey"},{"key":"ref19","article-title":"Everybody's talkin': Let me talk as you want","volume":"abs 2001 5201","author":"song","year":"2020","journal-title":"CoRR"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00790"},{"key":"ref4","first-page":"406","article-title":"Pose guided person image generation","author":"ma","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref27","author":"lee","year":"2018","journal-title":"Listen to dance Music-driven choreography generation using autoregressive encoder-decoder network"},{"key":"ref3","first-page":"5933","author":"chan","year":"2019","journal-title":"Everybody dance now"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00802"},{"key":"ref29","author":"zhuang","year":"2020","journal-title":"Music2dance Music-driven dance generation using wavenet"},{"key":"ref5","author":"jalalifar","year":"2018","journal-title":"Speech-driven facial reenactment using conditional generative adversarial networks"},{"key":"ref8","author":"kumar","year":"2017","journal-title":"ObamaNet Photo-realistic lip-sync from text"},{"key":"ref7","author":"li","year":"2019","journal-title":"Layoutgan Generating graphic layouts with wireframe discriminators"},{"key":"ref2","author":"song","year":"2018","journal-title":"Geometry-aware face completion and editing"},{"key":"ref9","article-title":"Semi-supervised classification with graph convolutional networks","author":"kipf","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240612"},{"key":"ref20","article-title":"Talking face generation by adversarially disentangled audio-visual representation","volume":"abs 1807 7860","author":"zhou","year":"2018","journal-title":"CoRR"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58589-1_42"},{"key":"ref21","author":"zhu","year":"2018","journal-title":"High-resolution talking face generation via mutual information approximation"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2003.819861"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00037"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"ref23","author":"zhou","year":"2017","journal-title":"Visual to sound Generating natural sound for videos in the wild"},{"key":"ref26","first-page":"26","article-title":"Groovenet: Real-time music-driven dance movement generation using artificial neural networks","volume":"8","author":"alemi","year":"2017","journal-title":"Networks"},{"key":"ref43","author":"wang","year":"2018","journal-title":"Video-to-Video Synthesis"},{"key":"ref25","first-page":"501","article-title":"Example-based automatic music-driven conventional dance motion synthesis","volume":"18","author":"fan","year":"2011","journal-title":"IEEE Transactions on Visualization and Computer Graphics"}],"event":{"name":"2020 25th International Conference on Pattern Recognition (ICPR)","location":"Milan, Italy","start":{"date-parts":[[2021,1,10]]},"end":{"date-parts":[[2021,1,15]]}},"container-title":["2020 25th International Conference on Pattern Recognition (ICPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9411940\/9411911\/09412698.pdf?arnumber=9412698","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:40:53Z","timestamp":1652197253000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9412698\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,10]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/icpr48806.2021.9412698","relation":{},"subject":[],"published":{"date-parts":[[2021,1,10]]}}}