{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T13:34:55Z","timestamp":1758893695311,"version":"3.37.3"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,1,10]],"date-time":"2021-01-10T00:00:00Z","timestamp":1610236800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,1,10]],"date-time":"2021-01-10T00:00:00Z","timestamp":1610236800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,1,10]],"date-time":"2021-01-10T00:00:00Z","timestamp":1610236800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61976002"],"award-info":[{"award-number":["61976002"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,1,10]]},"DOI":"10.1109\/icpr48806.2021.9412425","type":"proceedings-article","created":{"date-parts":[[2021,5,6]],"date-time":"2021-05-06T02:15:54Z","timestamp":1620267354000},"page":"3682-3689","source":"Crossref","is-referenced-by-count":4,"title":["Talking Face Generation via Learning Semantic and Temporal Synchronous Landmarks"],"prefix":"10.1109","author":[{"given":"Aihua","family":"Zheng","sequence":"first","affiliation":[]},{"given":"Feixia","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Hao","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Mandi","family":"Luo","sequence":"additional","affiliation":[]},{"given":"Ran","family":"He","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","first-page":"1755","article-title":"Dlib-ml: A machine learning toolkit","volume":"10","author":"king","year":"2009","journal-title":"Journal of Machine Learning Research"},{"journal-title":"Conditional generative adversarial nets","year":"2014","author":"mirza","key":"ref32"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01251-8"},{"journal-title":"You said that?","year":"2017","author":"chung","key":"ref30"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.69"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1121\/1.2229005"},{"key":"ref34","first-page":"87","article-title":"Lip reading in the wild","author":"chung","year":"2016","journal-title":"Asian Conference on Computer Vision"},{"key":"ref10","article-title":"Generation of mouthshape for a synthetic talking head","author":"simons","year":"1990","journal-title":"Proc of the Institute of Acoustics"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2006.12.001"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5539823"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00917"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00252"},{"journal-title":"MINE Mutual information neural estimation","year":"2018","author":"belghazi","key":"ref15"},{"key":"ref16","first-page":"234","article-title":"U-net: Convolutional networks for biomedical image segmentation","author":"ronneberger","year":"2015","journal-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention"},{"key":"ref17","first-page":"545","article-title":"Joint face alignment and 3d face reconstruction","author":"liu","year":"2016","journal-title":"European Conference on Computer Vision"},{"key":"ref18","first-page":"365","article-title":"Image2mesh: A learning framework for single image 3d reconstruction","author":"pontes","year":"2018","journal-title":"Asian Conference on Computer Vision"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01016"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-93764-9_35"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_32"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00790"},{"key":"ref3","first-page":"2867","article-title":"Learning a high fidelity pose invariant model for high-resolution face frontalization","author":"cao","year":"2018","journal-title":"Advances in neural information processing systems"},{"journal-title":"Speech-driven facial reenactment using conditional generative adversarial networks","year":"2018","author":"jalalifar","key":"ref6"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/MASSP.1986.1165342"},{"key":"ref5","first-page":"670","article-title":"X2face: A network for controlling face generation using images, audio, and pose codes","author":"wiles","year":"2018","journal-title":"European Conference on Computer Vision"},{"journal-title":"Hierarchical cross-modal talking face generationwith dynamic pixel-wise loss","year":"2019","author":"chen","key":"ref8"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019299"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2017.2763119"},{"journal-title":"High-resolution talking face generation via mutual information approximation","year":"2018","author":"zhu","key":"ref9"},{"key":"ref1","first-page":"2672","article-title":"Generative adversarial nets","author":"goodfellow","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref20","first-page":"41","article-title":"Real-time convolutional networks for depth-based human pose estimation","author":"martinez-gonzalez","year":"2018","journal-title":"International Conference on Intelligent Robots and Systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.400"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.603"},{"journal-title":"Listen to dance Music-driven choreography generation using autoregressive encoder-decoder network","year":"2018","author":"lee","key":"ref24"},{"key":"ref23","first-page":"26","article-title":"Groovenet: Real-time music-driven dance movement generation using artificial neural networks","volume":"8","author":"alemi","year":"2017","journal-title":"Networks"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2019.8851872"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240526"}],"event":{"name":"2020 25th International Conference on Pattern Recognition (ICPR)","start":{"date-parts":[[2021,1,10]]},"location":"Milan, Italy","end":{"date-parts":[[2021,1,15]]}},"container-title":["2020 25th International Conference on Pattern Recognition (ICPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9411940\/9411911\/09412425.pdf?arnumber=9412425","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:40:53Z","timestamp":1652197253000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9412425\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,10]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/icpr48806.2021.9412425","relation":{},"subject":[],"published":{"date-parts":[[2021,1,10]]}}}