{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T15:49:04Z","timestamp":1775144944470,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T00:00:00Z","timestamp":1634428800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,17]]},"DOI":"10.1145\/3474085.3475318","type":"proceedings-article","created":{"date-parts":[[2021,10,18]],"date-time":"2021-10-18T04:59:18Z","timestamp":1634533158000},"page":"1739-1747","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":32,"title":["Towards Realistic Visual Dubbing with Heterogeneous Sources"],"prefix":"10.1145","author":[{"given":"Tianyi","family":"Xie","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"given":"Liucheng","family":"Liao","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, China"}]},{"given":"Cheng","family":"Bi","sequence":"additional","affiliation":[{"name":"ByteDance AI Lab, Shanghai, China"}]},{"given":"Benlai","family":"Tang","sequence":"additional","affiliation":[{"name":"ByteDance AI Lab, Shanghai, China"}]},{"given":"Xiang","family":"Yin","sequence":"additional","affiliation":[{"name":"ByteDance AI Lab, Shanghai, China"}]},{"given":"Jianfei","family":"Yang","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]},{"given":"Mingjie","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Guelph & Memorial University of Newfoundland, Guelph, ON, Canada"}]},{"given":"Jiali","family":"Yao","sequence":"additional","affiliation":[{"name":"ByteDance AI Lab, Shanghai, China"}]},{"given":"Yang","family":"Zhang","sequence":"additional","affiliation":[{"name":"ByteDance AI Lab, Beijing, China"}]},{"given":"Zejun","family":"Ma","sequence":"additional","affiliation":[{"name":"ByteDance AI Lab, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2021,10,17]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Andrew Senior, Oriol Vinyals, and Andrew Zisserman.","author":"Afouras Triantafyllos","year":"2018"},{"key":"e_1_3_2_2_2_1","volume-title":"2020 a. What comprises a good talking-head video generation?: A Survey and Benchmark. arXiv preprint arXiv:2005.03201","author":"Chen Lele","year":"2020"},{"key":"e_1_3_2_2_3_1","volume-title":"2020 b. Talking-head Generation with Rhythmic Head Motion. arXiv preprint arXiv:2007.08547","author":"Chen Lele","year":"2020"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00802"},{"key":"e_1_3_2_2_5_1","volume-title":"Lip Reading in the Wild. In Asian Conference on Computer Vision .","author":"Chung J. S."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3306346.3323028"},{"key":"e_1_3_2_2_7_1","volume-title":"Marionette: Few-shot face reenactment preserving identity of unseen targets. arXiv preprint arXiv:1911.08139","author":"Ha Sungjoo","year":"2019"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295408"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.167"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01150-y"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46475-6_43"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3355089.3356500"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201283"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3351066"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"crossref","unstructured":"Omkar M Parkhi Andrea Vedaldi and Andrew Zisserman. 2015. Deep face recognition. (2015).  Omkar M Parkhi Andrea Vedaldi and Andrew Zisserman. 2015. Deep face recognition. (2015).","DOI":"10.5244\/C.29.41"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413532"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_2_2_20_1","volume-title":"3rd International Conference on Learning Representations, ICLR","author":"Simonyan Karen","year":"2015"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073640"},{"key":"e_1_3_2_2_22_1","volume-title":"Neural voice puppetry: Audio-driven facial reenactment. arXiv preprint arXiv:1912.05566","author":"Thies Justus","year":"2019"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.5555\/3326943.3327049"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00917"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2003.819861"},{"key":"e_1_3_2_2_26_1","unstructured":"Wayne Wu Chen Qian Shuo Yang Quan Wang Yici Cai and Qiang Zhou. 2018. Look at Boundary: A Boundary-Aware Face Alignment Algorithm. In CVPR.  Wayne Wu Chen Qian Shuo Yang Quan Wang Yici Cai and Qiang Zhou. 2018. Look at Boundary: A Boundary-Aware Face Alignment Algorithm. In CVPR."},{"key":"e_1_3_2_2_27_1","unstructured":"Yongzhe Yan Xavier Naturel Thierry Chateau Stefan Duffner Christophe Garcia and Christophe Blanc. 2018. A survey of deep facial landmark detection. In RFIAP.  Yongzhe Yan Xavier Naturel Thierry Chateau Stefan Duffner Christophe Garcia and Christophe Blanc. 2018. A survey of deep facial landmark detection. In RFIAP."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00457"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2019.00089"},{"key":"e_1_3_2_2_30_1","volume-title":"Fast Bi-layer Neural Synthesis of One-Shot Realistic Head Avatars. In European Conference on Computer Vision. Springer, 524--540","author":"Zakharov Egor","year":"2020"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00955"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3414685.3417774"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"}],"event":{"name":"MM '21: ACM Multimedia Conference","location":"Virtual Event China","acronym":"MM '21","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 29th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475318","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3474085.3475318","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:49:18Z","timestamp":1750193358000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475318"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,17]]},"references-count":34,"alternative-id":["10.1145\/3474085.3475318","10.1145\/3474085"],"URL":"https:\/\/doi.org\/10.1145\/3474085.3475318","relation":{},"subject":[],"published":{"date-parts":[[2021,10,17]]},"assertion":[{"value":"2021-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}