{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,8]],"date-time":"2026-02-08T06:04:08Z","timestamp":1770530648203,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Natural Science Foundation of China","award":["61972163"],"award-info":[{"award-number":["61972163"]}]},{"name":"Guangdong Provincial Key Laboratory of Human Digital Twin","award":["2022B1212010004"],"award-info":[{"award-number":["2022B1212010004"]}]},{"DOI":"10.13039\/501100003453","name":"Natural Science Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2022A1515011555","2023A1515012568"],"award-info":[{"award-number":["2022A1515011555","2023A1515012568"]}],"id":[{"id":"10.13039\/501100003453","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681017","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"3800-3808","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Self-Supervised Emotion Representation Disentanglement for Speech-Preserving Facial Expression Manipulation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0731-4585","authenticated-orcid":false,"given":"Zhihua","family":"Xu","sequence":"first","affiliation":[{"name":"Guangdong University of Technology, Guangzhou, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5848-5624","authenticated-orcid":false,"given":"Tianshui","family":"Chen","sequence":"additional","affiliation":[{"name":"Guangdong University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8336-5109","authenticated-orcid":false,"given":"Zhijing","family":"Yang","sequence":"additional","affiliation":[{"name":"Guangdong University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4733-306X","authenticated-orcid":false,"given":"Chunmei","family":"Qing","sequence":"additional","affiliation":[{"name":"South China University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9413-6528","authenticated-orcid":false,"given":"Yukai","family":"Shi","sequence":"additional","affiliation":[{"name":"Guangdong University of Technology, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2248-3755","authenticated-orcid":false,"given":"Liang","family":"Lin","sequence":"additional","affiliation":[{"name":"Sun Yat-sen University, Guangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"KDD workshop","volume":"10","author":"Berndt Donald J","year":"1994","unstructured":"Donald J Berndt and James Clifford. 1994. Using dynamic time warping to find patterns in time series. In KDD workshop, Vol. 10. Seattle, WA, USA:, 359--370."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/311535.311556"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00694"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00821"},{"key":"e_1_3_2_1_5_1","volume-title":"Workshop on Multi-view Lip-reading, ACCV.","author":"Chung J. S.","unstructured":"J. S. Chung and A. Zisserman. 2016. Out of time: automated lip sync in the wild. 
In Workshop on Multi-view Lip-reading, ACCV."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022627411411"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00482"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBIOM.2021.3049576"},{"key":"e_1_3_2_1_9_1","volume-title":"Improved Residual Networks for Image and Video Recognition. arXiv preprint arXiv:2004.04989","author":"Duta Ionut Cosmin","year":"2020","unstructured":"Ionut Cosmin Duta, Li Liu, Fan Zhu, and Ling Shao. 2020. Improved Residual Networks for Image and Video Recognition. arXiv preprint arXiv:2004.04989 (2020)."},{"key":"e_1_3_2_1_10_1","first-page":"5","article-title":"Facial action coding system: a technique for the measurement of facial movement","volume":"3","author":"Friesen E","year":"1978","unstructured":"E Friesen and Paul Ekman. 1978. Facial action coding system: a technique for the measurement of facial movement. Palo Alto, Vol. 3, 2 (1978), 5.","journal-title":"Palo Alto"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02069"},{"key":"e_1_3_2_1_12_1","unstructured":"Ian J. Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron C. Courville and Yoshua Bengio. 2014. Generative Adversarial Nets. In NIPS."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01912"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_15_1","volume-title":"Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems","author":"Heusel Martin","year":"2017","unstructured":"Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter. 2017. Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems, Vol. 
30 (2017)."},{"key":"e_1_3_2_1_16_1","volume-title":"Alias-Free Generative Adversarial Networks. In Conference on Neural Information Processing Systems.","author":"Karras Tero","year":"2021","unstructured":"Tero Karras, Miika Aittala, Samuli Laine, Erik H\u00e4rk\u00f6nen, Janne Hellsten, Jaakko Lehtinen, and Timo Aila. 2021. Alias-Free Generative Adversarial Networks. In Conference on Neural Information Processing Systems."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00590"},{"key":"e_1_3_2_1_20_1","first-page":"1","article-title":"Neural style-preserving visual dubbing","volume":"38","author":"Kim Hyeongwoo","year":"2019","unstructured":"Hyeongwoo Kim, Mohamed Elgharib, Michael Zollh\u00f6fer, Hans-Peter Seidel, Thabo Beeler, Christian Richardt, and Christian Theobalt. 2019. Neural style-preserving visual dubbing. ACM Transactions on Graphics, Vol. 38, 6 (2019), 1--13.","journal-title":"ACM Transactions on Graphics"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0196391"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00049"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01822"},{"key":"e_1_3_2_1_24_1","volume-title":"Proceedings of the 28th ACM International Conference on Multimedia","author":"Prajwal K R","unstructured":"K R Prajwal, Rudrabha Mukhopadhyay, Vinay P. Namboodiri, and C.V. Jawahar. 2020. A Lip Sync Expert Is All You Need for Speech to Lip Generation In the Wild. In Proceedings of the 28th ACM International Conference on Multimedia (Seattle, WA, USA). 
Association for Computing Machinery, New York, NY, USA, 484--492."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01210-3"},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 8748--8763."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01350"},{"key":"e_1_3_2_1_28_1","volume-title":"First Order Motion Model for Image Animation. In Conference on Neural Information Processing Systems.","author":"Siarohin Aliaksandr","year":"2019","unstructured":"Aliaksandr Siarohin, St\u00e9phane Lathuili\u00e8re, Sergey Tulyakov, Elisa Ricci, and Nicu Sebe. 2019. First Order Motion Model for Image Animation. In Conference on Neural Information Processing Systems."},{"key":"e_1_3_2_1_29_1","volume-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition. CoRR","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very Deep Convolutional Networks for Large-Scale Image Recognition. CoRR, Vol. abs\/1409.1556 (2014)."},{"key":"e_1_3_2_1_30_1","volume-title":"European Conference on Computer Vision. Springer, 104--120","author":"Solanki Girish Kumar","year":"2022","unstructured":"Girish Kumar Solanki and Anastasios Roussos. 2022. Deep semantic manipulation of facial videos. In European Conference on Computer Vision. 
Springer, 104--120."},{"key":"e_1_3_2_1_31_1","volume-title":"Continuously Controllable Facial Expression Editing in Talking Face Videos","author":"Sun Zhiyao","year":"2023","unstructured":"Zhiyao Sun, Yu-Hui Wen, Tian Lv, Yanan Sun, Ziyang Zhang, Yaoyuan Wang, and Yong-Jin Liu. 2023. Continuously Controllable Facial Expression Editing in Talking Face Videos. IEEE Transactions on Affective Computing (2023)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093474"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550469.3555382"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58589-1_42"},{"key":"e_1_3_2_1_36_1","volume-title":"International Conference on Learning Representations.","author":"Wang Yaohui","year":"2022","unstructured":"Yaohui Wang, Di Yang, Francois Bremond, and Antitza Dantcheva. 2022. Latent Image Animator: Learning to Animate Images via Latent Space Navigation. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_37_1","unstructured":"Less Wright. 2019. Ranger - a synergistic optimizer. https:\/\/github.com\/lessw2020\/Ranger-Deep-Learning-Optimizer."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3599730"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01920"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19790-1_6"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i4.16465"},{"key":"e_1_3_2_1_43_1","volume-title":"Face Reenactment Based on Motion Field Representation. In International Conference on Brain Inspired Cognitive Systems. 
354--364","author":"Zheng Si","year":"2023","unstructured":"Si Zheng, Junbin Chen, Zhijing Yang, Tianshui Chen, and Yongyi Lu. 2023. Face Reenactment Based on Motion Field Representation. In International Conference on Brain Inspired Cognitive Systems. 354--364."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681017","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681017","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:36Z","timestamp":1750295856000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681017"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":43,"alternative-id":["10.1145\/3664647.3681017","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681017","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}