{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T05:09:08Z","timestamp":1781586548082,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":22,"publisher":"ACM","funder":[{"name":"JSPS KAKENHI","award":["JP25K15165"],"award-info":[{"award-number":["JP25K15165"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1145\/3731715.3734426","type":"proceedings-article","created":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T18:29:43Z","timestamp":1750876183000},"page":"2043-2047","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Audio-Visual Driven Compression for Low-Bitrate Talking Head Videos"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-1126-0498","authenticated-orcid":false,"given":"Riku","family":"Takahashi","sequence":"first","affiliation":[{"name":"Hosei University, Tokyo, Japan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6324-9291","authenticated-orcid":false,"given":"Ryugo","family":"Morita","sequence":"additional","affiliation":[{"name":"Hosei University, Tokyo, Japan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5078-0522","authenticated-orcid":false,"given":"Jinjia","family":"Zhou","sequence":"additional","affiliation":[{"name":"Hosei University, Tokyo, Japan"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,6,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3101953"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/DCC52660.2022.00009"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-54427-4_19"},{"key":"e_1_3_2_1_4_1","volume-title":"Image quality assessment: Unifying structure and texture similarity","author":"Ding Keyan","year":"2020","unstructured":"Keyan Ding, Kede Ma, Shiqi Wang, and Eero P Simoncelli. 2020. Image quality assessment: Unifying structure and texture similarity. IEEE transactions on pattern analysis and machine intelligence, Vol. 44, 5 (2020), 2567--2581."},{"key":"e_1_3_2_1_5_1","volume-title":"LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control. arXiv preprint arXiv:2407.03168","author":"Guo Jianzhu","year":"2024","unstructured":"Jianzhu Guo, Dingyun Zhang, Xiaoqiang Liu, Zhizhou Zhong, Yuan Zhang, Pengfei Wan, and Di Zhang. 2024. LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control. arXiv preprint arXiv:2407.03168 (2024)."},{"key":"e_1_3_2_1_6_1","volume-title":"Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems","author":"Heusel Martin","year":"2017","unstructured":"Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter. 2017. Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_7_1","volume-title":"Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114","author":"Kingma Diederik P","year":"2013","unstructured":"Diederik P Kingma. 2013. Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP46576.2022.10458867"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP49359.2023.10222205"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414731"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413532"},{"key":"e_1_3_2_1_12_1","volume-title":"International conference on machine learning. PMLR, 28492--28518","author":"Radford Alec","year":"2023","unstructured":"Alec Radford, Jong Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2023. Robust speech recognition via large-scale weak supervision. In International conference on machine learning. PMLR, 28492--28518."},{"key":"e_1_3_2_1_13_1","volume-title":"U-net: Convolutional networks for biomedical image segmentation. In Medical image computing and computer-assisted intervention--MICCAI 2015: 18th international conference","author":"Ronneberger Olaf","year":"2015","unstructured":"Olaf Ronneberger, Philipp Fischer, and Thomas Brox. 2015. U-net: Convolutional networks for biomedical image segmentation. In Medical image computing and computer-assisted intervention--MICCAI 2015: 18th international conference, Munich, Germany, October 5--9, 2015, proceedings, part III 18. Springer, 234--241."},{"key":"e_1_3_2_1_14_1","volume-title":"Learning audio-visual speech representation by masked multimodal cluster prediction. arXiv preprint arXiv:2201.02184","author":"Shi Bowen","year":"2022","unstructured":"Bowen Shi, Wei-Ning Hsu, Kushal Lakhotia, and Abdelrahman Mohamed. 2022. Learning audio-visual speech representation by masked multimodal cluster prediction. arXiv preprint arXiv:2201.02184 (2022)."},{"key":"e_1_3_2_1_15_1","volume-title":"3rd International Conference on Learning Representations, ICLR","author":"Simonyan Karen","year":"2015","unstructured":"Karen Simonyan and Andrew Zisserman. 2015. Very Deep Convolutional Networks for Large-Scale Image Recognition. In 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7--9, 2015, Conference Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.). http:\/\/arxiv.org\/abs\/1409.1556"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2012.2221191"},{"key":"e_1_3_2_1_17_1","volume-title":"Bidirectional Learned Facial Animation Codec for Low Bitrate Talking Head Videos. In 2025 Data Compression Conference (DCC). IEEE.","author":"Takahashi Riku","year":"2025","unstructured":"Riku Takahashi, Ryugo Morita, Fuma Kimishima, Kosuke Iwama, and Jinjia Zhou. 2025. Bidirectional Learned Facial Animation Codec for Low Bitrate Talking Head Videos. In 2025 Data Compression Conference (DCC). IEEE. in press."},{"key":"e_1_3_2_1_18_1","volume-title":"Towards Real-World Blind Face Restoration with Generative Facial Prior. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Wang Xintao","year":"2021","unstructured":"Xintao Wang, Yu Li, Honglun Zhang, and Ying Shan. 2021. Towards Real-World Blind Face Restoration with Generative Facial Prior. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00607"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_2_1_21_1","volume-title":"Musetalk: Real-time high quality lip synchronization with latent space inpainting. arXiv preprint arXiv:2410.10122","author":"Zhang Yue","year":"2024","unstructured":"Yue Zhang, Minhao Liu, Zhaokang Chen, Bin Wu, Yubin Zeng, Chao Zhan, Yingjie He, Junxin Huang, and Wenjiang Zhou. 2024. Musetalk: Real-time high quality lip synchronization with latent space inpainting. arXiv preprint arXiv:2410.10122 (2024)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20071-7_38"}],"event":{"name":"ICMR '25: International Conference on Multimedia Retrieval","location":"Chicago IL USA","acronym":"ICMR '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2025 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731715.3734426","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T04:08:14Z","timestamp":1755749294000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731715.3734426"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":22,"alternative-id":["10.1145\/3731715.3734426","10.1145\/3731715"],"URL":"https:\/\/doi.org\/10.1145\/3731715.3734426","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]},"assertion":[{"value":"2025-06-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}