{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T16:17:34Z","timestamp":1775578654409,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612831","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:40Z","timestamp":1698391660000},"page":"9615-9619","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Learning and Evaluating Human Preferences for Conversational Head Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3250-4978","authenticated-orcid":false,"given":"Mohan","family":"Zhou","sequence":"first","affiliation":[{"name":"Harbin Institute of Technology, Harbin, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8416-9027","authenticated-orcid":false,"given":"Yalong","family":"Bai","sequence":"additional","affiliation":[{"name":"JD Explore Academy, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1492-8286","authenticated-orcid":false,"given":"Wei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Gaoding AI, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7587-101X","authenticated-orcid":false,"given":"Ting","family":"Yao","sequence":"additional","affiliation":[{"name":"HiDream.ai, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4659-4935","authenticated-orcid":false,"given":"Tiejun","family":"Zhao","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Harbin, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5990-7307","authenticated-orcid":false,"given":"Tao","family":"Mei","sequence":"additional","affiliation":[{"name":"HiDream.ai, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00515"},{"key":"e_1_3_2_1_2_1","volume-title":"Learning to rank with nonsmooth cost functions. Advances in neural information processing systems","author":"Burges Christopher","year":"2006","unstructured":"Christopher Burges, Robert Ragno, and Quoc Le. 2006. Learning to rank with nonsmooth cost functions. Advances in neural information processing systems, Vol. 19 (2006)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1102351.1102363"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58545-7_3"},{"key":"e_1_3_2_1_5_1","volume-title":"CatBoost: gradient boosting with categorical features support. arXiv preprint arXiv:1810.11363","author":"Dorogush Anna Veronika","year":"2018","unstructured":"Anna Veronika Dorogush, Vasily Ershov, and Andrey Gulin. 2018. CatBoost: gradient boosting with categorical features support. arXiv preprint arXiv:1810.11363 (2018)."},{"key":"e_1_3_2_1_6_1","volume-title":"DAE-Talker: High Fidelity Speech-Driven Talking Face Generation with Diffusion Autoencoder. arXiv preprint arXiv:2303.17550","author":"Du Chenpng","year":"2023","unstructured":"Chenpng Du, Qi Chen, Tianyu He, Xu Tan, Xie Chen, Kai Yu, Sheng Zhao, and Jiang Bian. 2023. DAE-Talker: High Fidelity Speech-Driven Talking Face Generation with Diffusion Autoencoder. arXiv preprint arXiv:2303.17550 (2023)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00854"},{"key":"e_1_3_2_1_8_1","volume-title":"Emotionally Enhanced Talking Face Generation. arXiv preprint arXiv:2303.11548","author":"Goyal Sahil","year":"2023","unstructured":"Sahil Goyal, Shagun Uppal, Sarthak Bhagat, Yi Yu, Yifang Yin, and Rajiv Ratn Shah. 2023. Emotionally Enhanced Talking Face Generation. arXiv preprint arXiv:2303.11548 (2023)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01810"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00151"},{"key":"e_1_3_2_1_11_1","volume-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861","author":"Howard Andrew G","year":"2017","unstructured":"Andrew G Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, and Hartwig Adam. 2017. Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3528233.3530745"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20086-1_20"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00338"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19836-6_7"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.5555\/3104322.3104425"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01975"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413532"},{"key":"e_1_3_2_1_19_1","volume-title":"Anna Veronika Dorogush, and Andrey Gulin","author":"Prokhorenkova Liudmila","year":"2018","unstructured":"Liudmila Prokhorenkova, Gleb Gusev, Aleksandr Vorobev, Anna Veronika Dorogush, and Andrey Gulin. 2018. CatBoost: unbiased boosting with categorical features. Advances in neural information processing systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00197"},{"key":"e_1_3_2_1_21_1","volume-title":"Stavros Petridis, and Maja Pantic.","author":"Stypu\u0142kowski Micha\u0142","year":"2023","unstructured":"Micha\u0142 Stypu\u0142kowski, Konstantinos Vougioukas, Sen He, Maciej Zike ba, Stavros Petridis, and Maja Pantic. 2023. Diffused heads: Diffusion models beat gans on talking-face generation. arXiv preprint arXiv:2301.03396 (2023)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02011"},{"key":"e_1_3_2_1_23_1","volume-title":"Explicitly controllable 3d-aware portrait generation. arXiv preprint arXiv:2209.05434","author":"Tang Junshu","year":"2022","unstructured":"Junshu Tang, Bo Zhang, Binxin Yang, Ting Zhang, Dong Chen, Lizhuang Ma, and Fang Wen. 2022. Explicitly controllable 3d-aware portrait generation. arXiv preprint arXiv:2209.05434 (2022)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01251-8"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01408"},{"key":"e_1_3_2_1_26_1","volume-title":"Audio2head: Audio-driven one-shot talking-head generation with natural head motion. arXiv preprint arXiv:2107.09293","author":"Wang Suzhen","year":"2021","unstructured":"Suzhen Wang, Lincheng Li, Yu Ding, Changjie Fan, and Xin Yu. 2021. Audio2head: Audio-driven one-shot talking-head generation with natural head motion. arXiv preprint arXiv:2107.09293 (2021)."},{"key":"e_1_3_2_1_27_1","volume-title":"DFA-NeRF: Personalized talking head generation via disentangled face attributes neural rendering. arXiv preprint arXiv:2201.00791","author":"Yao Shunyu","year":"2022","unstructured":"Shunyu Yao, RuiZhe Zhong, Yichao Yan, Guangtao Zhai, and Xiaokang Yang. 2022. DFA-NeRF: Personalized talking head generation via disentangled face attributes neural rendering. arXiv preprint arXiv:2201.00791 (2022)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00825"},{"key":"e_1_3_2_1_29_1","first-page":"22451","article-title":"FNeVR: Neural volume rendering for face animation","volume":"35","author":"Zeng Bohan","year":"2022","unstructured":"Bohan Zeng, Boyu Liu, Hong Li, Xuhui Liu, Jianzhuang Liu, Dapeng Chen, Wei Peng, and Baochang Zhang. 2022. FNeVR: Neural volume rendering for face animation. Advances in Neural Information Processing Systems, Vol. 35 (2022), 22451--22462.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00384"},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 13545--13555","author":"Zheng Yufeng","year":"2022","unstructured":"Yufeng Zheng, Victoria Fern\u00e1ndez Abrevaya, Marcel C B\u00fchler, Xu Chen, Michael J Black, and Otmar Hilliges. 2022. Im avatar: Implicit morphable head avatars from videos. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 13545--13555."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00416"},{"key":"e_1_3_2_1_33_1","unstructured":"Mohan Zhou Yalong Bai Wei Zhang Ting Yao and Tiejun Zhao. 2023 a. Interactive Conversational Head Generation. arxiv: 2307.02090 [cs.CV]"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19839-7_8"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095084"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612831","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612831","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:08:26Z","timestamp":1755821306000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612831"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":35,"alternative-id":["10.1145\/3581783.3612831","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612831","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}