{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T15:07:17Z","timestamp":1742915237518,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":32,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819601219"},{"type":"electronic","value":"9789819601226"}],"license":[{"start":{"date-parts":[[2024,11,12]],"date-time":"2024-11-12T00:00:00Z","timestamp":1731369600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,12]],"date-time":"2024-11-12T00:00:00Z","timestamp":1731369600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-0122-6_38","type":"book-chapter","created":{"date-parts":[[2024,11,16]],"date-time":"2024-11-16T18:23:49Z","timestamp":1731781429000},"page":"443-455","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Audio-Driven Face Photo-Sketch Video Generation"],"prefix":"10.1007","author":[{"given":"Siyue","family":"Zhou","sequence":"first","affiliation":[]},{"given":"Qun","family":"Guan","sequence":"additional","affiliation":[]},{"given":"Chunlei","family":"Peng","sequence":"additional","affiliation":[]},{"given":"Decheng","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Yu","family":"Zheng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,12]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Blanz, V., Vetter, T.: A morphable model for the synthesis of 3d faces. In: Seminal Graphics Papers: Pushing the Boundaries, vol. 2, pp. 157\u2013164 (2023)","key":"38_CR1","DOI":"10.1145\/3596711.3596730"},{"doi-asserted-by":"crossref","unstructured":"Chen, L., Maddox, R.K., Duan, Z., Xu, C.: Hierarchical cross-modal talking face generation with dynamic pixel-wise loss. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp. 7832\u20137841 (2019)","key":"38_CR2","DOI":"10.1109\/CVPR.2019.00802"},{"doi-asserted-by":"crossref","unstructured":"Cheng, K., et al.: Videoretalking: audio-based lip synchronization for talking head video editing in the wild. In: SIGGRAPH Asia 2022 Conference Papers, pp.\u00a01\u20139 (2022)","key":"38_CR3","DOI":"10.1145\/3550469.3555399"},{"doi-asserted-by":"crossref","unstructured":"Deng, J., Guo, J., Xue, N., Zafeiriou, S.: Arcface: additive angular margin loss for deep face recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4690\u20134699 (2019)","key":"38_CR4","DOI":"10.1109\/CVPR.2019.00482"},{"doi-asserted-by":"crossref","unstructured":"Deng, Y., Yang, J., Xu, S., Chen, D., Jia, Y., Tong, X.: Accurate 3d face reconstruction with weakly-supervised learning: from single image to image set. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (2019)","key":"38_CR5","DOI":"10.1109\/CVPRW.2019.00038"},{"unstructured":"Doersch, C.: Tutorial on variational autoencoders. arXiv preprint arXiv:1606.05908 (2016)","key":"38_CR6"},{"doi-asserted-by":"crossref","unstructured":"Garrido, P., et al.: Vdub: modifying face video of actors for plausible visual alignment to a dubbed audio track. In: Computer Graphics Forum, vol.\u00a034, pp. 193\u2013204. Wiley Online Library (2015)","key":"38_CR7","DOI":"10.1111\/cgf.12552"},{"unstructured":"Goodfellow, I., et al.: Generative adversarial nets. Advances in neural information processing systems 27 (2014)","key":"38_CR8"},{"doi-asserted-by":"crossref","unstructured":"Ji, X., et al.: Eamm: one-shot emotional talking face via audio-based emotion-aware motion model. In: ACM SIGGRAPH 2022 Conference Proceedings, pp. 1\u201310 (2022)","key":"38_CR9","DOI":"10.1145\/3528233.3530745"},{"issue":"4","key":"38_CR10","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073658","volume":"36","author":"T Karras","year":"2017","unstructured":"Karras, T., Aila, T., Laine, S., Herva, A., Lehtinen, J.: Audio-driven facial animation by joint end-to-end learning of pose and emotion. ACM Trans. Graph. (TOG) 36(4), 1\u201312 (2017)","journal-title":"ACM Trans. Graph. (TOG)"},{"unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)","key":"38_CR11"},{"unstructured":"KR, P., Mukhopadhyay, R., Philip, J., Jha, A., Namboodiri, V., Jawahar, C.: Towards automatic face-to-face translation. In: Proceedings of the 27th ACM International Conference on Multimedia, pp. 1428\u20131436 (2019)","key":"38_CR12"},{"issue":"2","key":"38_CR13","doi-asserted-by":"publisher","first-page":"1447","DOI":"10.1007\/s11071-019-05170-8","volume":"98","author":"P Liu","year":"2019","unstructured":"Liu, P., Yu, H., Cang, S.: Adaptive neural network tracking control for underactuated systems with matched and mismatched disturbances. Nonlinear Dyn. 98(2), 1447\u20131464 (2019)","journal-title":"Nonlinear Dyn."},{"doi-asserted-by":"crossref","unstructured":"Lu, Y., Wu, S., Tai, Y.W., Tang, C.K.: Image generation from sketch constraint using contextual gan. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 205\u2013220 (2018)","key":"38_CR14","DOI":"10.1007\/978-3-030-01270-0_13"},{"issue":"9","key":"38_CR15","doi-asserted-by":"publisher","first-page":"2678","DOI":"10.1109\/TIP.2011.2131660","volume":"20","author":"ND Narvekar","year":"2011","unstructured":"Narvekar, N.D., Karam, L.J.: A no-reference image blur metric based on the cumulative probability of blur detection (cpbd). IEEE Trans. Image Process. 20(9), 2678\u20132683 (2011)","journal-title":"IEEE Trans. Image Process."},{"doi-asserted-by":"crossref","unstructured":"Prajwal, K., Mukhopadhyay, R., Namboodiri, V.P., Jawahar, C.: A lip sync expert is all you need for speech to lip generation in the wild. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 484\u2013492 (2020)","key":"38_CR16","DOI":"10.1145\/3394171.3413532"},{"issue":"9","key":"38_CR17","doi-asserted-by":"publisher","first-page":"3487","DOI":"10.1109\/JSEN.2018.2888815","volume":"19","author":"L Sun","year":"2018","unstructured":"Sun, L., Zhao, C., Yan, Z., Liu, P., Duckett, T., Stolkin, R.: A novel weakly-supervised approach for rgb-d-based nuclear waste object detection. IEEE Sens. J. 19(9), 3487\u20133500 (2018)","journal-title":"IEEE Sens. J."},{"unstructured":"Tang, X., Wang, X.: Face photo recognition using sketch. In: Proceedings. International Conference on Image Processing, vol.\u00a01, pp.\u00a0I\u2013I. IEEE (2002)","key":"38_CR18"},{"doi-asserted-by":"crossref","unstructured":"Tang, Z.c., Li, C., Wu, J.f., Liu, P.c., Cheng, S.w.: Classification of eeg-based single-trial motor imagery tasks using a b-csp method for bci. Front. Inf. Technol. Electronic Eng. 20(8), 1087\u20131098 (2019)","key":"38_CR19","DOI":"10.1631\/FITEE.1800083"},{"key":"38_CR20","doi-asserted-by":"publisher","first-page":"128185","DOI":"10.1109\/ACCESS.2019.2940034","volume":"7","author":"Z Tang","year":"2019","unstructured":"Tang, Z., Yu, H., Lu, C., Liu, P., Jin, X.: Single-trial classification of different movements on one arm based on erd\/ers and corticomuscular coherence. IEEE Access 7, 128185\u2013128197 (2019)","journal-title":"IEEE Access"},{"doi-asserted-by":"crossref","unstructured":"Thies, J., Elgharib, M., Tewari, A., Theobalt, C., Nie\u00dfner, M.: Neural voice puppetry: Audio-driven facial reenactment. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XVI 16, pp. 716\u2013731. Springer (2020)","key":"38_CR21","DOI":"10.1007\/978-3-030-58517-4_42"},{"doi-asserted-by":"crossref","unstructured":"Wang, L., Sindagi, V., Patel, V.: High-quality facial photo-sketch synthesis using multi-adversarial networks. In: 2018 13th IEEE International Conference on Automatic Face & Gesture Recognition (FG 2018), pp. 83\u201390. IEEE (2018)","key":"38_CR22","DOI":"10.1109\/FG.2018.00022"},{"doi-asserted-by":"crossref","unstructured":"Wang, S., Li, L., Ding, Y., Fan, C., Yu, X.: Audio2head: audio-driven one-shot talking-head generation with natural head motion. arXiv preprint arXiv:2107.09293 (2021)","key":"38_CR23","DOI":"10.24963\/ijcai.2021\/152"},{"doi-asserted-by":"crossref","unstructured":"Wang, T.C., Mallya, A., Liu, M.Y.: One-shot free-view neural talking-head synthesis for video conferencing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10039\u201310049 (2021)","key":"38_CR24","DOI":"10.1109\/CVPR46437.2021.00991"},{"issue":"11","key":"38_CR25","doi-asserted-by":"publisher","first-page":"1955","DOI":"10.1109\/TPAMI.2008.222","volume":"31","author":"X Wang","year":"2008","unstructured":"Wang, X., Tang, X.: Face photo-sketch synthesis and recognition. IEEE Trans. Pattern Anal. Mach. Intell. 31(11), 1955\u20131967 (2008)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"5","key":"38_CR26","doi-asserted-by":"publisher","first-page":"1280","DOI":"10.1109\/TIFS.2018.2871746","volume":"14","author":"ZL Yang","year":"2018","unstructured":"Yang, Z.L., Guo, X.Q., Chen, Z.M., Huang, Y.F., Zhang, Y.J.: Rnn-stega: linguistic steganography based on recurrent neural networks. IEEE Trans. Inf. Forensics Secur. 14(5), 1280\u20131295 (2018)","journal-title":"IEEE Trans. Inf. Forensics Secur."},{"issue":"1\u20132","key":"38_CR27","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1016\/S0167-6393(98)00048-X","volume":"26","author":"H Yehia","year":"1998","unstructured":"Yehia, H., Rubin, P., Vatikiotis-Bateson, E.: Quantitative association of vocal-tract and facial behavior. Speech Commun. 26(1\u20132), 23\u201343 (1998)","journal-title":"Speech Commun."},{"doi-asserted-by":"crossref","unstructured":"Zhang, W., et al.: Sadtalker: learning realistic 3d motion coefficients for stylized audio-driven single image talking face animation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8652\u20138661 (2023)","key":"38_CR28","DOI":"10.1109\/CVPR52729.2023.00836"},{"doi-asserted-by":"crossref","unstructured":"Zhang, Z., Li, L., Ding, Y., Fan, C.: Flow-guided one-shot talking face generation with a high-resolution audio-visual dataset. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3661\u20133670 (2021)","key":"38_CR29","DOI":"10.1109\/CVPR46437.2021.00366"},{"doi-asserted-by":"crossref","unstructured":"Zhou, H., Sun, Y., Wu, W., Loy, C.C., Wang, X., Liu, Z.: Pose-controllable talking face generation by implicitly modularized audio-visual representation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4176\u20134186 (2021)","key":"38_CR30","DOI":"10.1109\/CVPR46437.2021.00416"},{"issue":"6","key":"38_CR31","first-page":"1","volume":"39","author":"Y Zhou","year":"2020","unstructured":"Zhou, Y., Han, X., Shechtman, E., Echevarria, J., Kalogerakis, E., Li, D.: Makelttalk: speaker-aware talking-head animation. ACM Trans. Graph. (TOG) 39(6), 1\u201315 (2020)","journal-title":"ACM Trans. Graph. (TOG)"},{"issue":"9","key":"38_CR32","doi-asserted-by":"publisher","first-page":"5200","DOI":"10.1109\/TCSVT.2023.3253773","volume":"33","author":"M Zhu","year":"2023","unstructured":"Zhu, M., Wu, Z., Wang, N., Yang, H., Gao, X.: Dual conditional normalization pyramid network for face photo-sketch synthesis. IEEE Trans. Circuits Syst. Video Technol. 33(9), 5200\u20135211 (2023)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."}],"container-title":["Lecture Notes in Computer Science","PRICAI 2024: Trends in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-0122-6_38","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,16]],"date-time":"2024-11-16T19:22:13Z","timestamp":1731784933000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-0122-6_38"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,12]]},"ISBN":["9789819601219","9789819601226"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-0122-6_38","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,12]]},"assertion":[{"value":"12 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"PRICAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific Rim International Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kyoto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 November 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 November 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pricai2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.pricai.org\/2024\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}