{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T17:01:12Z","timestamp":1771261272240,"version":"3.50.1"},"reference-count":42,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,7,15]]},"DOI":"10.1109\/icme57554.2024.10687417","type":"proceedings-article","created":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T17:24:16Z","timestamp":1727717056000},"page":"1-6","source":"Crossref","is-referenced-by-count":2,"title":["Voice-to-Face Generation: Couple of Self-Supervised Representation Learning with Diffusion Model"],"prefix":"10.1109","author":[{"given":"Wuyang","family":"Chen","sequence":"first","affiliation":[{"name":"National University of Defense Technology,Changsha,China"}]},{"given":"Kele","family":"Xu","sequence":"additional","affiliation":[{"name":"National University of Defense Technology,Changsha,China"}]},{"given":"Yong","family":"Dou","sequence":"additional","affiliation":[{"name":"National University of Defense Technology,Changsha,China"}]},{"given":"Tian","family":"Gao","sequence":"additional","affiliation":[{"name":"iFlytek Research,Hefei,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2012-169"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2008.4761624"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1044\/jshr.0302.157"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1111\/j.2044-8295.2011.02041.x"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1037\/a0030945"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.cub.2003.09.005"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.tics.2004.01.008"},{"key":"ref8","article-title":"Face reconstruction from voice using generative adversarial networks","volume":"32","author":"Wen","year":"2019","journal-title":"Adv. Neural Inf. Process"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00622"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR48806.2021.9412721"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01563"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00337"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00658"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2023-852"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747669"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747631"},{"key":"ref17","article-title":"From inference to generation: End-to-end fully self-supervised generation of human face from speech","author":"Choi","year":"2020","journal-title":"ICLR"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01499"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01750"},{"key":"ref20","article-title":"Generative adversarial nets","volume":"27","author":"Goodfellow","year":"2014","journal-title":"Adv. Neural Inf. Process"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240601"},{"key":"ref22","article-title":"Disjoint mapping network for cross-modal matching of voices and faces","author":"Wen","year":"2019","journal-title":"ICLR"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01608"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01229"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-20873-8_18"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/526"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10097207"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_5"},{"key":"ref29","article-title":"Contrastive audiovisual masked autoencoder","author":"Gong","year":"2023","journal-title":"ICLR"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01479"},{"issue":"4","key":"ref31","first-page":"9","article-title":"Tr\u00a8aumerai: Dreaming music with stylegan","volume":"2","author":"Jeong","year":"2021"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01457"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19836-6_6"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00772"},{"key":"ref35","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021","journal-title":"ICML"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref37","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho","year":"2020","journal-title":"Adv. Neural Inf. Process"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2017-950"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3071243"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19839-7_8"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20071-7_38"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"}],"event":{"name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","location":"Niagara Falls, ON, Canada","start":{"date-parts":[[2024,7,15]]},"end":{"date-parts":[[2024,7,19]]}},"container-title":["2024 IEEE International Conference on Multimedia and Expo (ICME)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10685847\/10687354\/10687417.pdf?arnumber=10687417","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T06:11:55Z","timestamp":1727763115000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10687417\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,15]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/icme57554.2024.10687417","relation":{},"subject":[],"published":{"date-parts":[[2024,7,15]]}}}