{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T09:40:55Z","timestamp":1773740455643,"version":"3.50.1"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030012335","type":"print"},{"value":"9783030012342","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-01234-2_32","type":"book-chapter","created":{"date-parts":[[2018,10,5]],"date-time":"2018-10-05T16:13:11Z","timestamp":1538755991000},"page":"538-553","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":192,"title":["Lip Movements Generation at a Glance"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7073-0450","authenticated-orcid":false,"given":"Lele","family":"Chen","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5972-4826","authenticated-orcid":false,"given":"Zhiheng","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2668-0238","authenticated-orcid":false,"given":"Ross K.","family":"Maddox","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8334-9974","authenticated-orcid":false,"given":"Zhiyao","family":"Duan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2183-822X","authenticated-orcid":false,"given":"Chenliang","family":"Xu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,10,6]]},"reference":[{"key":"32_CR1","unstructured":"Assael, Y.M., Shillingford, B., Whiteson, S., de Freitas, N.: LipNet: end-to-end sentence-level lipreading. arXiv preprint arXiv:1611.01599 (2017)"},{"issue":"7","key":"32_CR2","doi-asserted-by":"publisher","first-page":"e1000436","DOI":"10.1371\/journal.pcbi.1000436","volume":"5","author":"Chandramouli Chandrasekaran","year":"2009","unstructured":"Chandrasekaran, C., Trubanova, A., Stillittano, S., Caplier, A., Ghazanfar, A.A.: The natural statistics of audiovisual speech. PLOS Comput. Biol. 5(7) (2009)","journal-title":"PLoS Computational Biology"},{"key":"32_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"879","DOI":"10.1007\/978-3-319-49409-8_71","volume-title":"Computer Vision \u2013 ECCV 2016 Workshops","author":"J Charles","year":"2016","unstructured":"Charles, J., Magee, D., Hogg, D.: Virtual immortality: reanimating characters from TV shows. In: Hua, G., J\u00e9gou, H. (eds.) ECCV 2016. LNCS, vol. 9915, pp. 879\u2013886. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-49409-8_71"},{"key":"32_CR4","doi-asserted-by":"crossref","unstructured":"Chen, L., Srivastava, S., Duan, Z., Xu, C.: Deep cross-modal audio-visual generation. In: Proceedings of Multimedia Thematic Workshops. ACM (2017)","DOI":"10.1145\/3126686.3126723"},{"key":"32_CR5","unstructured":"Chen, X., Duan, Y., Houthooft, R., Schulman, J., Sutskever, I., Abbeel, P.: InfoGAN: interpretable representation learning by information maximizing generative adversarial nets. In: Proceedings of NIPS. Curran Associates, Inc. (2016)"},{"key":"32_CR6","unstructured":"Chung, J.S., Jamaludin, A., Zisserman, A.: You said that? In: Proceedings of BMVC. Springer (2017)"},{"key":"32_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1007\/978-3-319-54184-6_6","volume-title":"Computer Vision \u2013 ACCV 2016","author":"JS Chung","year":"2017","unstructured":"Chung, J.S., Zisserman, A.: Lip reading in the wild. In: Lai, S.-H., Lepetit, V., Nishino, K., Sato, Y. (eds.) ACCV 2016. LNCS, vol. 10112, pp. 87\u2013103. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-54184-6_6"},{"issue":"5","key":"32_CR8","doi-asserted-by":"publisher","first-page":"2421","DOI":"10.1121\/1.2229005","volume":"120","author":"M Cooke","year":"2006","unstructured":"Cooke, M., Barker, J., Cunningham, S., Shao, X.: An audio-visual corpus for speech perception and automatic speech recognition. J. Acoust. Soc. Am. 120(5), 2421\u20132424 (2006)","journal-title":"J. Acoust. Soc. Am."},{"key":"32_CR9","unstructured":"Cutler, R., Davis, L.S.: Look who\u2019s talking: speaker detection using video and audio correlation. In: Proceedings of ICME. IEEE (2000)"},{"key":"32_CR10","doi-asserted-by":"crossref","unstructured":"Das, P., Xu, C., Doell, R., Corso, J.J.: A thousand frames in just a few words: lingual description of videos through latent topics and sparse object stitching. In: Proceedings of CVPR. IEEE (2013)","DOI":"10.1109\/CVPR.2013.340"},{"key":"32_CR11","doi-asserted-by":"crossref","unstructured":"Dosovitskiy, A., et al.: FlowNet: learning optical flow with convolutional networks. In: Proceedings of ICCV. IEEE (2015)","DOI":"10.1109\/ICCV.2015.316"},{"key":"32_CR12","doi-asserted-by":"crossref","unstructured":"Fan, B., Wang, L., Soong, F.K., Xie, L.: Photo-real talking head with deep bidirectional LSTM. In: ICASSP. IEEE (2015)","DOI":"10.1109\/ICASSP.2015.7178899"},{"issue":"2","key":"32_CR13","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1111\/cgf.12552","volume":"34","author":"P. Garrido","year":"2015","unstructured":"Garrido, P., et al.: VDub: modifying face video of actors for plausible visual alignment to a dubbed audio track. Comput. Graph. Forum 34(2), 193\u2013204 (2015)","journal-title":"Computer Graphics Forum"},{"key":"32_CR14","unstructured":"Goodfellow, I.J., et al.: Generative adversarial nets. In: Proceedings of NIPS. Curran Associates, Inc. (2014)"},{"key":"32_CR15","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Delving deep into rectifiers: surpassing human-level performance on ImageNet classification. In: Proceedings of CVPR. IEEE (2015)","DOI":"10.1109\/ICCV.2015.123"},{"key":"32_CR16","doi-asserted-by":"publisher","first-page":"162","DOI":"10.1007\/978-1-4612-4380-9_14","volume-title":"Breakthroughs in Statistics","author":"H Hotelling","year":"1992","unstructured":"Hotelling, H.: Relations between two sets of variates. In: Kotz, S., Johnson, N.L. (eds.) Breakthroughs in Statistics, pp. 162\u2013190. Springer, New York (1992). https:\/\/doi.org\/10.1007\/978-1-4612-4380-9_14"},{"key":"32_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"694","DOI":"10.1007\/978-3-319-46475-6_43","volume-title":"Computer Vision \u2013 ECCV 2016","author":"J Johnson","year":"2016","unstructured":"Johnson, J., Alahi, A., Fei-Fei, L.: Perceptual losses for real-time style transfer and super-resolution. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9906, pp. 694\u2013711. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46475-6_43"},{"key":"32_CR18","first-page":"1755","volume":"10","author":"DE King","year":"2009","unstructured":"King, D.E.: Dlib-ml: a machine learning toolkit. JMLR 10, 1755\u20131758 (2009)","journal-title":"JMLR"},{"key":"32_CR19","doi-asserted-by":"crossref","unstructured":"Kulkarni, G., et al.: Baby talk: understanding and generating simple image descriptions. In: Proceedings of CVPR. IEEE (2011)","DOI":"10.1109\/CVPR.2011.5995466"},{"key":"32_CR20","unstructured":"Mirza, M., Osindero, S.: Conditional generative adversarial nets. arXiv preprint arXiv:1411.1784 (2014)"},{"issue":"9","key":"32_CR21","first-page":"2678","volume":"20","author":"ND Narvekar","year":"2011","unstructured":"Narvekar, N.D., Karam, L.J.: A no-reference image blur metric based on the cumulative probability of blur detection (CPBD). IEEE TIP 20(9), 2678\u20132683 (2011)","journal-title":"IEEE TIP"},{"key":"32_CR22","unstructured":"Odena, A., Olah, C., Shlens, J.: Conditional image synthesis with auxiliary classifier GANs. In: Proceedings of ICML. PMLR (2017)"},{"key":"32_CR23","doi-asserted-by":"crossref","unstructured":"Owens, A., Isola, P., McDermott, J., Torralba, A., Adelson, E.H., Freeman, W.T.: Visually indicated sounds. In: Proceedings of CVPR. IEEE (2016)","DOI":"10.1109\/CVPR.2016.264"},{"key":"32_CR24","doi-asserted-by":"crossref","unstructured":"Rasiwasia, N., et al.: A new approach to cross-modal multimedia retrieval. In: Proceedings of Multimedia. ACM (2010)","DOI":"10.1145\/1873951.1873987"},{"key":"32_CR25","unstructured":"Reed, S.E., Akata, Z., Yan, X., Logeswaran, L., Schiele, B., Lee, H.: Generative adversarial text to image synthesis. In: ICML. PMLR (2016)"},{"key":"32_CR26","unstructured":"Richie, S., Warburton, C., Carter, M.: Audiovisual database of spoken American English. Linguistic Data Consortium (2009)"},{"issue":"6088","key":"32_CR27","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1038\/323533a0","volume":"323","author":"DE Rumelhart","year":"1986","unstructured":"Rumelhart, D.E., Hinton, G.E., Williams, R.J.: Learning representations by back-propagating errors. Nature 323(6088), 533\u2013536 (1986)","journal-title":"Nature"},{"key":"32_CR28","doi-asserted-by":"crossref","unstructured":"Son Chung, J., Senior, A., Vinyals, O., Zisserman, A.: Lip reading sentences in the wild. In: Proceedings of CVPR. IEEE (2017)","DOI":"10.1109\/CVPR.2017.367"},{"issue":"4","key":"32_CR29","doi-asserted-by":"publisher","first-page":"95:1","DOI":"10.1145\/3072959.3073640","volume":"36","author":"S Suwajanakorn","year":"2017","unstructured":"Suwajanakorn, S., Seitz, S.M., Kemelmacher-Shlizerman, I.: Synthesizing Obama: learning lip sync from audio. ACM Trans. Graph. 36(4), 95:1\u201395:13 (2017)","journal-title":"ACM Trans. Graph."},{"key":"32_CR30","unstructured":"Vondrick, C., Pirsiavash, H., Torralba, A.: Generating videos with scene dynamics. In: Proceedings of NIPS. Curran Associates, Inc. (2016)"},{"issue":"3","key":"32_CR31","doi-asserted-by":"publisher","first-page":"328","DOI":"10.1109\/29.21701","volume":"37","author":"AH Waibel","year":"1989","unstructured":"Waibel, A.H., Hanazawa, T., Hinton, G.E., Shikano, K., Lang, K.J.: Phoneme recognition using time-delay neural networks. IEEE Trans. Acoust. Speech Signal Process. 37(3), 328\u2013339 (1989)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"issue":"4","key":"32_CR32","first-page":"600","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. IEEE TIP 13(4), 600\u2013612 (2004)","journal-title":"IEEE TIP"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-01234-2_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,5]],"date-time":"2022-10-05T00:28:28Z","timestamp":1664929708000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-01234-2_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030012335","9783030012342"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-01234-2_32","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"6 October 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}