{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T17:15:43Z","timestamp":1763572543268},"reference-count":81,"publisher":"Springer Science and Business Media LLC","issue":"10-11","license":[{"start":{"date-parts":[[2020,4,28]],"date-time":"2020-04-28T00:00:00Z","timestamp":1588032000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,4,28]],"date-time":"2020-04-28T00:00:00Z","timestamp":1588032000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"National Science Foundation","award":["1763523","1747778"],"award-info":[{"award-number":["1763523","1747778"]}]},{"name":"National Science Foundation","award":["1733843","1703883"],"award-info":[{"award-number":["1733843","1703883"]}]},{"name":"National Science Foundation","award":["IIS-1703883","S&AS-1723869"],"award-info":[{"award-number":["IIS-1703883","S&AS-1723869"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2020,11]]},"DOI":"10.1007\/s11263-020-01328-9","type":"journal-article","created":{"date-parts":[[2020,4,28]],"date-time":"2020-04-28T05:16:34Z","timestamp":1588050994000},"page":"2514-2533","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Towards Image-to-Video Translation: A Structure-Aware Approach via Multi-stage Generative Adversarial Networks"],"prefix":"10.1007","volume":"128","author":[{"given":"Long","family":"Zhao","sequence":"first","affiliation":[]},{"given":"Xi","family":"Peng","sequence":"additional","affiliation":[]},{"given":"Yu","family":"Tian","sequence":"additional","affiliation":[]},{"given":"Mubbasir","family":"Kapadia","sequence":"additional","affiliation":[]},{"given":"Dimitris N.","family":"Metaxas","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,4,28]]},"reference":[{"key":"1328_CR1","unstructured":"Aifanti, N., Papachristou, C., & Delopoulos, A. (2010). The MUG facial expression database. In International workshop on image analysis for multimedia interactive services (WIAMIS)."},{"key":"1328_CR2","unstructured":"Amos, B., Ludwiczuk, B., & Satyanarayanan, M. (2016). OpenFace: A general-purpose face recognition library with mobile applications. Technical report, CMU-CS-16-118, CMU School of Computer Science."},{"key":"1328_CR3","unstructured":"Arjovsky, M., Chintala, S., & Bottou, L. (2017). Wasserstein generative adversarial networks. In International conference on machine learning (ICML)."},{"issue":"9","key":"1328_CR4","doi-asserted-by":"publisher","first-page":"1063","DOI":"10.1109\/TPAMI.2003.1227983","volume":"25","author":"V Blanz","year":"2003","unstructured":"Blanz, V., & Vetter, T. (2003). Face recognition based on fitting a 3D morphable model. IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 25(9), 1063\u20131074.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)"},{"issue":"3","key":"1328_CR5","doi-asserted-by":"publisher","first-page":"413","DOI":"10.1109\/TVCG.2013.249","volume":"20","author":"C Cao","year":"2014","unstructured":"Cao, C., Weng, Y., Zhou, S., Tong, Y., & Zhou, K. (2014). Facewarehouse: A 3D facial expression database for visual computing. IEEE Transactions on Visualization and Computer Graphics (TVCG), 20(3), 413\u2013425.","journal-title":"IEEE Transactions on Visualization and Computer Graphics (TVCG)"},{"key":"1328_CR6","doi-asserted-by":"crossref","unstructured":"Chao, Y. W., Yang, J., Price, B., Cohen, S., & Deng, J. (2017). Forecasting human dynamics from static images. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.388"},{"key":"1328_CR7","unstructured":"Denton, E., & Birodkar, V. (2017). Unsupervised learning of disentangled representations from video. In Annual conference on neural information processing systems (NeurIPS) (pp. 4414\u20134423)."},{"key":"1328_CR8","unstructured":"Denton, E., Chintala, S., Szlam, A., & Fergus, R. (2015). Deep generative image models using a Laplacian pyramid of adversarial networks. In Annual conference on neural information processing systems (NeurIPS)."},{"key":"1328_CR9","doi-asserted-by":"crossref","unstructured":"Farneb\u00e4ck, G. (2003). Two-frame motion estimation based on polynomial expansion. In Scandinavian conference on Image analysis (pp. 363\u2013370).","DOI":"10.1007\/3-540-45103-X_50"},{"key":"1328_CR10","unstructured":"Finn, C., Goodfellow, I., & Levine, S. (2016). Unsupervised Learning for Physical Interaction through Video Prediction. In Annual conference on neural information processing systems (NeurIPS)."},{"key":"1328_CR11","doi-asserted-by":"crossref","unstructured":"Fragkiadaki, K., Levine, S., Felsen, P., & Malik, J. (2015). Recurrent network models for human dynamics. In IEEE international conference on computer vision (ICCV) (pp. 4346\u20134354).","DOI":"10.1109\/ICCV.2015.494"},{"key":"1328_CR12","unstructured":"Gatys, L., Ecker, A. S., & Bethge, M. (2015). Texture synthesis using convolutional neural networks. In Annual conference on neural information processing systems (NeurIPS) (pp. 262\u2013270)."},{"key":"1328_CR13","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., et al. (2014). Generative adversarial nets. In Annual conference on neural information processing systems (NeurIPS) (pp. 2672\u20132680)."},{"key":"1328_CR14","unstructured":"Gulrajani, I., Ahmed, F., Arjovsky, M., Dumoulin, V., & Courville, A. (2017). Improved training of Wasserstein GANs. In Annual conference on neural information processing systems (NeurIPS)."},{"key":"1328_CR15","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, S., van\u00a0der Maaten, L., & Weinberger, K. Q. (2018). Condensenet: An efficient densenet using learned group convolutions. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2018.00291"},{"key":"1328_CR16","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., van\u00a0der Maaten, L., & Weinberger, K. Q. (2017). Densely connected convolutional networks. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.243"},{"issue":"7","key":"1328_CR17","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1109\/TPAMI.2013.248","volume":"36","author":"C Ionescu","year":"2014","unstructured":"Ionescu, C., Papava, D., Olaru, V., & Sminchisescu, C. (2014). Human3.6M: Large scale datasets and predictive methods for 3D human sensing in natural environments. IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 36(7), 1325\u20131339.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)"},{"issue":"1","key":"1328_CR18","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2013","unstructured":"Ji, S., Xu, W., Yang, M., & Yu, K. (2013). 3D convolutional neural networks for human action recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 35(1), 221\u2013231.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)"},{"key":"1328_CR19","unstructured":"Karras, T., Aila, T., Laine, S., & Lehtinen, J. (2018). Progressive growing of GANs for improved quality, stability, and variation. In International conference on learning representations (ICLR)."},{"key":"1328_CR20","unstructured":"Kingma, D. P., & Ba, J. (2014). Adam: A method for stochastic optimization. In International conference on learning representations (ICLR)."},{"key":"1328_CR21","unstructured":"Kingma, D. P., & Welling, M. (2014). Auto-encoding variational bayes. In International conference on learning representations (ICLR)."},{"key":"1328_CR22","doi-asserted-by":"crossref","unstructured":"Laine, S., Karras, T., Aila, T., Herva, A., Saito, S., Yu, R., Li, H., & Lehtinen, J. (2017). Production-level facial performance capture using deep convolutional neural networks. In Proceedings of the ACM SIGGRAPH\/Eurographics symposium on computer animation.","DOI":"10.1145\/3099564.3099581"},{"key":"1328_CR23","doi-asserted-by":"crossref","unstructured":"Li, Y., Fang, C., Yang, J., Wang, Z., Lu, X., & Yang, M. H. (2018). Flow-grounded spatial-temporal video prediction from still images. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-030-01240-3_37"},{"key":"1328_CR24","doi-asserted-by":"crossref","unstructured":"Liang, X., Lee, L., Dai, W., & Xing, E. P. (2017). Dual motion GAN for future-flow embedded video prediction. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.194"},{"key":"1328_CR25","doi-asserted-by":"crossref","unstructured":"Liang, X., Zhang, H., Lin, L., & Xing, E. (2018). Generative semantic manipulation with mask-contrasting GAN. In European conference on computer vision (ECCV) (pp. 558\u2013573).","DOI":"10.1007\/978-3-030-01261-8_34"},{"key":"1328_CR26","doi-asserted-by":"crossref","unstructured":"Liu, Z., Yeh, R. A., Tang, X., Liu, Y., & Agarwala, A. (2017). Video frame synthesis using deep voxel flow. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.478"},{"key":"1328_CR27","unstructured":"Lotter, W., Kreiman, G., & Cox, D. (2017). Deep predictive coding networks for video prediction and unsupervised learning. In International conference on learning representations (ICLR)."},{"key":"1328_CR28","doi-asserted-by":"crossref","unstructured":"Lu, J., Issaranon, T., & Forsyth, D. (2017). SafetyNet: Detecting and rejecting adversarial examples robustly. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.56"},{"key":"1328_CR29","unstructured":"Ma, L., Jia, X., Sun, Q., Schiele, B., Tuytelaars, T., & Van\u00a0Gool, L. (2017). Pose guided person image generation. In Annual conference on neural information processing systems (NeurIPS) (pp. 405\u2013415)."},{"key":"1328_CR30","unstructured":"Mathieu, M., Couprie, C., & LeCun, Y. (2016). Deep multi-scale video prediction beyond mean square error. In International conference on learning representations (ICLR)."},{"key":"1328_CR31","unstructured":"Mirza, M., & Osindero, S. (2014). Conditional generative adversarial nets. arXiv:1411.1784"},{"key":"1328_CR32","doi-asserted-by":"crossref","unstructured":"Newell, A., Yang, K., & Deng, J. (2016). Stacked hourglass networks for human pose estimation. In European conference on computer vision (ECCV) (pp. 483\u2013499).","DOI":"10.1007\/978-3-319-46484-8_29"},{"key":"1328_CR33","unstructured":"Odena, A., Olah, C., & Shlens, J. (2017). Conditional image synthesis with auxiliary classifier GANs. In International conference on machine learning (ICML)."},{"key":"1328_CR34","doi-asserted-by":"crossref","unstructured":"Olszewski, K., Li, Z., Yang, C., Zhou, Y., Yu, R., Huang, Z., Xiang, S., Saito, S., Kohli, P., & Li, H. (2017). Realistic dynamic facial textures from a single image using GANs. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.580"},{"key":"1328_CR35","doi-asserted-by":"crossref","unstructured":"Pan, J., Wang, C., Jia, X., Shao, J., Sheng, L., Yan, J., & Wang, X. (2019). Video generation from single semantic label map. In IEEE conference on computer vision and pattern recognition (CVPR) (pp. 3733\u20133742).","DOI":"10.1109\/CVPR.2019.00385"},{"key":"1328_CR36","doi-asserted-by":"crossref","unstructured":"Paysan, P., Knothe, R., Amberg, B., Romdhani, S., & Vetter, T. (2009). A 3D face model for pose and illumination invariant face recognition. In IEEE international conference on advanced video and signal based surveillance (AVSS) for security, safety and monitoring in smart environments.","DOI":"10.1109\/AVSS.2009.58"},{"key":"1328_CR37","doi-asserted-by":"crossref","unstructured":"Peng, X., Feris, R. S., Wang, X., & Metaxas, D. N. (2016). A recurrent encoder-decoder network for sequential face alignment. In European conference on computer vision (ECCV) (pp. 38\u201356).","DOI":"10.1007\/978-3-319-46448-0_3"},{"key":"1328_CR38","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1016\/j.cviu.2015.03.008","volume":"136","author":"X Peng","year":"2015","unstructured":"Peng, X., Huang, J., Hu, Q., Zhang, S., Elgammal, A., & Metaxas, D. (2015). From circle to 3-Sphere: Head pose estimation by instance parameterization. Computer Vision and Image Understanding (CVIU), 136, 92\u2013102.","journal-title":"Computer Vision and Image Understanding (CVIU)"},{"key":"1328_CR39","doi-asserted-by":"crossref","unstructured":"Peng, X., Tang, Z., Yang, F., Feris, R. S., & Metaxas, D. (2018). Jointly optimize data augmentation and network training: Adversarial data augmentation in human pose estimation. In IEEE conference on computer vision and pattern recognition (CVPR) (pp. 2226\u20132234).","DOI":"10.1109\/CVPR.2018.00237"},{"key":"1328_CR40","unstructured":"Perarnau, G., van\u00a0de Weijer, J., Raducanu, B., & \u00c1lvarez, J. M. (2016). Invertible conditional GANs for image editing. In NeurIPS workshop on adversarial training."},{"key":"1328_CR41","doi-asserted-by":"crossref","unstructured":"Pumarola, A., Agudo, A., Martinez, A., Sanfeliu, A., & Moreno-Noguer, F. (2018). GANimation: Anatomically-aware facial animation from a single image. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-030-01249-6_50"},{"key":"1328_CR42","unstructured":"Reed, S., Akata, Z., Yan, X., Logeswaran, L., Schiele, B., & Lee, H. (2016). Generative adversarial text-to-image synthesis. In International conference on machine learning (ICML)."},{"key":"1328_CR43","unstructured":"Reed, S. E., Zhang, Y., Zhang, Y., & Lee, H. (2015). Deep visual analogy-making. In Annual conference on neural information processing systems (NeurIPS)."},{"key":"1328_CR44","unstructured":"Rezende, D. J., Mohamed, S., & Wierstra, D. (2014). Stochastic backpropagation and approximate inference in deep generative models. In International conference on machine learning (ICML)."},{"key":"1328_CR45","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., & Brox, T. (2015). U-Net: Convolutional networks for biomedical image segmentation. In Medical image computing and computer-assisted intervention (MICCAI).","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"1328_CR46","doi-asserted-by":"crossref","unstructured":"Saito, M., Matsumoto, E., & Saito, S. (2017). Temporal generative adversarial nets with singular value clipping. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.308"},{"key":"1328_CR47","doi-asserted-by":"crossref","unstructured":"Shen, W., & Liu, R. (2017). Learning residual images for face attribute manipulation. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.135"},{"key":"1328_CR48","unstructured":"Shi, X., Chen, Z., Wang, H., Yeung, D.-Y., Wong, W. K., & Woo, W.-c. (2015). Convolutional LSTM network: A machine learning approach for precipitation nowcasting. In Annual conference on neural information processing systems (NeurIPS) (pp. 802\u2013810)."},{"key":"1328_CR49","doi-asserted-by":"crossref","unstructured":"Shrivastava, A., Pfister, T., Tuzel, O., Susskind, J., Wang, W., & Webb, R. (2017). Learning from simulated and unsupervised images through adversarial training. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.241"},{"key":"1328_CR50","unstructured":"Simonyan, K., & Zisserman, A. (2014). Two-stream convolutional networks for action recognition in videos. In Annual conference on neural information processing systems (NeurIPS) (pp. 568\u2013576)."},{"key":"1328_CR51","unstructured":"Simonyan, K., & Zisserman, A. (2015). Very deep convolutional networks for large-scale image recognition. In International conference on learning representations (ICLR)."},{"key":"1328_CR52","unstructured":"Srivastava, N., Mansimov, E., & Salakhutdinov, R. (2015). Unsupervised learning of video representations using LSTMs. In International conference on machine learning (ICML)."},{"key":"1328_CR53","doi-asserted-by":"crossref","unstructured":"Tang, Z., Peng, X., Geng, S., Wu, L., Zhang, S., & Metaxas, D. N. (2018a). Quantized densely connected U-nets for efficient landmark localization. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-030-01219-9_21"},{"key":"1328_CR54","unstructured":"Tang, Z., Peng, X., Geng, S., Zhu, Y., & Metaxas, D. (2018b). CU-Net: Coupled U-nets. In British machine vision conference (BMVC)."},{"key":"1328_CR55","doi-asserted-by":"crossref","unstructured":"Thies, J., Zollh\u00f6fer, M., Stamminger, M., Theobalt, C., & Nie\u00dfner, M. (2016). Face2Face: Real-time face capture and reenactment of RGB videos. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1145\/2929464.2929475"},{"key":"1328_CR56","doi-asserted-by":"crossref","unstructured":"Tian, Y., Peng, X., Zhao, L., Zhang, S., & Metaxas, D. N. (2018). CR-GAN: Learning complete representations for multi-view generation. In International joint conference on artificial intelligence (IJCAI) (pp. 942\u2013948).","DOI":"10.24963\/ijcai.2018\/131"},{"key":"1328_CR57","unstructured":"Tian, Y., Zhao, L., Peng, X., & Metaxas, D. N. (2019). Rethinking kernel methods for node representation learning on graphs. In Annual conference on neural information processing systems (NeurIPS)."},{"key":"1328_CR58","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., & Paluri, M. (2015). Learning spatiotemporal features with 3D convolutional networks. In IEEE international conference on computer vision (ICCV) (pp. 4489\u20134497).","DOI":"10.1109\/ICCV.2015.510"},{"key":"1328_CR59","doi-asserted-by":"crossref","unstructured":"Tulyakov, S., Liu, M. Y., Yang, X., & Kautz, J. (2018). MoCoGAN: Decomposing motion and content for video generation. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2018.00165"},{"key":"1328_CR60","unstructured":"van\u00a0den Oord, A., Kalchbrenner, N., & Kavukcuoglu, K. (2016). Pixel recurrent neural networks. In International conference on machine learning (ICML)."},{"key":"1328_CR61","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., Kaiser, L., & Polosukhin, I. (2017). Attention is all you need. In Annual conference on neural information processing systems (NeurIPS)."},{"key":"1328_CR62","unstructured":"Villegas, R., Yang, J., Hong, S., Lin, X., & Lee, H. (2017a). Decomposing motion and content for natural video sequence prediction. In International conference on learning representations (ICLR)."},{"key":"1328_CR63","unstructured":"Villegas, R., Yang, J., Zou, Y., Sohn, S., Lin, X., & Lee, H. (2017b). Learning to generate long-term future via hierarchical prediction. In International conference on machine learning (ICML)."},{"key":"1328_CR64","unstructured":"Vondrick, C., Pirsiavash, H., & Torralba, A. (2016). Generating videos with scene dynamics. In Annual conference on neural information processing systems (NeurIPS)."},{"key":"1328_CR65","unstructured":"Wang, T. C., Liu, M. Y., Zhu, J. Y., Liu, G., Tao, A., Kautz, J., & Catanzaro, B. (2018). Video-to-video synthesis. In Annual conference on neural information processing systems (NeurIPS) (pp. 1144\u20131156)."},{"key":"1328_CR66","doi-asserted-by":"crossref","unstructured":"Xiong, W., Luo, W., Ma, L., Liu, W., & Luo, J. (2018). Learning to generate time-lapse videos using multi-stage dynamic generative adversarial networks. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2018.00251"},{"key":"1328_CR67","doi-asserted-by":"crossref","unstructured":"Yan, S., Li, Z., Xiong, Y., Yan, H., & Lin, D. (2019). Convolutional sequence generation for skeleton-based action synthesis. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2019.00449"},{"key":"1328_CR68","doi-asserted-by":"crossref","unstructured":"Yang, C., Wang, Z., Zhu, X., Huang, C., Shi, J., & Lin, D. (2018). Pose guided human video generation. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-030-01249-6_13"},{"key":"1328_CR69","doi-asserted-by":"crossref","unstructured":"Zhang, B., Wang, L., Wang, Z., Qiao, Y., & Wang, H. (2016). Real-time action recognition with enhanced motion vector CNNs. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.297"},{"key":"1328_CR70","unstructured":"Zhang, H., Sindagi, V., & Patel, V. M. (2017a). Image de-raining using a conditional generative adversarial network. arXiv:1701.05957."},{"key":"1328_CR71","doi-asserted-by":"crossref","unstructured":"Zhang, H., Xu, T., Li, H., Zhang, S., Wang, X., Huang, X., & Metaxas, D. (2017b). StackGAN++: Realistic image synthesis with stacked generative adversarial networks. arXiv:1710.10916.","DOI":"10.1109\/ICCV.2017.629"},{"key":"1328_CR72","doi-asserted-by":"crossref","unstructured":"Zhang, H., Xu, T., Li, H., Zhang, S., Wang, X., Huang, X., & Metaxas, D. (2017c). StackGAN: Text to photo-realistic image synthesis with stacked generative adversarial networks. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.629"},{"key":"1328_CR73","doi-asserted-by":"crossref","unstructured":"Zhang, W., Zhu, M., & Derpanis, K. (2013a). From actemes to action: A strongly-supervised representation for detailed action understanding. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2013.280"},{"key":"1328_CR74","doi-asserted-by":"crossref","unstructured":"Zhang, X., Yin, L., Cohn, J. F., Canavan, S., Reale, M., Horowitz, A., et al. (2013b). A high-resolution spontaneous 3D dynamic facial expression database. In IEEE international conference and workshops on automatic face and gesture recognition (FG) (pp. 1\u20136).","DOI":"10.1109\/FG.2013.6553788"},{"key":"1328_CR75","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Xie, Y., & Yang, L. (2018a). Photographic text-to-image synthesis with a hierarchically-nested adversarial network. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2018.00649"},{"key":"1328_CR76","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Yang, L., & Zheng, Y. (2018b). Translating and segmenting multimodal medical volumes with cycle-and shape consistency generative adversarial network. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2018.00963"},{"key":"1328_CR77","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1016\/j.cag.2019.01.004","volume":"79","author":"L Zhao","year":"2019","unstructured":"Zhao, L., Han, F., Peng, X., Zhang, X., Kapadia, M., Pavlovic, V., et al. (2019a). Cartoonish sketch-based face editing in videos using identity deformation transfer. Computers & Graphics, 79, 58\u201368.","journal-title":"Computers & Graphics"},{"key":"1328_CR78","doi-asserted-by":"crossref","unstructured":"Zhao, L., Peng, X., Tian, Y., Kapadia, M., & Metaxas, D. (2018). Learning to forecast and refine residual motion for image-to-video generation. In European conference on computer vision (ECCV) (pp. 387\u2013403).","DOI":"10.1007\/978-3-030-01267-0_24"},{"key":"1328_CR79","doi-asserted-by":"crossref","unstructured":"Zhao, L., Peng, X., Tian, Y., Kapadia, M., & Metaxas, D. N. (2019b). Semantic graph convolutional networks for 3D human pose regression. In IEEE conference on computer vision and pattern recognition (CVPR) (pp. 3425\u20133435).","DOI":"10.1109\/CVPR.2019.00354"},{"key":"1328_CR80","doi-asserted-by":"crossref","unstructured":"Zhu, X., Lei, Z., Liu, X., Shi, H., & Li, S. (2016). Face alignment across large poses: A 3D solution. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.23"},{"issue":"1","key":"1328_CR81","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1109\/TPAMI.2017.2778152","volume":"41","author":"X Zhu","year":"2019","unstructured":"Zhu, X., Liu, X., Lei, Z., & Li, S. Z. (2019). Face alignment in full pose range: A 3D total solution. IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 41(1), 78\u201392.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-020-01328-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-020-01328-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-020-01328-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,27]],"date-time":"2021-04-27T23:18:09Z","timestamp":1619565489000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-020-01328-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,4,28]]},"references-count":81,"journal-issue":{"issue":"10-11","published-print":{"date-parts":[[2020,11]]}},"alternative-id":["1328"],"URL":"https:\/\/doi.org\/10.1007\/s11263-020-01328-9","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,4,28]]},"assertion":[{"value":"28 April 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 April 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 April 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}