{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T02:00:30Z","timestamp":1769824830319,"version":"3.49.0"},"publisher-location":"Cham","reference-count":34,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031624940","type":"print"},{"value":"9783031624957","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-62495-7_11","type":"book-chapter","created":{"date-parts":[[2024,6,21]],"date-time":"2024-06-21T20:19:24Z","timestamp":1719001164000},"page":"134-151","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Enhancing Bandwidth Efficiency for\u00a0Video Motion Transfer Applications Using Deep Learning Based Keypoint Prediction"],"prefix":"10.1007","author":[{"given":"Xue","family":"Bai","sequence":"first","affiliation":[]},{"given":"Tasmiah","family":"Haque","sequence":"additional","affiliation":[]},{"given":"Sumit","family":"Mohan","sequence":"additional","affiliation":[]},{"given":"Yuliang","family":"Cai","sequence":"additional","affiliation":[]},{"given":"Byungheon","family":"Jeong","sequence":"additional","affiliation":[]},{"given":"\u00c1d\u00e1m","family":"Hal\u00e1sz","sequence":"additional","affiliation":[]},{"given":"Srinjoy","family":"Das","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,6,22]]},"reference":[{"key":"11_CR1","doi-asserted-by":"crossref","unstructured":"Wang, T.C., Mallya, A., Liu, M.Y.: One-shot free-view neural talking-head synthesis for video conferencing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10039\u201310049 (2021)","DOI":"10.1109\/CVPR46437.2021.00991"},{"key":"11_CR2","doi-asserted-by":"crossref","unstructured":"Chan, C., Ginosar, S., Zhou, T., Efros, A.A.: Everybody dance now. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5933\u20135942 (2019)","DOI":"10.1109\/ICCV.2019.00603"},{"key":"11_CR3","unstructured":"McDonald, K.: Dance x machine learning: first steps (2019). https:\/\/medium.com\/@kcimc\/discrete-figures-7d9e9c275c47. Accessed 21 Mar 2019"},{"issue":"3","key":"11_CR4","doi-asserted-by":"publisher","DOI":"10.2196\/29506","volume":"24","author":"HC Yang","year":"2022","unstructured":"Yang, H.C., Rahmanti, A.R., Huang, C.W., Li, Y.C.: How can research on artificial empathy be enhanced by applying deepfakes? J. Med. Internet Res. 24(3), e29506 (2022)","journal-title":"J. Med. Internet Res."},{"key":"11_CR5","doi-asserted-by":"crossref","unstructured":"Siarohin, A., Lathuili\u00e8re, S., Tulyakov, S., Ricci, E., Sebe, N.: Animating arbitrary objects via deep motion transfer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2377\u20132386 (2019)","DOI":"10.1109\/CVPR.2019.00248"},{"key":"11_CR6","unstructured":"Siarohin, A., Lathuili\u00e8re, S., Tulyakov, S., Ricci, E., Sebe, N.: First order motion model for image animation. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"11_CR7","unstructured":"Chung, J., Kastner, K., Dinh, L., Goel, K., Courville, A.C., Bengio, Y.: A recurrent latent variable model for sequential data. In: Advances in Neural Information Processing Systems, vol. 28 (2015)"},{"key":"11_CR8","unstructured":"Luc, P., et al.: Transformation-based adversarial video prediction on large-scale data. arXiv preprint arXiv:2003.04035 (2020)"},{"key":"11_CR9","unstructured":"Mathieu, M., Couprie, C., LeCun, Y.: Deep multi-scale video prediction beyond mean square error. arXiv preprint arXiv:1511.05440 (2015)"},{"key":"11_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"262","DOI":"10.1007\/978-3-319-46484-8_16","volume-title":"Computer Vision \u2013 ECCV 2016","author":"Y Zhou","year":"2016","unstructured":"Zhou, Y., Berg, T.L.: Learning temporal transformations from time-lapse videos. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 262\u2013277. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_16"},{"key":"11_CR11","doi-asserted-by":"crossref","unstructured":"Jin, B., et al.: Exploring spatial-temporal multi-frequency analysis for high-fidelity and temporal-consistency video prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4554\u20134563 (2020)","DOI":"10.1109\/CVPR42600.2020.00461"},{"key":"11_CR12","doi-asserted-by":"crossref","unstructured":"Reda, F.A., et al.: SDC-Net: video prediction using spatially-displaced convolution. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 718\u2013733 (2018)","DOI":"10.1007\/978-3-030-01234-2_44"},{"key":"11_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"440","DOI":"10.1007\/3-540-60268-2_327","volume-title":"Computer Analysis of Images and Patterns","author":"R Lopez","year":"1995","unstructured":"Lopez, R., Huang, T.S.: Head pose computation for very low bit-rate video coding. In: Hlav\u00e1\u010d, V., \u0160\u00e1ra, R. (eds.) CAIP 1995. LNCS, vol. 970, pp. 440\u2013447. Springer, Heidelberg (1995). https:\/\/doi.org\/10.1007\/3-540-60268-2_327"},{"issue":"14","key":"11_CR14","doi-asserted-by":"publisher","first-page":"1031","DOI":"10.1016\/S0262-8856(99)00005-0","volume":"17","author":"I Koufakis","year":"1999","unstructured":"Koufakis, I., Buxton, B.F.: Very low bit rate face video compression using linear combination of 2D face views and principal components analysis. Image Vis. Comput. 17(14), 1031\u20131051 (1999)","journal-title":"Image Vis. Comput."},{"key":"11_CR15","doi-asserted-by":"crossref","unstructured":"Tang, J., Hu, H., Zhou, Q., Shan, H., Tian, C., Quek, T.Q.: Pose guided global and local GAN for appearance preserving human video prediction. In: 2019 IEEE International Conference on Image Processing (ICIP), pp. 614\u2013618. IEEE (2019)","DOI":"10.1109\/ICIP.2019.8803792"},{"key":"11_CR16","unstructured":"Villegas, R., Erhan, D., Lee, H.: Hierarchical long-term video prediction without supervision. In: International Conference on Machine Learning, pp. 6038\u20136046. PMLR (2018)"},{"key":"11_CR17","unstructured":"Villegas, R., Yang, J., Zou, Y., Sohn, S., Lin, X., Lee, H.: Learning to generate long-term future via hierarchical prediction. In: International Conference on Machine Learning, pp. 3560\u20133569. PMLR (2017)"},{"key":"11_CR18","doi-asserted-by":"crossref","unstructured":"Walker, J., Marino, K., Gupta, A., Hebert, M.: The pose knows: video forecasting by generating pose futures. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3332\u20133341 (2017)","DOI":"10.1109\/ICCV.2017.361"},{"key":"11_CR19","unstructured":"Ranzato, M., Szlam, A., Bruna, J., Mathieu, M., Collobert, R., Chopra, S.: Video (language) modeling: a baseline for generative models of natural videos. arXiv preprint arXiv:1412.6604 (2014)"},{"key":"11_CR20","doi-asserted-by":"crossref","unstructured":"Terwilliger, A., Brazil, G., Liu, X.: Recurrent flow-guided semantic forecasting. In: 2019 IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 1703\u20131712. IEEE (2019)","DOI":"10.1109\/WACV.2019.00186"},{"issue":"8","key":"11_CR21","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"11_CR22","unstructured":"Minderer, M., Sun, C., Villegas, R., Cole, F., Murphy, K.P., Lee, H.: Unsupervised learning of object structure and dynamics from videos. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"11_CR23","doi-asserted-by":"crossref","unstructured":"Zakharov, E., Shysheya, A., Burkov, E., Lempitsky, V.: Few-shot adversarial learning of realistic neural talking head models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9459\u20139468 (2019)","DOI":"10.1109\/ICCV.2019.00955"},{"key":"11_CR24","doi-asserted-by":"crossref","unstructured":"Zhao, J., Zhang, H.: Thin-plate spline motion model for image animation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3657\u20133666 (2022)","DOI":"10.1109\/CVPR52688.2022.00364"},{"key":"11_CR25","doi-asserted-by":"crossref","unstructured":"Cai, Y., Mohan, S., Niranjan, A., Jain, N., Cloninger, A., Das, S.: A manifold learning based video prediction approach for deep motion transfer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4231\u20134238 (2021)","DOI":"10.1109\/ICCVW54120.2021.00470"},{"key":"11_CR26","unstructured":"Jakab, T., Gupta, A., Bilen, H., Vedaldi, A.: Unsupervised learning of object landmarks through conditional image generation. In: Advances in Neural Information Processing Systems, vol. 31 (2018)"},{"key":"11_CR27","unstructured":"Salehinejad, H., Sankar, S., Barfett, J., Colak, E., Valaee, S.: Recent advances in recurrent neural networks. arXiv preprint arXiv:1801.01078 (2017)"},{"key":"11_CR28","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"11_CR29","unstructured":"Rezende, D.J., Mohamed, S., Wierstra, D.: Stochastic backpropagation and approximate inference in deep generative models. In: International Conference on Machine Learning, pp. 1278\u20131286. PMLR (2014)"},{"key":"11_CR30","doi-asserted-by":"crossref","unstructured":"Ullah, S., Xu, Z., Wang, H., Menzel, S., Sendhoff, B., B\u00e4ck, T.: Exploring clinical time series forecasting with meta-features in variational recurrent models. In: 2020 International Joint Conference on Neural Networks (IJCNN), pp. 1\u20139. IEEE (2020)","DOI":"10.1109\/IJCNN48605.2020.9207254"},{"key":"11_CR31","unstructured":"Ebert, F., Finn, C., Lee, A.X., Levine, S.: Self-supervised visual planning with temporal skip connections. In: CoRL 12, p. 16 (2017)"},{"key":"11_CR32","doi-asserted-by":"crossref","unstructured":"Nagrani, A., Chung, J.S., Zisserman, A.: VoxCeleb: a large-scale speaker identification dataset. arXiv preprint arXiv:1706.08612 (2017)","DOI":"10.21437\/Interspeech.2017-950"},{"key":"11_CR33","unstructured":"Unterthiner, T., Van Steenkiste, S., Kurach, K., Marinier, R., Michalski, M., Gelly, S.: Towards accurate generative models of video: a new metric & challenges. arXiv preprint arXiv:1812.01717 (2018)"},{"key":"11_CR34","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"}],"container-title":["Communications in Computer and Information Science","Engineering Applications of Neural Networks"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-62495-7_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,21]],"date-time":"2024-06-21T20:21:05Z","timestamp":1719001265000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-62495-7_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031624940","9783031624957"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-62495-7_11","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"value":"1865-0929","type":"print"},{"value":"1865-0937","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"22 June 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"EANN","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Engineering Applications of Neural Networks","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Corfu","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 June 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 June 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eann2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eannconf.org\/2024\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}