{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T13:15:58Z","timestamp":1758892558735,"version":"3.40.3"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031197895"},{"type":"electronic","value":"9783031197901"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-19790-1_5","type":"book-chapter","created":{"date-parts":[[2022,10,23]],"date-time":"2022-10-23T11:02:44Z","timestamp":1666522964000},"page":"68-84","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Controllable Video Generation Through Global and\u00a0Local Motion Dynamics"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5156-3741","authenticated-orcid":false,"given":"Aram","family":"Davtyan","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3546-8247","authenticated-orcid":false,"given":"Paolo","family":"Favaro","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,24]]},"reference":[{"key":"5_CR1","unstructured":"Liberated pixel cup. https:\/\/lpc.opengameart.org. Accessed 07 Mar 2022"},{"key":"5_CR2","unstructured":"Universal LPC sprite sheet. https:\/\/github.com\/makrohn\/Universal-LPC-spritesheet\/tree\/7040e2fe85d2cb1e8154ec5fce382589d369bdb8. Accessed 07 Mar 2022"},{"key":"5_CR3","unstructured":"Acharya, D., Huang, Z., Paudel, D.P., Van Gool, L.: Towards high resolution video generation with progressive growing of sliced Wasserstein GANs. arXiv preprint arXiv:1810.02419 (2018)"},{"key":"5_CR4","unstructured":"Babaeizadeh, M., Finn, C., Erhan, D., Campbell, R.H., Levine, S.: Stochastic variational video prediction. arXiv preprint arXiv:1710.11252 (2017)"},{"key":"5_CR5","unstructured":"Burgess, C.P., et al.: Understanding disentangling in $$\\beta $$-vae. arXiv preprint arXiv:1804.03599 (2018)"},{"key":"5_CR6","doi-asserted-by":"crossref","unstructured":"Caron, M., Bojanowski, P., Joulin, A., Douze, M.: Deep clustering for unsupervised learning of visual features. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 132\u2013149 (2018)","DOI":"10.1007\/978-3-030-01264-9_9"},{"key":"5_CR7","unstructured":"Chiappa, S., Racaniere, S., Wierstra, D., Mohamed, S.: Recurrent environment simulators. arXiv preprint arXiv:1704.02254 (2017)"},{"key":"5_CR8","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"5_CR9","unstructured":"Denton, E., Fergus, R.: Stochastic video generation with a learned prior. In: International Conference on Machine Learning, pp. 1174\u20131183. PMLR (2018)"},{"key":"5_CR10","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. Adv. Neural Inf. Process. Syst. 34, 8780\u20138794 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"5_CR11","unstructured":"Ebert, F., Finn, C., Lee, A.X., Levine, S.: Self-supervised visual planning with temporal skip connections. In: CoRL, pp. 344\u2013356 (2017)"},{"key":"5_CR12","unstructured":"Finn, C., Goodfellow, I., Levine, S.: Unsupervised learning for physical interaction through video prediction. Advances in Neural Inf. Process. Syst. 29 (2016)"},{"key":"5_CR13","unstructured":"Gafni, O., Wolf, L., Taigman, Y.: Vid2game: controllable characters extracted from real-world videos. arXiv preprint arXiv:1904.08379 (2019)"},{"key":"5_CR14","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local nash equilibrium. Adv. Neural Inf. Proces. Syst. 30 (2017)"},{"key":"5_CR15","doi-asserted-by":"crossref","unstructured":"Huang, J., Jin, Y., Yi, K.M., Sigal, L.: Layered controllable video generation. arXiv preprint arXiv:2111.12747 (2021)","DOI":"10.1007\/978-3-031-19787-1_31"},{"key":"5_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"694","DOI":"10.1007\/978-3-319-46475-6_43","volume-title":"Computer Vision \u2013 ECCV 2016","author":"J Johnson","year":"2016","unstructured":"Johnson, J., Alahi, A., Fei-Fei, L.: Perceptual losses for real-time style transfer and super-resolution. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9906, pp. 694\u2013711. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46475-6_43"},{"key":"5_CR17","first-page":"852","volume":"34","author":"T Karras","year":"2021","unstructured":"Karras, T., et al.: Alias-free generative adversarial networks. Advances Neural Inf. Process. Syst. 34, 852\u2013863 (2021)","journal-title":"Advances Neural Inf. Process. Syst."},{"key":"5_CR18","doi-asserted-by":"crossref","unstructured":"Kim, S.W., Zhou, Y., Philion, J., Torralba, A., Fidler, S.: Learning to simulate dynamic environments with GameGAN. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1231\u20131240 (2020)","DOI":"10.1109\/CVPR42600.2020.00131"},{"key":"5_CR19","unstructured":"Kim, Y., Nam, S., Cho, I., Kim, S.J.: Unsupervised keypoint learning for guiding class-conditional video prediction. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"5_CR20","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"5_CR21","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"5_CR22","unstructured":"Lee, A.X., Zhang, R., Ebert, F., Abbeel, P., Finn, C., Levine, S.: Stochastic adversarial video prediction. arXiv preprint arXiv:1804.01523 (2018)"},{"key":"5_CR23","unstructured":"Li, Y., Mandt, S.: Disentangled sequential autoencoder. In: International Conference on Machine Learning (2018)"},{"key":"5_CR24","doi-asserted-by":"crossref","unstructured":"Menapace, W., Lathuili\u00e8re, S., Tulyakov, S., Siarohin, A., Ricci, E.: Playable video generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10061\u201310070 (2021)","DOI":"10.1109\/CVPR46437.2021.00993"},{"key":"5_CR25","doi-asserted-by":"crossref","unstructured":"Mottaghi, R., Bagherinezhad, H., Rastegari, M., Farhadi, A.: Newtonian scene understanding: unfolding the dynamics of objects in static images. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3521\u20133529 (2016)","DOI":"10.1109\/CVPR.2016.383"},{"key":"5_CR26","doi-asserted-by":"crossref","unstructured":"Nunes, M.S., Dehban, A., Moreno, P., Santos-Victor, J.: Action-conditioned benchmarking of robotic video prediction models: a comparative study. In: 2020 IEEE International Conference on Robotics and Automation (ICRA), pp. 8316\u20138322. IEEE (2020)","DOI":"10.1109\/ICRA40945.2020.9196839"},{"key":"5_CR27","unstructured":"Oh, J., Guo, X., Lee, H., Lewis, R.L., Singh, S.: Action-conditional video prediction using deep networks in Atari games. Adv. Neural Inf. Process. Syst. 28 (2015)"},{"key":"5_CR28","unstructured":"Razavi, A., Van den Oord, A., Vinyals, O.: Generating diverse high-fidelity images with VQ-VAE-2. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"5_CR29","unstructured":"Rybkin, O., Pertsch, K., Derpanis, K.G., Daniilidis, K., Jaegle, A.: Learning what you can do before doing anything. arXiv preprint arXiv:1806.09655 (2018)"},{"issue":"1\u20132","key":"5_CR30","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1162\/1064546053278973","volume":"11","author":"L Smith","year":"2005","unstructured":"Smith, L., Gasser, M.: The development of embodied cognition: six lessons from babies. Artif. Life 11(1\u20132), 13\u201329 (2005)","journal-title":"Artif. Life"},{"key":"5_CR31","unstructured":"Srivastava, N., Mansimov, E., Salakhudinov, R.: Unsupervised learning of video representations using LSTMs. In: International Conference on Machine Learning, pp. 843\u2013852. PMLR (2015)"},{"key":"5_CR32","doi-asserted-by":"crossref","unstructured":"Tulyakov, S., Liu, M.Y., Yang, X., Kautz, J.: MocoGAN: decomposing motion and content for video generation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1526\u20131535 (2018)","DOI":"10.1109\/CVPR.2018.00165"},{"key":"5_CR33","unstructured":"Unterthiner, T., van Steenkiste, S., Kurach, K., Marinier, R., Michalski, M., Gelly, S.: Towards accurate generative models of video: a new metric & challenges. arXiv preprint arXiv:1812.01717 (2018)"},{"key":"5_CR34","unstructured":"Van Den Oord, A., Vinyals, O., et al.: Neural discrete representation learning. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"5_CR35","unstructured":"Vondrick, C., Pirsiavash, H., Torralba, A.: Anticipating the future by watching unlabeled video. arXiv preprint arXiv:1504.080232, 2 (2015)"},{"key":"5_CR36","unstructured":"Vondrick, C., Pirsiavash, H., Torralba, A.: Generating videos with scene dynamics. Adv. Neural Inf. Process. Syst. 29 (2016)"},{"key":"5_CR37","doi-asserted-by":"crossref","unstructured":"Wang, Y., Bilinski, P., Bremond, F., Dantcheva, A.: G3AN: disentangling appearance and motion for video generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5264\u20135273 (2020)","DOI":"10.1109\/CVPR42600.2020.00531"},{"key":"5_CR38","doi-asserted-by":"crossref","unstructured":"Wang, Y., Bilinski, P., Bremond, F., Dantcheva, A.: Imaginator: conditional spatio-temporal GAN for video generation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 1160\u20131169 (2020)","DOI":"10.1109\/WACV45572.2020.9093492"},{"key":"5_CR39","unstructured":"Weissenborn, D., T\u00e4ckstr\u00f6m, O., Uszkoreit, J.: Scaling autoregressive video models. arXiv preprint arXiv:1906.02634 (2019)"},{"key":"5_CR40","unstructured":"Yan, W., Zhang, Y., Abbeel, P., Srinivas, A.: VideoGPT: video generation using VQ-VAE and transformers. arXiv preprint arXiv:2104.10157 (2021)"},{"key":"5_CR41","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595 (2018)","DOI":"10.1109\/CVPR.2018.00068"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-19790-1_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T14:59:57Z","timestamp":1710255597000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-19790-1_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031197895","9783031197901"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-19790-1_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"24 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}