{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,15]],"date-time":"2026-01-15T13:37:20Z","timestamp":1768484240402,"version":"3.49.0"},"reference-count":64,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2021,7,8]],"date-time":"2021-07-08T00:00:00Z","timestamp":1625702400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,7,8]],"date-time":"2021-07-08T00:00:00Z","timestamp":1625702400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2022,3]]},"DOI":"10.1007\/s10489-021-02500-5","type":"journal-article","created":{"date-parts":[[2021,7,8]],"date-time":"2021-07-08T10:03:12Z","timestamp":1625738592000},"page":"3640-3652","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Video prediction: a step-by-step improvement of a video synthesis network"],"prefix":"10.1007","volume":"52","author":[{"given":"Beibei","family":"Jing","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongwei","family":"Ding","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhijun","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liyong","family":"Bao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,7,8]]},"reference":[{"key":"2500_CR1","unstructured":"Kalchbrenner N, Oord A, Simonyan K, Danihelka I, Vinyals O, Graves, A, Kavukcuoglu K (2017) Video pixel networks. In: 2017 International Conference on Machine Learning, pp 1\u20132"},{"key":"2500_CR2","unstructured":"Lotter W, Kreiman G, Cox D (2016) Deep predictive coding networks for video prediction and unsupervised learning. arXiv preprint arXiv:1605.08104"},{"key":"2500_CR3","doi-asserted-by":"crossref","unstructured":"Byeon W, Wang Q, Srivastava RK, Koumoutsakos P (2018).Contextvp: fully context-aware video prediction. In: 2018 In Proceedings of the European Conference on Computer Vision (ECCV), pp 753-769","DOI":"10.1007\/978-3-030-01270-0_46"},{"key":"2500_CR4","unstructured":"Finn C, Goodfellow I, Levine S (2016) Unsupervised learning for physical interaction through video prediction. arXiv preprint arXiv:1605.07157"},{"key":"2500_CR5","unstructured":"Shi X, Chen Z, Wang H, Yeung DY, Wong WK, Woo WC. (2015) Convolutional LSTM network: a machine learning approach for precipitation nowcasting. arXiv preprint arXiv:1506.04214"},{"key":"2500_CR6","unstructured":"Xue T, Wu J, Bouman KL, Freeman WT (2016) Visual dynamics: probabilistic future frame synthesis via cross convolutional networks. arXiv preprint arXiv:1607.02586"},{"key":"2500_CR7","unstructured":"Villegas R, Yang J, Hong S, Lin X, Lee H (2017) Decomposing motion and content for natural video sequence prediction. arXiv preprint arXiv:1706.08033"},{"key":"2500_CR8","unstructured":"Mathieu M, Couprie C, LeCun Y (2015) Deep multi-scale video prediction beyond mean square error. arXiv preprint arXiv:1511.05440"},{"key":"2500_CR9","doi-asserted-by":"crossref","unstructured":"Liu W, Luo W, Lian D, Gao S (2018) Future frame prediction for anomaly detection\u2013a new baseline. In: 2018 Proceedings of the IEEE Conference On Computer Vision and Pattern Recognition, pp 1-7","DOI":"10.1109\/CVPR.2018.00684"},{"key":"2500_CR10","doi-asserted-by":"crossref","unstructured":"Yi Z, Zhang H, Tan P, Gong M (2017) Dualgan: unsupervised dual learning for image-to-image translation. In: 2017 in Proceedings of the IEEE international conference on computer vision, pp 2-3","DOI":"10.1109\/ICCV.2017.310"},{"key":"2500_CR11","doi-asserted-by":"crossref","unstructured":"Liang X, Lee L, Dai W, Xing, EP (2017) Dual motion gan for future-flow embedded video prediction. In: 2017 In proceedings of the IEEE international conference on computer vision, pp 1\u20137","DOI":"10.1109\/ICCV.2017.194"},{"key":"2500_CR12","unstructured":"Denton E, Birodkar V (2017) Unsupervised learning of disentangled representations from video. arXiv preprint arXiv:1705.10915"},{"key":"2500_CR13","unstructured":"Villegas R, Yang J, Zou Y, Sohn S, Lin X, Lee H (2017). Learning to generate long-term future via hierarchical prediction. In:2017 in international conference on machine learning, pp. 3560-3569"},{"key":"2500_CR14","doi-asserted-by":"crossref","unstructured":"Oprea S, Martinez-Gonzalez P, Garcia-Garcia A, Castro-Vargas JA, Orts-Escolano S, Garcia-Rodriguez J, Argyros A (2020) A review on deep learning techniques for video prediction. IEEE Trans Pattern Anal Mach Intell:1","DOI":"10.1109\/TPAMI.2020.3045007"},{"key":"2500_CR15","unstructured":"Hsieh JT, Liu B, Huang DA, Fei-Fei L, Niebles JC (2018) Learning to decompose and disentangle representations for video prediction. arXiv preprint arXiv:1806.04166"},{"key":"2500_CR16","doi-asserted-by":"crossref","unstructured":"Xu Y, Gao L, Tian K, Zhou S, Sun H (2019) Non-local convlstm for video compression artifact reduction. In:2019 in Proceedings of the IEEE\/CVF International Conference On Computer Vision, pp 7043-7052)","DOI":"10.1109\/ICCV.2019.00714"},{"key":"2500_CR17","unstructured":"Srivastava N, Mansimov E, Salakhudinov R (2015) Unsupervised learning of video representations using lstms. In:2015 in International conference on machine learning, pp 843-852"},{"key":"2500_CR18","doi-asserted-by":"crossref","unstructured":"Walker J, Doersch C, Gupta A, Hebert M (2016, October) An uncertain future: forecasting from static images using variational autoencoders. In:2016 in European Conference on Computer Vision (ECCV), pp 835-851","DOI":"10.1007\/978-3-319-46478-7_51"},{"key":"2500_CR19","doi-asserted-by":"crossref","unstructured":"Ye Y, Singh M, Gupta A, Tulsiani S (2019) Compositional video prediction. In:2019 in proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 10353-10362","DOI":"10.1109\/ICCV.2019.01045"},{"key":"2500_CR20","doi-asserted-by":"crossref","unstructured":"Saito M, Matsumoto E, Saito S (2017) Temporal generative adversarial nets with singular value clipping. In: 2017 in IEEE International Conference on Computer Vision (ICCV), pp 2830-2839","DOI":"10.1109\/ICCV.2017.308"},{"key":"2500_CR21","doi-asserted-by":"crossref","unstructured":"Tulyakov S, Liu MY, Yang X, Kautz J (2018) MoCoGAN: decomposing motion and content for video generation. In:2018 in IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp 1526-1535","DOI":"10.1109\/CVPR.2018.00165"},{"key":"2500_CR22","unstructured":"Wang TC, Liu MY, Tao A, Liu G, Kautz J, Catanzaro B (2019) Few-shot video-to-video synthesis. arXiv preprint arXiv:1910.12713"},{"key":"2500_CR23","unstructured":"Wang TC, Liu MY, Zhu JY, Liu G, Tao A, Kautz J, Catanzaro B (2018) Video-to-video synthesis. arXiv preprint arXiv:1808.06601"},{"key":"2500_CR24","unstructured":"Goodfellow IJ, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Bengio Y (2014) Generative adversarial networks. arXiv preprint arXiv:1406.2661"},{"key":"2500_CR25","unstructured":"Mirza M, Osindero S (2014) Conditional generative adversarial nets. arXiv preprint arXiv:1411.1784"},{"key":"2500_CR26","doi-asserted-by":"crossref","unstructured":"Wang Y, Zhang J, Zhu H, Long M, Wang J, Yu PS (2019) Memory in memory: a predictive neural network for learning higher-order non-stationarity from spatiotemporal dynamics. In:2019 in Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 9154-9162","DOI":"10.1109\/CVPR.2019.00937"},{"key":"2500_CR27","unstructured":"Karacan L, Akata Z, Erdem A, Erdem E (2016) Learning to generate images of outdoor scenes from attributes and semantic layouts. arXiv preprint arXiv:1612.00215"},{"key":"2500_CR28","unstructured":"Reed S, Akata Z, Mohan S, Tenka S, Schiele B, Lee H (2016) Learning what and where to draw. arXiv preprint arXiv:1610.02454"},{"key":"2500_CR29","unstructured":"Lee AX, Zhang R, Ebert F, Abbeel P, Finn C, Levine S (2018) Stochastic adversarial video prediction. arXiv preprint arXiv:1804.01523"},{"key":"2500_CR30","doi-asserted-by":"crossref","unstructured":"Liang X, Lee L, Dai W, Xing EP (2017) Dual motion Gan for future-flow embedded video prediction. In:2017 In proceedings of the IEEE international conference on computer vision, pp 1744-1752","DOI":"10.1109\/ICCV.2017.194"},{"key":"2500_CR31","unstructured":"Vondrick C, Pirsiavash H, Torralba A (2016) Generating videos with scene dynamics. arXiv preprint arXiv:1609.02612"},{"key":"2500_CR32","doi-asserted-by":"crossref","unstructured":"Tran D, Bourdev L, Fergus R, Torresani L, Paluri M (2015) Learning spatiotemporal features with 3d convolutional networks. In:2015 in Proceedings of the IEEE international conference on computer vision, pp 4489-4497","DOI":"10.1109\/ICCV.2015.510"},{"key":"2500_CR33","doi-asserted-by":"crossref","unstructured":"Bousmalis K, Silberman N, Dohan D, Erhan D, Krishnan D (2017). Unsupervised pixel-level domain adaptation with generative adversarial networks. In:2017 in Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3722-3731","DOI":"10.1109\/CVPR.2017.18"},{"key":"2500_CR34","doi-asserted-by":"crossref","unstructured":"Huang X, Liu MY, Belongie S, Kautz J (2018) Multimodal unsupervised image-to-image translation. In:2018 in Proceedings of the European conference on computer vision (ECCV), pp 172-189","DOI":"10.1007\/978-3-030-01219-9_11"},{"key":"2500_CR35","doi-asserted-by":"crossref","unstructured":"Isola P, Zhu JY, Zhou T, Efros AA (2017) Image-to-image translation with conditional adversarial networks. In:2017 in Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1125-1134","DOI":"10.1109\/CVPR.2017.632"},{"key":"2500_CR36","unstructured":"Liu MY, Breuel T, Kautz J (2017) Unsupervised image-to-image translation networks. arXiv preprint arXiv:1703.00848"},{"key":"2500_CR37","unstructured":"Liu MY, Tuzel O (2016) Coupled generative adversarial networks. arXiv preprint arXiv:1606.07536"},{"key":"2500_CR38","doi-asserted-by":"crossref","unstructured":"Shrivastava A, Pfister T, Tuzel O, Susskind J, Wang W, Webb R (2017) Learning from simulated and unsupervised images through adversarial training. In:2017 In Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2107-2116","DOI":"10.1109\/CVPR.2017.241"},{"key":"2500_CR39","unstructured":"Taigman Y, Polyak A, Wolf L (2016). Unsupervised cross-domain image generation. arXiv preprint arXiv:1611.02200"},{"key":"2500_CR40","doi-asserted-by":"crossref","unstructured":"Wang TC, Liu MY, Zhu JY, Tao A, Kautz J, Catanzaro B (2018) High-resolution image synthesis and semantic manipulation with conditional gans. In:2018 in proceedings of the IEEE conference on computer vision and pattern recognition, pp. 8798-8807","DOI":"10.1109\/CVPR.2018.00917"},{"key":"2500_CR41","doi-asserted-by":"crossref","unstructured":"Zhu JY, Park T, Isola P, Efros AA. (2017). Unpaired image-to-image translation using cycle-consistent adversarial networks. In:2017 In Proceedings of the IEEE international conference on computer vision, pp 2223-2232","DOI":"10.1109\/ICCV.2017.244"},{"key":"2500_CR42","unstructured":"Zhu JY, Zhang R, Pathak D, Darrell T, Efros AA., Wang O, Shechtman E (2017) Toward multimodal image-to-image translation. arXiv preprint arXiv:1711.11586"},{"issue":"1","key":"2500_CR43","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/0262-8856(83)90006-9","volume":"1","author":"J Kittler","year":"1983","unstructured":"Kittler J (1983) On the accuracy of the Sobel edge detector. Image Vis Comput 1(1):37\u201342","journal-title":"Image Vis Comput"},{"key":"2500_CR44","first-page":"147","volume":"2","author":"V Torre","year":"1986","unstructured":"Torre V, Poggio TA (1986) On edge detection. IEEE 2:147\u2013163","journal-title":"IEEE"},{"issue":"5","key":"2500_CR45","doi-asserted-by":"publisher","first-page":"898","DOI":"10.1109\/TPAMI.2010.161","volume":"33","author":"P Arbelaez","year":"2010","unstructured":"Arbelaez P, Maire M, Fowlkes C, Malik J (2010) Contour detection and hierarchical image segmentation. IEEE Trans Pattern Anal Mach Intell 33(5):898\u2013916","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2500_CR46","unstructured":"Xiaofeng R, Bo L (2012) Discriminatively trained sparse code gradients for contour detection. In Advances in neural information processing systems, pp 584-592"},{"issue":"8","key":"2500_CR47","first-page":"1558","volume":"37","author":"P Doll\u00e1r","year":"2014","unstructured":"Doll\u00e1r P, Zitnick CL (2014) Fast edge detection using structured forests. In:2014 IEEE transactions on pattern analysis and machine intelligence 37(8):1558\u20131570","journal-title":"In:2014 IEEE transactions on pattern analysis and machine intelligence"},{"key":"2500_CR48","doi-asserted-by":"crossref","unstructured":"Xie S, Tu Z (2015) Holistically-nested edge detection. In:2015 in proceedings of the IEEE international conference on computer vision, pp 1395-1403","DOI":"10.1109\/ICCV.2015.164"},{"key":"2500_CR49","doi-asserted-by":"crossref","unstructured":"Long J, Shelhamer E, Darrell T (2015) Fully convolutional networks for semantic segmentation. In:2015 in proceedings of the IEEE conference on computer vision and pattern recognition, pp 3431-3440","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"2500_CR50","doi-asserted-by":"crossref","unstructured":"Bertasius G, Shi J, Torresani L (2015) Deepedge: a multi-scale bifurcated deep network for top-down contour detection. In:2015 In Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4380-4389","DOI":"10.1109\/CVPR.2015.7299067"},{"key":"2500_CR51","doi-asserted-by":"publisher","first-page":"739","DOI":"10.1016\/j.jvcir.2016.08.022","volume":"40","author":"Z Wang","year":"2016","unstructured":"Wang Z, Zhu S, Li Y, Cui Z (2016) Convolutional neural network based deep conditional random fields for stereo matching. J Vis Commun Image Represent 40:739\u2013750","journal-title":"J Vis Commun Image Represent"},{"key":"2500_CR52","doi-asserted-by":"publisher","first-page":"679","DOI":"10.1109\/TPAMI.1986.4767851","volume":"6","author":"J Canny","year":"1986","unstructured":"Canny J (1986) A computational approach to edge detection. IEEE Trans Pattern Anal Mach Intell 6:679\u2013698","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2500_CR53","doi-asserted-by":"crossref","unstructured":"Yu Z, Feng C, Liu MY, Ramalingam S (2017) Casenet: Deep category-aware semantic edge detection. In :2017 In Proceedings of the IEEE conference on computer vision and pattern recognition,pp. 5964\u20135973","DOI":"10.1109\/CVPR.2017.191"},{"key":"2500_CR54","doi-asserted-by":"crossref","unstructured":"Karpathy A, Toderici G, Shetty S, Leung T, Sukthankar R, Fei-Fei L (2014) Large-scale video classification with convolutional neural networks. In: 2014 In Proceedings of the IEEE conference on Computer Vision and Pattern Recognition, pp 1725-1732","DOI":"10.1109\/CVPR.2014.223"},{"key":"2500_CR55","doi-asserted-by":"crossref","unstructured":"Ronneberger O, Fischer P, Brox T (2015) U-net: Convolutional networks for biomedical image segmentation. In:2015 In International Conference on Medical image computing and computer-assisted intervention,pp 234\u2013241","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"2500_CR56","doi-asserted-by":"crossref","unstructured":"Chen Q, Koltun V (2017) Photographic image synthesis with cascaded refinement networks. In:2017 In Proceedings of the IEEE international conference on computer vision, pp 1511-1520","DOI":"10.1109\/ICCV.2017.168"},{"key":"2500_CR57","doi-asserted-by":"publisher","first-page":"105590","DOI":"10.1016\/j.knosys.2020.105590","volume":"194","author":"F P\u00e9rez-Hern\u00e1ndez","year":"2020","unstructured":"P\u00e9rez-Hern\u00e1ndez F, Tableik S, Lamas A, Olmos R, Fujita H, Herrera F (2020) Object detection binary classifiers methodology based on deep learning to identify small objects handled similarly: application in video surveillance. Knowl-Based Syst 194:105590","journal-title":"Knowl-Based Syst"},{"key":"2500_CR58","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1016\/j.inffus.2018.11.015","volume":"49","author":"R Olmos","year":"2019","unstructured":"Olmos R, Tableik S, Lamas A, Perez-Hernandez F, Herrera F (2019) A binocular image fusion approach for minimizing false positives in handgun detection with deep learning. Information Fusion 49:271\u2013280","journal-title":"Information Fusion"},{"key":"2500_CR59","doi-asserted-by":"publisher","first-page":"4980","DOI":"10.1109\/TIP.2020.2977573","volume":"29","author":"J Ma","year":"2020","unstructured":"Ma J, Xu H, Jiang J, Mei X, Zhang XP (2020) DDcGAN: a dual-discriminator conditional generative adversarial network for multi-resolution image fusion. IEEE Trans Image Process 29:4980\u20134995","journal-title":"IEEE Trans Image Process"},{"key":"2500_CR60","doi-asserted-by":"publisher","first-page":"112855","DOI":"10.1016\/j.eswa.2019.112855","volume":"139","author":"VK Singh","year":"2020","unstructured":"Singh VK, Rashwan HA, Romani S, Akram F, Pandey N, Sarker MMK, Torrents-Barrena J (2020) Breast tumor segmentation and shape classification in mammograms using generative adversarial and convolutional neural network. Expert Syst Appl 139:112855","journal-title":"Expert Syst Appl"},{"issue":"5","key":"2500_CR61","first-page":"1216","volume":"42","author":"J Zhang","year":"2020","unstructured":"Zhang J, Yawei H (2020) Image-to-image translation based on improved cycle-consistent generative adversarial network. J Electron Inf Technol 42(5):1216\u20131222","journal-title":"J Electron Inf Technol"},{"key":"2500_CR62","doi-asserted-by":"crossref","unstructured":"He J, Zhang S, Yang M, Shan Y, Huang T (2019) Bi-directional cascade network for perceptual edge detection. In: 2019 in Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 3828-3837","DOI":"10.1109\/CVPR.2019.00395"},{"key":"2500_CR63","doi-asserted-by":"crossref","unstructured":"Yu Z, Liu W, Zou Y, Feng C, Ramalingam S, Kumar BVK, Kautz J (2018) Simultaneous edge alignment and learning. In:2018 in Proceedings of the European Conference on Computer Vision (ECCV), pp 388-404","DOI":"10.1007\/978-3-030-01219-9_24"},{"key":"2500_CR64","doi-asserted-by":"publisher","first-page":"107416","DOI":"10.1016\/j.patcog.2020.107416","volume":"107","author":"Y Zhang","year":"2020","unstructured":"Zhang Y, Shi L, Wu Y, Cheng K, Cheng J, Lu H (2020) Gesture recognition based on deep deformable 3D convolutional neural networks. Pattern Recogn 107:107416","journal-title":"Pattern Recogn"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-021-02500-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-021-02500-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-021-02500-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,22]],"date-time":"2022-02-22T06:36:36Z","timestamp":1645511796000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-021-02500-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,8]]},"references-count":64,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2022,3]]}},"alternative-id":["2500"],"URL":"https:\/\/doi.org\/10.1007\/s10489-021-02500-5","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,7,8]]},"assertion":[{"value":"3 May 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 July 2021","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}