{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T03:59:16Z","timestamp":1752983956512,"version":"3.37.3"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,11,27]],"date-time":"2022-11-27T00:00:00Z","timestamp":1669507200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,11,27]],"date-time":"2022-11-27T00:00:00Z","timestamp":1669507200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1451916"],"award-info":[{"award-number":["1451916"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. 
SCI."],"DOI":"10.1007\/s42979-022-01498-y","type":"journal-article","created":{"date-parts":[[2022,11,27]],"date-time":"2022-11-27T20:26:59Z","timestamp":1669580819000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Inception Recurrent Neural Network Architecture for Video Frame Prediction"],"prefix":"10.1007","volume":"4","author":[{"given":"Matin","family":"Hosseini","sequence":"first","affiliation":[]},{"given":"Anthony","family":"Maida","sequence":"additional","affiliation":[]},{"given":"Seyedmajid","family":"Hosseini","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0794-4015","authenticated-orcid":false,"given":"Raju","family":"Gottumukkala","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,11,27]]},"reference":[{"unstructured":"Finn C, Goodfellow I, Levine S. Unsupervised learning for physical interaction through video prediction. Adv Neural Inf Process Syst. 2016;29.","key":"1498_CR1"},{"doi-asserted-by":"crossref","unstructured":"Liu W, Luo W, Lian D, Gao S. Future frame prediction for anomaly detection\u2013a new baseline, In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2018. p. 6536\u20136545.","key":"1498_CR2","DOI":"10.1109\/CVPR.2018.00684"},{"doi-asserted-by":"crossref","unstructured":"Hosseini M, Salehi MA, Gottumukkala R. Enabling interactive video streaming for public safety monitoring through batch scheduling, In: 2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC\/SmartCity\/DSS), IEEE; 2017. p. 474\u2013481.","key":"1498_CR3","DOI":"10.1109\/HPCC-SmartCity-DSS.2017.62"},{"doi-asserted-by":"crossref","unstructured":"Ishihara Y, Takahashi M. 
Empirical study of future image prediction for image-based mobile robot navigation. Robot Auton Syst. 2022. p. 104018.","key":"1498_CR4","DOI":"10.1016\/j.robot.2021.104018"},{"unstructured":"Hosseini M, Sohrab F, Gottumukkala R, Bhupatiraju RT, Katragadda S, Raitoharju J, Iosifidis A, Gabbouj M. Empathicschool: A multimodal dataset for real-time facial expressions and physiological data analysis under different stress conditions, arXiv preprint arXiv:2209.13542; 2022.","key":"1498_CR5"},{"doi-asserted-by":"crossref","unstructured":"Liu Z, Yeh RA, Tang X, Liu Y, Agarwala A. Video frame synthesis using deep voxel flow, In: Proceedings of the IEEE International Conference on Computer Vision; 2017. p. 4463\u20134471.","key":"1498_CR6","DOI":"10.1109\/ICCV.2017.478"},{"unstructured":"Medel JR, Savakis A. Anomaly detection in video using predictive convolutional long short-term memory networks, arXiv preprint arXiv:1612.00390; 2016.","key":"1498_CR7"},{"doi-asserted-by":"crossref","unstructured":"Xu H, Gao Y, Yu F, Darrell T. End-to-end learning of driving models from large-scale video datasets, In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), IEEE; 2017. p. 3530\u20133538.","key":"1498_CR8","DOI":"10.1109\/CVPR.2017.376"},{"doi-asserted-by":"crossref","unstructured":"Finn C, Levine S. Deep visual foresight for planning robot motion, In: Robotics and Automation (ICRA), 2017 IEEE International Conference on, IEEE; 2017. p. 2786\u20132793.","key":"1498_CR9","DOI":"10.1109\/ICRA.2017.7989324"},{"key":"1498_CR10","doi-asserted-by":"publisher","first-page":"815","DOI":"10.1098\/rstb.2005.1622","volume":"360","author":"K Friston","year":"2005","unstructured":"Friston K. A theory of cortical responses. Phil Trans R Soc B. 2005;360:815\u201336.","journal-title":"Phil Trans R Soc B"},{"unstructured":"Lotter W, Kreiman G, Cox D. 
Deep predictive coding networks for video prediction and unsupervised learning, arXiv preprint arXiv:1605.08104; 2016.","key":"1498_CR11"},{"unstructured":"Krizhevsky A, Sutskever I, Hinton GE. Imagenet classification with deep convolutional neural networks, In: Advances in neural information processing systems; 2012. p. 1097\u20131105.","key":"1498_CR12"},{"unstructured":"Simonyan K, Zisserman, A. Very deep convolutional networks for large-scale image recognition, arXiv preprint arXiv:1409.1556; 2014.","key":"1498_CR13"},{"doi-asserted-by":"crossref","unstructured":"Zagoruyko S, Komodakis N. Wide residual networks, arXiv preprint arXiv:1605.07146; 2016.","key":"1498_CR14","DOI":"10.5244\/C.30.87"},{"doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A. Going deeper with convolutions, In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition; 2015. p. 1\u20139.","key":"1498_CR15","DOI":"10.1109\/CVPR.2015.7298594"},{"unstructured":"Srivastava N, Mansimov E, Salakhudinov R. Unsupervised learning of video representations using lstms, In: International conference on machine learning, PMLR; 2015. p. 843\u2013852.","key":"1498_CR16"},{"key":"1498_CR17","doi-asserted-by":"publisher","first-page":"3914","DOI":"10.3390\/en13153914","volume":"13","author":"M Hosseini","year":"2020","unstructured":"Hosseini M, Katragadda S, Wojtkiewicz J, Gottumukkala R, Maida A, Chambers TL. Direct normal irradiance forecasting using multivariate gated recurrent units. Energies. 2020;13:3914.","journal-title":"Energies"},{"unstructured":"Lotter W, Kreiman G, Cox D. Unsupervised learning of visual structure using predictive generative networks, arXiv preprint arXiv:1511.06380; 2015.","key":"1498_CR18"},{"unstructured":"Xingjian S, Chen Z, Wang H, Yeung D-Y, Wong W-K, Woo W-c. 
Convolutional lstm network: A machine learning approach for precipitation nowcasting, In: Advances in neural information processing systems; 2015. p. 802\u2013810.","key":"1498_CR19"},{"unstructured":"Villegas R, Yang J, Hong S, Lin X, Lee H. Decomposing motion and content for natural video sequence prediction, arXiv preprint arXiv:1706.08033; 2017.","key":"1498_CR20"},{"key":"1498_CR21","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1038\/4580","volume":"2","author":"RP Rao","year":"1999","unstructured":"Rao RP, Ballard DH. Predictive coding in the visual cortex: a functional interpretation of some extra-classical receptive-field effects. Nat Neurosci. 1999;2:79.","journal-title":"Nat Neurosci"},{"doi-asserted-by":"crossref","unstructured":"Rane RP, Sz\u00fcgyi E, Saxena V, Ofner A, Stober S. Prednet and predictive coding: A critical review, In: Proceedings of the 2020 International Conference on Multimedia Retrieval; 2020. p. 233\u2013241.","key":"1498_CR22","DOI":"10.1145\/3372278.3390694"},{"doi-asserted-by":"crossref","unstructured":"Rane R, Sz\u00fcgyi E, Saxena V, Ofner A, Stober S. Prednet and predictive coding: A critical review, arXiv preprint arXiv:1906.11902; 2019.","key":"1498_CR23","DOI":"10.1145\/3372278.3390694"},{"unstructured":"Hosseini M, Maida A. Hierarchical predictive coding models in a deep-learning framework, arXiv preprint arXiv:2005.03230; 2020.","key":"1498_CR24"},{"key":"1498_CR25","doi-asserted-by":"publisher","first-page":"1231","DOI":"10.1177\/0278364913491297","volume":"32","author":"A Geiger","year":"2013","unstructured":"Geiger A, Lenz P, Stiller C, Urtasun R. Vision meets robotics: The kitti dataset. Int J Robot Res. 2013;32:1231\u20137.","journal-title":"Int J Robot Res."},{"unstructured":"Laptev I, Caputo B, et al. Recognizing human actions: a local svm approach, In: null, IEEE; 2004. p. 32\u201336.","key":"1498_CR26"},{"unstructured":"Jang Y, Kim G, Song Y. 
Video prediction with appearance and motion conditions, In: International Conference on Machine Learning, PMLR; 2018. p. 2225\u20132234.","key":"1498_CR27"},{"doi-asserted-by":"crossref","unstructured":"Liang X, Lee L, Dai W, Xing EP. Dual motion gan for future-flow embedded video prediction, In: proceedings of the IEEE international conference on computer vision; 2017. p. 1744\u20131752.","key":"1498_CR28","DOI":"10.1109\/ICCV.2017.194"},{"doi-asserted-by":"crossref","unstructured":"Ho Y-H, Cho C-Y, Peng W-H, Jin G-L. Sme-net: Sparse motion estimation for parametric video prediction through reinforcement learning, In: Proceedings of the IEEE\/CVF International Conference on Computer Vision; 2019. p. 10462\u201310470.","key":"1498_CR29","DOI":"10.1109\/ICCV.2019.01056"},{"doi-asserted-by":"crossref","unstructured":"Zhou Y, Berg TL. Learning temporal transformations from time-lapse videos, CoRR arXiv:1608.07724; 2016.","key":"1498_CR30","DOI":"10.1007\/978-3-319-46484-8_16"},{"unstructured":"Hsieh J-T, Liu B, Huang D-A, Fei-Fei LF, Niebles JC. Learning to decompose and disentangle representations for video prediction, In: Advances in Neural Information Processing Systems; 2018. p. 517\u2013526.","key":"1498_CR31"},{"unstructured":"Zhao M Zhuang C, Wang Y, Lee, TS. Predictive encoding of contextual relationships for perceptual inference, interpolation and prediction, arXiv preprint arXiv:1411.3815; 2014.","key":"1498_CR32"},{"unstructured":"Goroshin R, Mathieu M, LeCun Y. Learning to linearize under uncertainty, CoRR arXiv:1506.03011; 2015.","key":"1498_CR33"},{"key":"1498_CR34","doi-asserted-by":"publisher","first-page":"4055","DOI":"10.3390\/en12214055","volume":"12","author":"J Wojtkiewicz","year":"2019","unstructured":"Wojtkiewicz J, Hosseini M, Gottumukkala R, Chambers TL. Hour-ahead solar irradiance forecasting using multivariate gated recurrent units. Energies. 
2019;12:4055.","journal-title":"Energies"},{"unstructured":"Wang Y, Long M, Wang J, Gao Z, Philip SY. Predrnn: Recurrent neural networks for predictive learning using spatiotemporal lstms, In: Advances in Neural Information Processing Systems; 2017. p. 879\u2013888.","key":"1498_CR35"},{"unstructured":"Shi X, Chen Z, Wang H, Yeung D-Y, Wong W-K. W.-C. WOO, Convolutional lstm network: A machine learning approach for precipitation nowcasting. In: Cortes C, Lawrence N, Lee D, Sugiyama M, Garnett R, editors. Advances in Neural Information Processing Systems, vol. 28. Curran Associates Inc; 2015. https:\/\/proceedings.neurips.cc\/paper\/2015\/file\/07563a3fe3bbe7e3ba84431ad9d055af-Paper.pdf.","key":"1498_CR36"},{"unstructured":"Jozefowicz R, Zaremba W, Sutskever I. An empirical exploration of recurrent network architectures, In: International Conference on Machine Learning; 2015. p. 2342\u20132350.","key":"1498_CR37"},{"unstructured":"Chung J, Gulcehre C, Cho K, Bengio Y. Empirical evaluation of gated recurrent neural networks on sequence modeling, arXiv preprint arXiv:1412.3555; 2014.","key":"1498_CR38"},{"doi-asserted-by":"crossref","unstructured":"Xu Z, Wang Y, Long M, Wang J, KLiss M. Predcnn: Predictive learning with cascade convolutions., In: IJCAI; 2018. p. 2940\u20132947.","key":"1498_CR39","DOI":"10.24963\/ijcai.2018\/408"},{"unstructured":"Kumar M, Babaeizadeh M, Erhan D, Finn C, Levine S, Dinh L, Kingma D. Videoflow: A conditional flow-based model for stochastic video generation, arXiv preprint arXiv:1903.01434; 2019.","key":"1498_CR40"},{"doi-asserted-by":"crossref","unstructured":"Reda FA, Liu G, Shih KJ, Kirby R, Barker J, Tarjan D, Tao A, Catanzaro B. Sdc-net: Video prediction using spatially-displaced convolution, In: Proceedings of the European Conference on Computer Vision (ECCV); 2018. p. 718\u2013733.","key":"1498_CR41","DOI":"10.1007\/978-3-030-01234-2_44"},{"doi-asserted-by":"crossref","unstructured":"Alom M, Hasan M, Yakopcic C, Tarek M, Taha T. 
Inception recurrent convolutional neural network for object recognition. arXiv preprint arXiv:1704.07709; 2017.","key":"1498_CR42","DOI":"10.1007\/s00521-018-3627-6"},{"doi-asserted-by":"crossref","unstructured":"Hosseini M, Maida AS, Hosseini M, Raju G. Inception-inspired lstm for next-frame video prediction, arXiv preprint arXiv:1909.05622; 2019.","key":"1498_CR43","DOI":"10.1609\/aaai.v34i10.7176"},{"doi-asserted-by":"crossref","unstructured":"Heidari M, Rafatirad S. Using transfer learning approach to implement convolutional neural network model to recommend airline tickets by using online reviews, In: 2020 15th International Workshop on Semantic and Social media Adaptation and Personalization. SMA, IEEE; 2020. p. 1\u20136.","key":"1498_CR44","DOI":"10.1109\/SMAP49528.2020.9248443"},{"doi-asserted-by":"crossref","unstructured":"J. Liu, J. Luo, M. Shah, Recognizing realistic actions from videos \u201cin the wild\u201d, In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, IEEE; 2009. p. 1996\u20132003.","key":"1498_CR45","DOI":"10.1109\/CVPR.2009.5206744"},{"key":"1498_CR46","first-page":"13809","volume":"34","author":"M Hosseini","year":"2020","unstructured":"Hosseini M, Maida AS, Hosseini M, Raju G. Inception lstm for next-frame video prediction (student abstract). Proc AAAI Conf Artif Intell. 2020;34:13809\u201310.","journal-title":"Proc AAAI Conf Artif Intell."},{"doi-asserted-by":"crossref","unstructured":"Sch\u00fcldt C, Laptev I, Caputo B. Recognizing human actions: a local SVM approach, In: Proc. Int. Conf. Pattern Recognition (ICPR\u201904), Cambridge, U.K; 2004.","key":"1498_CR47","DOI":"10.1109\/ICPR.2004.1334462"},{"key":"1498_CR48","doi-asserted-by":"publisher","first-page":"971","DOI":"10.1007\/s00138-012-0450-4","volume":"24","author":"KK Reddy","year":"2013","unstructured":"Reddy KK, Shah M. Recognizing 50 human action categories of web videos. Mach Vis Appl. 
2013;24:971\u201381.","journal-title":"Mach Vis Appl"}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-022-01498-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-022-01498-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-022-01498-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,7]],"date-time":"2023-01-07T22:27:46Z","timestamp":1673130466000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-022-01498-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,27]]},"references-count":48,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2023,1]]}},"alternative-id":["1498"],"URL":"https:\/\/doi.org\/10.1007\/s42979-022-01498-y","relation":{},"ISSN":["2661-8907"],"issn-type":[{"type":"electronic","value":"2661-8907"}],"subject":[],"published":{"date-parts":[[2022,11,27]]},"assertion":[{"value":"10 February 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 November 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 November 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"The source code used for the 
new model, data and evaluation is made available at GitHub.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Availability of data and materials"}}],"article-number":"69"}}