{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,11,13]],"date-time":"2024-11-13T05:21:12Z","timestamp":1731475272447,"version":"3.28.0"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2024,3,16]],"date-time":"2024-03-16T00:00:00Z","timestamp":1710547200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,16]],"date-time":"2024-03-16T00:00:00Z","timestamp":1710547200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1007\/s00371-024-03298-2","type":"journal-article","created":{"date-parts":[[2024,3,16]],"date-time":"2024-03-16T21:21:00Z","timestamp":1710624060000},"page":"9107-9128","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A lightweight multi-granularity asymmetric motion mode video frame prediction algorithm"],"prefix":"10.1007","volume":"40","author":[{"given":"Jie","family":"Yan","sequence":"first","affiliation":[]},{"given":"Guihe","family":"Qin","sequence":"additional","affiliation":[]},{"given":"Minghui","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Yanhua","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Zhonghan","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yinghui","family":"Xu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,3,16]]},"reference":[{"unstructured":"Brock, A., Donahue, J., Simonyan, K.: Large scale gan training for high fidelity natural image synthesis. 
arXiv:1809.11096 (2018)","key":"3298_CR1"},{"doi-asserted-by":"crossref","unstructured":"Zhai, X., Kolesnikov, A., Houlsby, N., Beyer, L.: Scaling vision transformers. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1204\u20131213 (2021)","key":"3298_CR2","DOI":"10.1109\/CVPR52688.2022.01179"},{"unstructured":"Chen, H., He, B., Wang, H., Ren, Y., Lim, S.-N., Shrivastava, A.: Nerv: neural representations for videos. In: Neural Information Processing Systems (2021)","key":"3298_CR3"},{"doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, S., Maaten, L., Weinberger, K.Q.: Condensenet: an efficient densenet using learned group convolutions. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2752\u20132761 (2017)","key":"3298_CR4","DOI":"10.1109\/CVPR.2018.00291"},{"unstructured":"Wang, S., Li, B.Z., Khabsa, M., Fang, H., Ma, H.: Linformer: self-attention with linear complexity. arXiv:2006.04768 (2020)","key":"3298_CR5"},{"unstructured":"Howard, A.G., Zhu, M., Chen, B., Kalenichenko, D., Wang, W., Weyand, T., Andreetto, M., Adam, H.: Mobilenets: efficient convolutional neural networks for mobile vision applications. arXiv:1704.04861 (2017)","key":"3298_CR6"},{"unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. CoRR arXiv:1409.1556 (2014)","key":"3298_CR7"},{"doi-asserted-by":"crossref","unstructured":"Xie, S., Girshick, R.B., Doll\u00e1r, P., Tu, Z., He, K.: Aggregated residual transformations for deep neural networks. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5987\u20135995 (2016)","key":"3298_CR8","DOI":"10.1109\/CVPR.2017.634"},{"doi-asserted-by":"crossref","unstructured":"Zhang, X., Zhou, X., Lin, M., Sun, J.: Shufflenet: an extremely efficient convolutional neural network for mobile devices. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
6848\u20136856 (2017)","key":"3298_CR9","DOI":"10.1109\/CVPR.2018.00716"},{"key":"3298_CR10","doi-asserted-by":"publisher","first-page":"2673","DOI":"10.1109\/78.650093","volume":"45","author":"M Schuster","year":"1997","unstructured":"Schuster, M., Paliwal, K.K.: Bidirectional recurrent neural networks. IEEE Trans. Signal Process. 45, 2673\u20132681 (1997)","journal-title":"IEEE Trans. Signal Process."},{"doi-asserted-by":"crossref","unstructured":"Huang, X., Jiang, Y., Tang, J.: MApredRNN: multi-attention predictive RNN for traffic flow prediction by dynamic spatio-temporal data fusion. Appl. Intell. (2023)","key":"3298_CR11","DOI":"10.1007\/s10489-023-04494-8"},{"unstructured":"Shi, X., Chen, Z., Wang, H., Yeung, D.Y., Wong, W.-K., Woo, W.-c.: Convolutional LSTM network: a machine learning approach for precipitation nowcasting (2015)","key":"3298_CR12"},{"key":"3298_CR13","doi-asserted-by":"publisher","first-page":"2515","DOI":"10.1007\/s10489-018-1395-8","volume":"49","author":"M Majd","year":"2019","unstructured":"Majd, M., Safabakhsh, R.: A motion-aware convlstm network for action recognition. Appl. Intell. 49, 2515\u20132521 (2019)","journal-title":"Appl. Intell."},{"unstructured":"Zhang, L., Zhu, G., Mei, L., Shen, P., Shah, S.A.A., Bennamoun, M.: Attention in convolutional LSTM for gesture recognition. In: Neural Information Processing Systems (2018)","key":"3298_CR14"},{"doi-asserted-by":"crossref","unstructured":"Chiang, T.-H., Lin, Y.-T., Lin, J.C.-H., Tseng, Y.-C.: Trapezoid-structured lstm with segregated gates and bridge joints for video frame inpainting. Visual Comput. 1\u201314 (2023)","key":"3298_CR15","DOI":"10.1007\/s00371-023-02832-y"},{"key":"3298_CR16","doi-asserted-by":"publisher","first-page":"3791","DOI":"10.1007\/s00371-021-02221-3","volume":"38","author":"E Shibuya","year":"2021","unstructured":"Shibuya, E., Hotta, K.: Cell image segmentation by using feedback and convolutional LSTM. Vis. Comput. 
38, 3791\u20133801 (2021)","journal-title":"Vis. Comput."},{"key":"3298_CR17","doi-asserted-by":"publisher","first-page":"2033","DOI":"10.1007\/s00371-021-02264-6","volume":"38","author":"SGE G\u00f6kstorp","year":"2021","unstructured":"G\u00f6kstorp, S.G.E., Breckon, T.: Temporal and non-temporal contextual saliency analysis for generalized wide-area search within unmanned aerial vehicle (uav) video. Vis. Comput. 38, 2033\u20132040 (2021)","journal-title":"Vis. Comput."},{"key":"3298_CR18","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1016\/j.sigpro.2018.01.021","volume":"147","author":"Q Zhang","year":"2018","unstructured":"Zhang, Q., Zhuo, L., Li, J., Zhang, J., Zhang, H., Li, X.: Vehicle color recognition using multiple-layer feature representations of lightweight convolutional neural network. Signal Process. 147, 146\u2013153 (2018)","journal-title":"Signal Process."},{"unstructured":"Howard, A.G., Zhu, M., Chen, B., Kalenichenko, D., Wang, W., Weyand, T., Andreetto, M., Adam, H.: Mobilenets: efficient convolutional neural networks for mobile vision applications. arXiv:1704.04861 (2017)","key":"3298_CR19"},{"doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A.G., Zhu, M., Zhmoginov, A., Chen, L.-C.: Mobilenetv2: inverted residuals and linear bottlenecks. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520 (2018)","key":"3298_CR20","DOI":"10.1109\/CVPR.2018.00474"},{"doi-asserted-by":"crossref","unstructured":"Howard, A.G., Sandler, M., Chu, G., Chen, L.-C., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., Adam, H.: Searching for mobilenetv3. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 1314\u20131324 (2019)","key":"3298_CR21","DOI":"10.1109\/ICCV.2019.00140"},{"doi-asserted-by":"crossref","unstructured":"Zhang, X., Zhou, X., Lin, M., Sun, J.: Shufflenet: an extremely efficient convolutional neural network for mobile devices. 
In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6848\u20136856 (2017)","key":"3298_CR22","DOI":"10.1109\/CVPR.2018.00716"},{"doi-asserted-by":"crossref","unstructured":"Ma, N., Zhang, X., Zheng, H., Sun, J.: Shufflenet v2: practical guidelines for efficient cnn architecture design. arXiv:1807.11164 (2018)","key":"3298_CR23","DOI":"10.1007\/978-3-030-01264-9_8"},{"doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 770\u2013778 (2015)","key":"3298_CR24","DOI":"10.1109\/CVPR.2016.90"},{"doi-asserted-by":"crossref","unstructured":"Han, K., Wang, Y., Tian, Q., Guo, J., Xu, C., Xu, C.: Ghostnet: more features from cheap operations. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1577\u20131586 (2019)","key":"3298_CR25","DOI":"10.1109\/CVPR42600.2020.00165"},{"doi-asserted-by":"crossref","unstructured":"Ding, X., Guo, Y., Ding, G., Han, J.: Acnet: strengthening the kernel skeletons for powerful cnn via asymmetric convolution blocks. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 1911\u20131920 (2019)","key":"3298_CR26","DOI":"10.1109\/ICCV.2019.00200"},{"key":"3298_CR27","doi-asserted-by":"publisher","first-page":"4927","DOI":"10.1109\/TCSVT.2021.3138431","volume":"32","author":"Y Liu","year":"2022","unstructured":"Liu, Y., Jia, Q., Fan, X., Wang, S., Ma, S., Gao, W.: Cross-SRN: structure-preserving super-resolution network with cross convolution. IEEE Trans. Circuits Syst. Video Technol. 32, 4927\u20134939 (2022)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"doi-asserted-by":"crossref","unstructured":"Szegedy, C., Vanhoucke, V., Ioffe, S., Shlens, J., Wojna, Z.: Rethinking the inception architecture for computer vision. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 
2818\u20132826 (2015)","key":"3298_CR28","DOI":"10.1109\/CVPR.2016.308"},{"key":"3298_CR29","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1109\/LSP.2021.3051845","volume":"28","author":"Y Li","year":"2021","unstructured":"Li, Y., Li, X., Xiao, C., Li, H., Zhang, W.: Eacnet: enhanced asymmetric convolution for real-time semantic segmentation. IEEE Signal Process. Lett. 28, 234\u2013238 (2021)","journal-title":"IEEE Signal Process. Lett."},{"unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., Houlsby, N.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv:2010.11929 (2020)","key":"3298_CR30"},{"doi-asserted-by":"crossref","unstructured":"Dong, X., Bao, J., Chen, D., Zhang, W., Yu, N., Yuan, L., Chen, D., Guo, B.: Cswin transformer: a general vision transformer backbone with cross-shaped windows. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12114\u201312124 (2021)","key":"3298_CR31","DOI":"10.1109\/CVPR52688.2022.01181"},{"unstructured":"Ho, J., Kalchbrenner, N., Weissenborn, D., Salimans, T.: Axial attention in multidimensional transformers. arXiv:1912.12180 (2019)","key":"3298_CR32"},{"key":"3298_CR33","doi-asserted-by":"publisher","first-page":"11624","DOI":"10.1109\/TPAMI.2023.3284038","volume":"45","author":"Y Liu","year":"2022","unstructured":"Liu, Y., Li, G., Lin, L.: Cross-modal causal relational reasoning for event-level visual question answering. IEEE Trans. Pattern Anal. Mach. Intell. 45, 11624\u201311641 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"doi-asserted-by":"crossref","unstructured":"Yan, H., Liu, Y., Wei, Y., Li, Z., Li, G., Lin, L.: Skeletonmae: graph-based masked autoencoder for skeleton sequence pre-training. 
arXiv:2307.08476 (2023)","key":"3298_CR34","DOI":"10.1109\/ICCV51070.2023.00516"},{"doi-asserted-by":"crossref","unstructured":"Li, Z., Tang, H., Peng, Z., Qi, G.-J., Tang, J.: Knowledge-guided semantic transfer network for few-shot image recognition. IEEE Trans. Neural Netw. Learn. Syst. (2023)","key":"3298_CR35","DOI":"10.1109\/TNNLS.2023.3240195"},{"key":"3298_CR36","doi-asserted-by":"publisher","first-page":"108792","DOI":"10.1016\/j.patcog.2022.108792","volume":"130","author":"H Tang","year":"2022","unstructured":"Tang, H., Yuan, C., Li, Z., Tang, J.: Learning attention-guided pyramidal features for few-shot fine-grained recognition. Pattern Recognit. 130, 108792 (2022)","journal-title":"Pattern Recognit."},{"key":"3298_CR37","doi-asserted-by":"publisher","first-page":"3947","DOI":"10.1109\/TCSVT.2023.3236636","volume":"33","author":"Z Zha","year":"2022","unstructured":"Zha, Z., Tang, H., Sun, Y., Tang, J.: Boosting few-shot fine-grained recognition with background suppression and foreground alignment. IEEE Trans. Circuits Syst. Video Technol. 33, 3947\u20133961 (2022)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"3298_CR38","doi-asserted-by":"publisher","first-page":"1248","DOI":"10.1109\/TII.2022.3179243","volume":"19","author":"Y Zhu","year":"2023","unstructured":"Zhu, Y., Zhang, Y., Liu, L., Liu, Y., Li, G., Mao, M., Lin, L.: Hybrid-order representation learning for electricity theft detection. IEEE Trans. Ind. Inf. 19, 1248\u20131259 (2023)","journal-title":"IEEE Trans. Ind. Inf."},{"key":"3298_CR39","doi-asserted-by":"publisher","first-page":"1978","DOI":"10.1109\/TIP.2022.3147032","volume":"31","author":"Y Liu","year":"2021","unstructured":"Liu, Y., Wang, K., Lan, H., Lin, L.: Tcgl: temporal contrastive graph for self-supervised video representation learning. IEEE Trans. Image Process. 31, 1978\u20131993 (2021)","journal-title":"IEEE Trans. 
Image Process."},{"doi-asserted-by":"crossref","unstructured":"Tang, H., Liu, J., Yan, S., Yan, R., Li, Z., Tang, J.: M3net: Multi-view encoding, matching, and fusion for few-shot fine-grained action recognition. In: Proceedings of the 31st ACM International Conference on Multimedia (2023)","key":"3298_CR40","DOI":"10.1145\/3581783.3612221"},{"doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Weinberger, K.Q.: Densely connected convolutional networks. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2261\u20132269 (2016)","key":"3298_CR41","DOI":"10.1109\/CVPR.2017.243"},{"doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., Girshick, R.B.: Masked autoencoders are scalable vision learners. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 15979\u201315988 (2021)","key":"3298_CR42","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"3298_CR43","doi-asserted-by":"publisher","first-page":"2208","DOI":"10.1109\/TPAMI.2022.3165153","volume":"45","author":"Y Wang","year":"2021","unstructured":"Wang, Y., Wu, H., Zhang, J., Gao, Z., Wang, J., Yu, P.S., Long, M.: Predrnn: a recurrent neural network for spatiotemporal predictive learning. IEEE Trans. Pattern Anal. Mach. Intell. 45, 2208\u20132225 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"unstructured":"Srivastava, N., Mansimov, E., Salakhutdinov, R.: Unsupervised learning of video representations using lstms. In: International Conference on Machine Learning (2015)","key":"3298_CR44"},{"doi-asserted-by":"crossref","unstructured":"Sch\u00fcldt, C., Laptev, I., Caputo, B.: Recognizing human actions: a local SVM approach. In: Proceedings of the 17th International Conference on Pattern Recognition, 2004. ICPR 2004. vol. 3, pp. 
32\u201336 (2004)","key":"3298_CR45","DOI":"10.1109\/ICPR.2004.1334462"},{"doi-asserted-by":"crossref","unstructured":"Blank, M., Gorelick, L., Shechtman, E., Irani, M., Basri, R.: Actions as space-time shapes. In: Tenth IEEE International Conference on Computer Vision (ICCV\u201905), vol. 2, pp. 1395\u20131402 (2005)","key":"3298_CR46","DOI":"10.1109\/ICCV.2005.28"},{"key":"3298_CR47","doi-asserted-by":"publisher","first-page":"1325","DOI":"10.1109\/TPAMI.2013.248","volume":"36","author":"C Ionescu","year":"2014","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3.6m: large scale datasets and predictive methods for 3d human sensing in natural environments. IEEE Trans. Pattern Anal. Mach. Intell. 36, 1325\u20131339 (2014)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595 (2018)","key":"3298_CR48","DOI":"10.1109\/CVPR.2018.00068"},{"unstructured":"Shi, X., Gao, Z., Lausen, L., Wang, H., Yeung, D.Y., Wong, W.-K., Woo, W.-c.: Deep learning for precipitation nowcasting: a benchmark and a new model. arXiv:1706.03458 (2017)","key":"3298_CR49"},{"unstructured":"Villegas, R., Yang, J., Hong, S., Lin, X., Lee, H.: Decomposing motion and content for natural video sequence prediction. arXiv:1706.08033 (2017)","key":"3298_CR50"},{"unstructured":"Wang, Y., Gao, Z., Long, M., Wang, J., Yu, P.S.: Predrnn++: towards a resolution of the deep-in-time dilemma in spatiotemporal predictive learning. 
arXiv:1804.06300 (2018)","key":"3298_CR51"},{"key":"3298_CR52","doi-asserted-by":"publisher","first-page":"160","DOI":"10.1016\/j.neucom.2022.04.063","volume":"494","author":"J Yan","year":"2022","unstructured":"Yan, J., Qin, G., Sun, M., Liang, Y., Zhang, Z.: Dimension decoupling attention mechanism for time series prediction. Neurocomputing 494, 160\u2013170 (2022)","journal-title":"Neurocomputing"},{"key":"3298_CR53","doi-asserted-by":"publisher","first-page":"5015","DOI":"10.1007\/s10489-021-02631-9","volume":"52","author":"M Yuan","year":"2021","unstructured":"Yuan, M., Dai, Q.: A novel deep pixel restoration video prediction algorithm integrating attention mechanism. Appl. Intell. 52, 5015\u20135033 (2021)","journal-title":"Appl. Intell."}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-024-03298-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-024-03298-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-024-03298-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,12]],"date-time":"2024-11-12T09:21:49Z","timestamp":1731403309000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-024-03298-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,16]]},"references-count":53,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["3298"],"URL":"https:\/\/doi.org\/10.1007\/s00371-024-03298-2","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"type":"print","value":"0178-2789"},{"type":"electronic","value":"1432-2315"}],"subject":[],"pub
lished":{"date-parts":[[2024,3,16]]},"assertion":[{"value":"5 December 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 March 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}