{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T02:02:03Z","timestamp":1780020123689,"version":"3.53.1"},"reference-count":37,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100010418","name":"Institute of Information & Communications Technology Planning & Evaluation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100010418","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Knowledge-Based Systems"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.knosys.2026.116088","type":"journal-article","created":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T17:09:59Z","timestamp":1777568999000},"page":"116088","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Time-plane projection network for efficient time-series image analysis"],"prefix":"10.1016","volume":"346","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3856-5846","authenticated-orcid":false,"given":"Jaehan","family":"Park","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3997-4924","authenticated-orcid":false,"given":"Soo Young","family":"Shin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"issue":"1","key":"10.1016\/j.knosys.2026.116088_b1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s40537-021-00444-8","article-title":"Review of deep learning: concepts, CNN architectures, challenges, applications, future directions","volume":"8","author":"Alzubaidi","year":"2021","journal-title":"J. Big Data"},{"issue":"1","key":"10.1016\/j.knosys.2026.116088_b2","first-page":"18","article-title":"Image classification based on CNN: a survey","volume":"6","author":"Elngar","year":"2021","journal-title":"J. Cybersecur. Inf. Manag."},{"key":"10.1016\/j.knosys.2026.116088_b3","doi-asserted-by":"crossref","DOI":"10.1016\/j.jenvman.2021.114367","article-title":"Convolutional neural network (CNN) with metaheuristic optimization algorithms for landslide susceptibility mapping in Icheon, South Korea","volume":"305","author":"Hakim","year":"2022","journal-title":"J. Environ. Manag."},{"key":"10.1016\/j.knosys.2026.116088_b4","doi-asserted-by":"crossref","DOI":"10.1016\/j.image.2021.116181","article-title":"An optimized CNN-based quality assessment model for screen content image","volume":"94","author":"Jiang","year":"2021","journal-title":"Signal Process., Image Commun."},{"key":"10.1016\/j.knosys.2026.116088_b5","first-page":"16133","article-title":"ConvNeXt V2: Co-designing and scaling ConvNets with masked autoencoders","volume":"2023","author":"Woo","year":"2023","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR)"},{"key":"10.1016\/j.knosys.2026.116088_b6","doi-asserted-by":"crossref","first-page":"109","DOI":"10.1007\/978-1-4842-6168-2_10","article-title":"EfficientNet","author":"Koonce","year":"2021","journal-title":"Convolutional Neural Networks Swift TensorFlow: Image Recognit. Dataset Categorization"},{"issue":"3","key":"10.1016\/j.knosys.2026.116088_b7","doi-asserted-by":"crossref","first-page":"97","DOI":"10.32604\/jbd.2021.016993","article-title":"Survey on research of RNN-based spatio-temporal sequence prediction algorithms","volume":"3","author":"Fang","year":"2021","journal-title":"J. Big Data"},{"key":"10.1016\/j.knosys.2026.116088_b8","doi-asserted-by":"crossref","DOI":"10.1016\/j.image.2019.115731","article-title":"Correlation net: Spatiotemporal multimodal deep learning for action recognition","volume":"82","author":"Yudistira","year":"2020","journal-title":"Signal Process., Image Commun."},{"key":"10.1016\/j.knosys.2026.116088_b9","doi-asserted-by":"crossref","DOI":"10.1016\/j.cviu.2022.103597","article-title":"Learning representational invariances for data-efficient action recognition","volume":"227","author":"Zou","year":"2023","journal-title":"Comput. Vis. Image Underst."},{"key":"10.1016\/j.knosys.2026.116088_b10","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2024.108322","article-title":"Noise-robust pipe wall-thinning discrimination system using convolution recurrent neural network model","volume":"133","author":"Park","year":"2024","journal-title":"Eng. Appl. Artif. Intell."},{"issue":"1","key":"10.1016\/j.knosys.2026.116088_b11","doi-asserted-by":"crossref","first-page":"4409","DOI":"10.1038\/s41598-025-88459-6","article-title":"Breast cancer classification based on hybrid CNN with LSTM model","volume":"15","author":"Kaddes","year":"2025","journal-title":"Sci. Rep."},{"issue":"1","key":"10.1016\/j.knosys.2026.116088_b12","doi-asserted-by":"crossref","first-page":"7231","DOI":"10.1038\/s41467-022-34780-x","article-title":"Path sampling of recurrent neural networks by incorporating known physics","volume":"13","author":"Tsai","year":"2022","journal-title":"Nat. Commun."},{"issue":"11","key":"10.1016\/j.knosys.2026.116088_b13","doi-asserted-by":"crossref","first-page":"442","DOI":"10.3390\/info12110442","article-title":"Analysis of gradient vanishing of RNNs and performance comparison","volume":"12","author":"Noh","year":"2021","journal-title":"Information"},{"issue":"1","key":"10.1016\/j.knosys.2026.116088_b14","doi-asserted-by":"crossref","first-page":"34","DOI":"10.1038\/s41598-020-79336-5","article-title":"A 3D-CNN model with CT-based parametric response mapping for classifying COPD subjects","volume":"11","author":"Ho","year":"2021","journal-title":"Sci. Rep."},{"issue":"5","key":"10.1016\/j.knosys.2026.116088_b15","doi-asserted-by":"crossref","first-page":"286","DOI":"10.1007\/s42979-020-00293-x","article-title":"Human action prediction with 3D-CNN","volume":"1","author":"Alfaifi","year":"2020","journal-title":"SN Comput. Sci."},{"issue":"1","key":"10.1016\/j.knosys.2026.116088_b16","article-title":"3D CNN with visual insights for early detection of lung cancer using gradient-weighted class activation","volume":"2021","author":"Neal","year":"2021","journal-title":"J. Heal. Eng."},{"key":"10.1016\/j.knosys.2026.116088_b17","article-title":"Two-stream convolutional networks for action recognition in videos","author":"Simonyan","year":"2014","journal-title":"Adv. Neural Inf. Process. Syst. (NeurIPS)"},{"key":"10.1016\/j.knosys.2026.116088_b18","doi-asserted-by":"crossref","unstructured":"J. Lin, C. Gan, S. Han, Tsm: Temporal shift module for efficient video understanding, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2019, pp. 7083\u20137093.","DOI":"10.1109\/ICCV.2019.00718"},{"key":"10.1016\/j.knosys.2026.116088_b19","doi-asserted-by":"crossref","unstructured":"Y. Li, B. Ji, X. Shi, J. Zhang, B. Kang, L. Wang, Tea: Temporal excitation and aggregation for action recognition, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020, pp. 909\u2013918.","DOI":"10.1109\/CVPR42600.2020.00099"},{"key":"10.1016\/j.knosys.2026.116088_b20","article-title":"SlowFast networks for video recognition","author":"Feichtenhofer","year":"2019","journal-title":"Proc. IEEE Int. Conf. Comput. Vis. (ICCV)"},{"key":"10.1016\/j.knosys.2026.116088_b21","first-page":"5998","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"3","key":"10.1016\/j.knosys.2026.116088_b22","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3641289","article-title":"A survey on evaluation of large language models","volume":"15","author":"Chang","year":"2024","journal-title":"ACM Trans. Intell. Syst. Technol."},{"issue":"1","key":"10.1016\/j.knosys.2026.116088_b23","doi-asserted-by":"crossref","first-page":"5787","DOI":"10.1038\/s41598-025-90115-y","article-title":"Multimodal surface-based transformer model for early diagnosis of alzheimer\u2019s disease","volume":"15","author":"Duong","year":"2025","journal-title":"Sci. Rep."},{"key":"10.1016\/j.knosys.2026.116088_b24","doi-asserted-by":"crossref","DOI":"10.1016\/j.jvcir.2025.104441","article-title":"Multi-TuneV: Fine-tuning the fusion of multiple modules for video action recognition","volume":"109","author":"Liu","year":"2025","journal-title":"J. Vis. Commun. Image Represent."},{"key":"10.1016\/j.knosys.2026.116088_b25","doi-asserted-by":"crossref","DOI":"10.1016\/j.image.2024.117244","article-title":"Transformer-based multiview spatiotemporal feature interactive fusion for human action recognition in depth videos","volume":"131","author":"Wu","year":"2025","journal-title":"Signal Process., Image Commun."},{"key":"10.1016\/j.knosys.2026.116088_b26","doi-asserted-by":"crossref","DOI":"10.1016\/j.image.2023.116940","article-title":"Multi-level channel attention excitation network for human action recognition in videos","volume":"114","author":"Wu","year":"2023","journal-title":"Signal Process., Image Commun."},{"key":"10.1016\/j.knosys.2026.116088_b27","series-title":"International Conference on Learning Representations","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2021"},{"issue":"1","key":"10.1016\/j.knosys.2026.116088_b28","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1109\/TPAMI.2022.3152247","article-title":"A survey on vision transformer","volume":"45","author":"Han","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.116088_b29","doi-asserted-by":"crossref","unstructured":"T.-Y. Lin, P. Goyal, R. Girshick, K. He, P. Doll\u00e1r, Focal loss for dense object detection, in: Proceedings of the IEEE International Conference on Computer Vision, 2017, pp. 2980\u20132988.","DOI":"10.1109\/ICCV.2017.324"},{"key":"10.1016\/j.knosys.2026.116088_b30","doi-asserted-by":"crossref","unstructured":"C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, Z. Wojna, Rethinking the inception architecture for computer vision, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2016, pp. 2818\u20132826.","DOI":"10.1109\/CVPR.2016.308"},{"key":"10.1016\/j.knosys.2026.116088_b31","article-title":"VideoMAE V2: Scaling video masked autoencoders with dual masking","author":"Wang","year":"2023","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR)"},{"key":"10.1016\/j.knosys.2026.116088_b32","doi-asserted-by":"crossref","DOI":"10.1016\/j.image.2025.117381","article-title":"Mining the salient spatio-temporal feature with S2TF-net for action recognition","author":"Liu","year":"2025","journal-title":"Signal Process., Image Commun."},{"key":"10.1016\/j.knosys.2026.116088_b33","doi-asserted-by":"crossref","unstructured":"K. Kahatapitiya, A. Arnab, A. Nagrani, M.S. Ryoo, Victr: Video-conditioned text representations for activity recognition, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 18547\u201318558.","DOI":"10.1109\/CVPR52733.2024.01755"},{"key":"10.1016\/j.knosys.2026.116088_b34","doi-asserted-by":"crossref","unstructured":"R. Wang, D. Chen, Z. Wu, Y. Chen, X. Dai, M. Liu, L. Yuan, Y.-G. Jiang, Masked video distillation: Rethinking masked feature modeling for self-supervised video representation learning, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 6312\u20136322.","DOI":"10.1109\/CVPR52729.2023.00611"},{"key":"10.1016\/j.knosys.2026.116088_b35","first-page":"9758","article-title":"Self-supervised learning by cross-modal audio-video clustering","volume":"33","author":"Alwassel","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2026.116088_b36","doi-asserted-by":"crossref","unstructured":"D. Tran, L. Bourdev, R. Fergus, L. Torresani, M. Paluri, Learning spatiotemporal features with 3d convolutional networks, in: Proceedings of the IEEE International Conference on Computer Vision, 2015, pp. 4489\u20134497.","DOI":"10.1109\/ICCV.2015.510"},{"key":"10.1016\/j.knosys.2026.116088_b37","doi-asserted-by":"crossref","unstructured":"T. Pan, Y. Song, T. Yang, W. Jiang, W. Liu, Videomoco: Contrastive video representation learning with temporally adversarial examples, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 11205\u201311214.","DOI":"10.1109\/CVPR46437.2021.01105"}],"container-title":["Knowledge-Based Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126008142?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126008142?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T01:06:36Z","timestamp":1780016796000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0950705126008142"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":37,"alternative-id":["S0950705126008142"],"URL":"https:\/\/doi.org\/10.1016\/j.knosys.2026.116088","relation":{},"ISSN":["0950-7051"],"issn-type":[{"value":"0950-7051","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Time-plane projection network for efficient time-series image analysis","name":"articletitle","label":"Article Title"},{"value":"Knowledge-Based Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.knosys.2026.116088","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"116088"}}