{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T00:57:24Z","timestamp":1764723444007,"version":"3.46.0"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"16","license":[{"start":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T00:00:00Z","timestamp":1763337600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T00:00:00Z","timestamp":1763337600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s11760-025-04964-1","type":"journal-article","created":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T14:56:27Z","timestamp":1763391387000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["PGFiT-Net: a two-stream human action recognition model with pose-gated and FiLM-conditioned fusion"],"prefix":"10.1007","volume":"19","author":[{"given":"Hong","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Bo","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Shijin","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,17]]},"reference":[{"key":"4964_CR1","unstructured":"Parnami, A., Dutta, V.: Human activity recognition: A comprehensive review. Expert Syst. (2024)"},{"issue":"8","key":"4964_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3664815","volume":"20","author":"M Shaikh","year":"2024","unstructured":"Shaikh, M., Bilal, et al.: From CNNs to Transformers in multimodal human action recognition: A survey. ACM Trans. Multimedia Comput. Commun. Appl. 20(8), 1\u201324 (2024)","journal-title":"ACM Trans. Multimedia Comput. Commun. Appl."},{"key":"4964_CR3","doi-asserted-by":"crossref","first-page":"259","DOI":"10.3390\/info14050259","volume":"14","author":"HC Nguyen","year":"2023","unstructured":"Nguyen, H.C., et al.: Deep learning for human activity recognition on 3D skeleton data: A survey. Information. 14, 259 (2023)","journal-title":"Information"},{"key":"4964_CR4","unstructured":"Tan, M.: and Quoc Le. Efficientnetv2: Smaller models and faster training. International conference on machine learning. PMLR, (2021)"},{"key":"4964_CR5","unstructured":"Dosovitskiy, A., Beyer, L., et al.: An Image is Worth 16\u00d716 Words: Transformers for Image Recognition at Scale. ICLR (2021)"},{"key":"4964_CR6","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-Excitation Networks. CVPR (2018)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"4964_CR7","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J.Y., Kweon, I.S.: CBAM: Convolutional Block Attention Module. ECCV (2018)","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"4964_CR8","doi-asserted-by":"crossref","unstructured":"Perez, E., Strub, F., et al.: FiLM: Visual Reasoning with a General Conditioning Layer. 
AAAI (2018)","DOI":"10.1609\/aaai.v32i1.11671"},{"key":"4964_CR9","first-page":"22229","volume":"35","author":"M Turkoglu","year":"2022","unstructured":"Turkoglu, M., Ozgur, et al.: Film-ensemble: Probabilistic deep learning via feature-wise linear modulation. Adv. Neural. Inf. Process. Syst. 35, 22229\u201322242 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"4964_CR10","unstructured":"Simonyan, K.: and Andrew Zisserman. Two-stream convolutional networks for action recognition in videos. Adv. Neural. Inf. Process. Syst. 27 (2014)"},{"key":"4964_CR11","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Zisserman, A.: Convolutional Two-Stream Network Fusion for Video Action Recognition. CVPR (2016)","DOI":"10.1109\/CVPR.2016.213"},{"key":"4964_CR12","doi-asserted-by":"crossref","unstructured":"Peng, K., et al.: Referring atomic video action recognition. European Conference on Computer Vision. Cham: Springer Nature Switzerland, (2024)","DOI":"10.1007\/978-3-031-72655-2_10"},{"key":"4964_CR13","doi-asserted-by":"crossref","unstructured":"Peng, K., et al.: Navigating open set scenarios for skeleton-based action recognition. Proceedings of the AAAI conference on artificial intelligence. 38(5) (2024)","DOI":"10.1609\/aaai.v38i5.28247"},{"key":"4964_CR14","unstructured":"Wen, D., et al.: RoHOI: Robustness Benchmark for Human-Object Interaction Detection. arXiv preprint arXiv:2507.09111 (2025)"},{"key":"4964_CR15","doi-asserted-by":"crossref","unstructured":"Xu, Y., et al.: Skeleton-based human action recognition with noisy labels. 2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","DOI":"10.1109\/IROS58592.2024.10801681"},{"key":"4964_CR16","doi-asserted-by":"crossref","unstructured":"Yao, B., et al.: Human action recognition by learning bases of action attributes and parts. International conference on computer vision. (2011)","DOI":"10.1109\/ICCV.2011.6126386"},{"key":"4964_CR17","first-page":"181","volume-title":"Computer Vision in Sports","author":"K Soomro","year":"2015","unstructured":"Soomro, K.: Zamir. Action recognition in realistic sports videos. In: Computer Vision in Sports, pp. 181\u2013208. Springer International Publishing, Cham (2015)"},{"key":"4964_CR18","doi-asserted-by":"crossref","unstructured":"Rodriguez, M.D., Ahmed, J., Shah, M.: Action mach a spatio-temporal maximum average correlation height filter for action recognition. 2008 IEEE conference on computer vision and pattern recognition. IEEE, (2008)","DOI":"10.1109\/CVPR.2008.4587727"},{"key":"4964_CR19","unstructured":"Xiao, L., Jia, P., Chen, T., Dou, X., Su, X., Luo, S., Scherer, R.: Recognizing sports activities from video frames with multiscale features. J. Cloud Comput. (2023)"},{"key":"4964_CR20","doi-asserted-by":"crossref","unstructured":"Zhou, T., Wang, H., Zhan, M., Zhang, Y., Li, H., Liu, S.: Behavior recognition based on the improved density clustering and context-guided Bi-LSTM model. Multimedia Tools Appl. 82(29). (2023)","DOI":"10.1007\/s11042-023-15501-y"},{"key":"4964_CR21","doi-asserted-by":"publisher","first-page":"104090","DOI":"10.1016\/j.imavis.2020.104090","volume":"106","author":"F Afza","year":"2021","unstructured":"Afza, F., Khan, M.A., Sharif, M., Kadry, S., Manogaran, G., Saba, T., Ashraf, I.: A framework of human action recognition using length control features fusion and weighted entropy-variances based feature selection. Image Vis. Comput. 106, 104090 (2021)","journal-title":"Image Vis. 
Comput."},{"key":"4964_CR22","doi-asserted-by":"publisher","first-page":"820","DOI":"10.1016\/j.future.2021.06.045","volume":"125","author":"K Muhammad","year":"2021","unstructured":"Muhammad, K., Khan, S., Elhoseny, M., Hassan, A.U., De Baik, S.W.: Albuquerque V.H.C. Human action recognition using attention-based LSTM network with motion features. Future Generation Comput. Syst. 125, 820\u2013830 (2021)","journal-title":"Future Generation Comput. Syst."},{"issue":"13","key":"4964_CR23","doi-asserted-by":"publisher","first-page":"20019","DOI":"10.1007\/s11042-021-10636-2","volume":"80","author":"A Abdelbaky","year":"2021","unstructured":"Abdelbaky, A., Aly, S.: Human action recognition using three orthogonal planes with unsupervised deep convolutional neural network. Multimedia Tools Appl. 80(13), 20019\u201320043 (2021)","journal-title":"Multimedia Tools Appl."},{"key":"4964_CR24","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1016\/j.patcog.2018.01.020","volume":"79","author":"Z Tu","year":"2018","unstructured":"Tu, Z., Xie, W., Qin, Q., Poppe, R., Veltkamp, R.C., Li, B., Yuan, J.: Multi-stream CNN: Learning representations based on human-related regions for action recognition. Pattern Recogn. 79, 32 (2018)","journal-title":"Pattern Recogn."},{"key":"4964_CR25","doi-asserted-by":"crossref","unstructured":"Gammulle, H., Denman, S., Sridharan, S., Fookes, C., Two Stream, L.S.T.M.: A Deep Fusion Framework for Human Action Recognition.: IEEE Winter Conference on Applications of Computer Vision (WACV). IEEE; 2017:177\u2013186. (2017)","DOI":"10.1109\/WACV.2017.27"},{"key":"4964_CR26","doi-asserted-by":"publisher","first-page":"105820","DOI":"10.1016\/j.asoc.2019.105820","volume":"86","author":"C Dai","year":"2020","unstructured":"Dai, C., Liu, X., Lai, J.: Human action recognition using two-stream attention-based LSTM networks. Appl. Soft Comput. 86, 105820 (2020)","journal-title":"Appl. Soft Comput."},{"key":"4964_CR27","doi-asserted-by":"crossref","unstructured":"Hosseyni, S.R., Seyedin, S., Taheri, H.: Human Action Recognition in Still Images Using ConViT, 2024 32nd International Conference on Electrical Engineering (ICEE), Tehran, Iran, Islamic Republic of, pp. 1\u20137. (2024)","DOI":"10.1109\/ICEE63041.2024.10668316"},{"key":"4964_CR28","doi-asserted-by":"publisher","first-page":"e1396","DOI":"10.7717\/peerj-cs.1396","volume":"9","author":"R Surendran","year":"2023","unstructured":"Surendran, R., Anitha, J., Hemanth, J.D.: Recognition of human action for scene Understanding using world cup optimization and transfer learning approach. PeerJ Comput. Sci. 9, e1396 (2023)","journal-title":"PeerJ Comput. Sci."},{"issue":"11","key":"4964_CR29","doi-asserted-by":"publisher","first-page":"4422","DOI":"10.1109\/TIP.2015.2465147","volume":"24","author":"FS Khan","year":"2015","unstructured":"Khan, F.S., Anwer, R.M., van de Weijer, J., Bagdanov, A.D., Vanrell, M.: L\u00f3pez A. Recognizing actions through action-specific person detection. IEEE Trans. Image Process. 24(11), 4422\u20134432 (2015)","journal-title":"IEEE Trans. Image Process."},{"issue":"11","key":"4964_CR30","doi-asserted-by":"publisher","first-page":"5479","DOI":"10.1109\/TIP.2016.2605305","volume":"25","author":"Y Zhang","year":"2016","unstructured":"Zhang, Y., Cheng, L., Wu, J., Cai, J., Do, M.N., Lu, J.: Action recognition in still images with minimum annotation efforts. IEEE Trans. Image Process. 25(11), 5479\u20135490 (2016)","journal-title":"IEEE Trans. 
Image Process."},{"issue":"21","key":"4964_CR31","doi-asserted-by":"publisher","first-page":"32567","DOI":"10.1007\/s11042-021-11215-1","volume":"80","author":"SS Ashrafi","year":"2021","unstructured":"Ashrafi, S.S., Shokouhi, S.B., Ayatollahi, A.: Action recognition in still images using a multi-attention guided network with weakly supervised saliency detection. Multimedia Tools Appl. 80(21), 32567\u201332593 (2021)","journal-title":"Multimedia Tools Appl."},{"key":"4964_CR32","doi-asserted-by":"publisher","first-page":"109091","DOI":"10.1016\/j.knosys.2022.109091","volume":"250","author":"HA Dehkordi","year":"2022","unstructured":"Dehkordi, H.A., Nezhad, A.S., Ashrafi, S.S., Shokouhi, S.B.: Multi-expert human action recognition with hierarchical super-class learning. Knowl. Based Syst. 250, 109091 (2022)","journal-title":"Knowl. Based Syst."},{"key":"4964_CR33","doi-asserted-by":"publisher","first-page":"6760","DOI":"10.1007\/s10489-021-02760-1","volume":"52","author":"S Mi","year":"2022","unstructured":"Mi, S., Zhang, Y.: Pose-guided action recognition in static images using Lie-group. Appl. Intell. 52, 6760\u20136768 (2022)","journal-title":"Appl. Intell."},{"issue":"4","key":"4964_CR34","doi-asserted-by":"publisher","first-page":"1531","DOI":"10.3390\/app10041531","volume":"10","author":"B Bhandari","year":"2020","unstructured":"Bhandari, B., Lee, G., Cho, J.: Body-Part-Aware and Multitask-Aware Single-Image-Based action recognition. Appl. Sci. 10(4), 1531 (2020)","journal-title":"Appl. Sci."},{"key":"4964_CR35","unstructured":"Mohammadi, S., Ghofrani Majelan, S., Shokouhi, S.B.: Knowledge Distillation Framework for Action Recognition in Still Images. In: 2020 10th International Conference on Computer and Knowledge Engineering (ICCKE). IEEE; (2020)"},{"issue":"17","key":"4964_CR36","doi-asserted-by":"publisher","first-page":"25945","DOI":"10.1007\/s11042-023-14350-z","volume":"82","author":"SS Ashrafi","year":"2023","unstructured":"Ashrafi, S.S., Shokouhi, S.B., Ayatollahi, A.: Still image action recognition based on interactions between joints and objects. Multimedia Tools Appl. 82(17), 25945\u201325971 (2023)","journal-title":"Multimedia Tools Appl."},{"key":"4964_CR37","doi-asserted-by":"publisher","first-page":"104202","DOI":"10.1016\/j.cviu.2024.104202","volume":"249","author":"H Wang","year":"2024","unstructured":"Wang, H., Zhao, J., Gui, J.: Region-aware image-based human action retrieval with Transformers. Comput. Vis. Image Underst. 249, 104202 (2024)","journal-title":"Comput. Vis. Image Underst."},{"key":"4964_CR38","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep Residual Learning for Image Recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR). 770\u2013778. 
(2016)","DOI":"10.1109\/CVPR.2016.90"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04964-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-025-04964-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04964-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T00:55:21Z","timestamp":1764723321000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-025-04964-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,17]]},"references-count":38,"journal-issue":{"issue":"16","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["4964"],"URL":"https:\/\/doi.org\/10.1007\/s11760-025-04964-1","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"type":"print","value":"1863-1703"},{"type":"electronic","value":"1863-1711"}],"subject":[],"published":{"date-parts":[[2025,11,17]]},"assertion":[{"value":"27 August 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 November 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 November 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 November 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"The authors declare that there are no financial interests, commercial affiliations, or other potential conflicts of interest that could have influenced the objectivity of this research or the writing of this paper.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosures"}}],"article-number":"1371"}}