{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:13:45Z","timestamp":1778080425107,"version":"3.51.4"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2023,9,25]],"date-time":"2023-09-25T00:00:00Z","timestamp":1695600000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,9,25]],"date-time":"2023-09-25T00:00:00Z","timestamp":1695600000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62076183"],"award-info":[{"award-number":["62076183"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100007219","name":"Natural Science Foundation of Shanghai","doi-asserted-by":"publisher","award":["20ZR1473500"],"award-info":[{"award-number":["20ZR1473500"]}],"id":[{"id":"10.13039\/100007219","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1007\/s00371-023-03071-x","type":"journal-article","created":{"date-parts":[[2023,9,25]],"date-time":"2023-09-25T06:02:26Z","timestamp":1695621746000},"page":"4099-4113","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Patch excitation network for boxless action recognition in still images"],"prefix":"10.1007","volume":"40","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0457-6093","authenticated-orcid":false,"given":"Shuang","family":"Liang","sequence":"first","affiliation":[]},{"given":"Jiewen","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Zikun","family":"Zhuang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,9,25]]},"reference":[{"key":"3071_CR1","unstructured":"Minaee, S., Liang, X., Yan, S.: Modern augmented reality: applications, trends, and future directions. arXiv preprint arXiv:2202.09450 (2022)"},{"key":"3071_CR2","doi-asserted-by":"publisher","first-page":"993","DOI":"10.1007\/s00371-021-02064-y","volume":"38","author":"H Basly","year":"2022","unstructured":"Basly, H., Ouarda, W., Sayadi, F.E., Ouni, B., Alimi, A.M.: Dtr-har: deep temporal residual representation for human activity recognition. Vis. Comput. 38, 993\u20131013 (2022)","journal-title":"Vis. Comput."},{"issue":"3","key":"3071_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3567827","volume":"19","author":"C Xie","year":"2023","unstructured":"Xie, C., Zhuang, Z., Zhao, S., Liang, S.: Temporal dropout for weakly supervised action localization. ACM Trans. Multimed. Comput. Commun. Appl. 19(3), 1\u201324 (2023)","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"key":"3071_CR4","doi-asserted-by":"crossref","unstructured":"Fang, H.-S., Cao, J., Tai, Y.-W., Lu, C.: Pairwise body-part attention for recognizing human-object interactions. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 51\u201367 (2018)","DOI":"10.1007\/978-3-030-01249-6_4"},{"key":"3071_CR5","doi-asserted-by":"crossref","unstructured":"Ma, W., Liang, S.: Human-object relation network for action recognition in still images. In: 2020 IEEE International Conference on Multimedia and Expo (ICME), pp. 1\u20136 (2020). IEEE","DOI":"10.1109\/ICME46284.2020.9102933"},{"key":"3071_CR6","doi-asserted-by":"crossref","unstructured":"Thurau, C., Hlav\u00e1c, V.: Pose primitive based human action recognition in videos or still images. In: 2008 IEEE Conference on Computer Vision and Pattern Recognition, pp. 1\u20138 (2008). IEEE","DOI":"10.1109\/CVPR.2008.4587721"},{"key":"3071_CR7","doi-asserted-by":"crossref","unstructured":"Zhao, Z., Ma, H., You, S.: Single image action recognition using semantic body part actions. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3391\u20133399 (2017)","DOI":"10.1109\/ICCV.2017.367"},{"issue":"3","key":"3071_CR8","doi-asserted-by":"publisher","first-page":"601","DOI":"10.1109\/TPAMI.2011.158","volume":"34","author":"A Prest","year":"2011","unstructured":"Prest, A., Schmid, C., Ferrari, V.: Weakly supervised learning of interactions between humans and objects. IEEE Trans. Pattern Anal. Mach. Intell. 34(3), 601\u2013614 (2011)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"4","key":"3071_CR9","doi-asserted-by":"publisher","first-page":"1116","DOI":"10.1109\/TCDS.2017.2783944","volume":"10","author":"S Yan","year":"2017","unstructured":"Yan, S., Smith, J.S., Lu, W., Zhang, B.: Multibranch attention networks for action recognition in still images. IEEE Trans. Cognit. Dev. Syst. 10(4), 1116\u20131125 (2017)","journal-title":"IEEE Trans. Cognit. Dev. Syst."},{"key":"3071_CR10","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1016\/j.neucom.2022.01.091","volume":"483","author":"X Zheng","year":"2022","unstructured":"Zheng, X., Gong, T., Lu, X., Li, X.: Human action recognition by multiple spatial clues network. Neurocomputing 483, 10\u201321 (2022)","journal-title":"Neurocomputing"},{"key":"3071_CR11","doi-asserted-by":"publisher","first-page":"383","DOI":"10.1016\/j.neucom.2020.07.016","volume":"413","author":"Y Zheng","year":"2020","unstructured":"Zheng, Y., Zheng, X., Lu, X., Wu, S.: Spatial attention based visual semantic learning for action recognition in still images. Neurocomputing 413, 383\u2013396 (2020)","journal-title":"Neurocomputing"},{"key":"3071_CR12","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"issue":"2","key":"3071_CR13","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The Pascal visual object classes (voc) challenge. Int. J. Comput. Vis. 88(2), 303\u2013338 (2010)","journal-title":"Int. J. Comput. Vis."},{"key":"3071_CR14","doi-asserted-by":"crossref","unstructured":"Yao, B., Jiang, X., Khosla, A., Lin, A.L., Guibas, L., Fei-Fei, L.: Human action recognition by learning bases of action attributes and parts. In: 2011 International Conference on Computer Vision, pp. 1331\u20131338 (2011). IEEE","DOI":"10.1109\/ICCV.2011.6126386"},{"key":"3071_CR15","doi-asserted-by":"crossref","unstructured":"Gkioxari, G., Girshick, R., Malik, J.: Contextual action recognition with r* cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1080\u20131088 (2015)","DOI":"10.1109\/ICCV.2015.129"},{"key":"3071_CR16","doi-asserted-by":"crossref","unstructured":"Wang, J., Liang, S.: Pose-enhanced relation feature for action recognition in still images. In: MultiMedia Modeling, pp. 154\u2013165. Springer, Cham (2022)","DOI":"10.1007\/978-3-030-98358-1_13"},{"issue":"11","key":"3071_CR17","doi-asserted-by":"publisher","first-page":"5479","DOI":"10.1109\/TIP.2016.2605305","volume":"25","author":"Y Zhang","year":"2016","unstructured":"Zhang, Y., Cheng, L., Wu, J., Cai, J., Do, M.N., Lu, J.: Action recognition in still images with minimum annotation efforts. IEEE Trans. Image Process. 25(11), 5479\u20135490 (2016)","journal-title":"IEEE Trans. Image Process."},{"issue":"12","key":"3071_CR18","first-page":"1797","volume":"35","author":"C Fan","year":"2019","unstructured":"Fan, C., Hu, C., Liu, B.: Linearized kernel dictionary learning with group sparse priors for action recognition. Vis. Comput. 35(12), 1797\u20131807 (2019)","journal-title":"Vis. Comput."},{"key":"3071_CR19","doi-asserted-by":"crossref","unstructured":"Feng, W., Zhang, X., Huang, X., Luo, Z.: Boxless action recognition in still images via recurrent visual attention. In: Neural Information Processing: 24th International Conference, ICONIP 2017, Guangzhou, China, November 14-18, 2017, Proceedings, Part II 24, pp. 663\u2013673 (2017). Springer","DOI":"10.1007\/978-3-319-70096-0_68"},{"issue":"21","key":"3071_CR20","doi-asserted-by":"publisher","first-page":"19269","DOI":"10.1007\/s00521-022-07514-9","volume":"34","author":"A Banerjee","year":"2022","unstructured":"Banerjee, A., Roy, S., Kundu, R., Singh, P.K., Bhateja, V., Sarkar, R.: An ensemble approach for still image-based human action recognition. Neural Comput. Appl. 34(21), 19269\u201319282 (2022)","journal-title":"Neural Comput. Appl."},{"key":"3071_CR21","doi-asserted-by":"crossref","unstructured":"Liu, L., Tan, R.T., You, S.: Loss guided activation for action recognition in still images. In: Asian Conference on Computer Vision, pp. 152\u2013167 (2018). Springer","DOI":"10.1007\/978-3-030-20873-8_10"},{"key":"3071_CR22","doi-asserted-by":"crossref","unstructured":"Gao, R., Xiong, B., Grauman, K.: Im2flow: Motion hallucination from static images for action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5937\u20135947 (2018)","DOI":"10.1109\/CVPR.2018.00622"},{"key":"3071_CR23","doi-asserted-by":"crossref","unstructured":"Huang, S., Zhao, X., Niu, L., Zhang, L.: Static image action recognition with hallucinated fine-grained motion information. In: 2021 IEEE International Conference on Multimedia and Expo (ICME), pp. 1\u20136 (2021). IEEE","DOI":"10.1109\/ICME51207.2021.9428245"},{"key":"3071_CR24","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2021.103337","volume":"215","author":"L Niu","year":"2022","unstructured":"Niu, L., Huang, S., Zhao, X., Kang, L., Zhang, Y., Zhang, L.: Hallucinating uncertain motion and future for static image action recognition. Comput. Vis. Image Understand. 215, 103337 (2022)","journal-title":"Comput. Vis. Image Understand."},{"key":"3071_CR25","doi-asserted-by":"publisher","first-page":"3168","DOI":"10.1109\/TIP.2019.2957930","volume":"29","author":"Y Liu","year":"2019","unstructured":"Liu, Y., Lu, Z., Li, J., Yang, T., Yao, C.: Deep image-to-video adaptation and fusion networks for action recognition. IEEE Trans. Image Process. 29, 3168\u20133182 (2019)","journal-title":"IEEE Trans. Image Process."},{"key":"3071_CR26","doi-asserted-by":"publisher","first-page":"5573","DOI":"10.1109\/TIP.2021.3086590","volume":"30","author":"Y Liu","year":"2021","unstructured":"Liu, Y., Wang, K., Li, G., Lin, L.: Semantics-aware adaptive knowledge distillation for sensor-to-vision action recognition. IEEE Trans. Image Process. 30, 5573\u20135588 (2021)","journal-title":"IEEE Trans. Image Process."},{"key":"3071_CR27","first-page":"1","volume":"2018","author":"Y Liu","year":"2018","unstructured":"Liu, Y., Lu, Z., Li, J., Yao, C., Deng, Y.: Transferable feature representation for visible-to-infrared cross-dataset human action recognition. Complexity 2018, 1\u201320 (2018)","journal-title":"Complexity"},{"issue":"8","key":"3071_CR28","doi-asserted-by":"publisher","first-page":"2416","DOI":"10.1109\/TCSVT.2018.2868123","volume":"29","author":"Y Liu","year":"2018","unstructured":"Liu, Y., Lu, Z., Li, J., Yang, T.: Hierarchically learned view-invariant representations for cross-view action recognition. IEEE Trans. Circuits Syst. Video Technol. 29(8), 2416\u20132430 (2018)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"3071_CR29","doi-asserted-by":"publisher","DOI":"10.1007\/s00371-022-02758-x","author":"H Yang","year":"2023","unstructured":"Yang, H., Zhang, Y.: A context-and level-aware feature pyramid network for object detection with attention mechanism. Vis. Comput. (2023). https:\/\/doi.org\/10.1007\/s00371-022-02758-x","journal-title":"Vis. Comput."},{"key":"3071_CR30","doi-asserted-by":"publisher","first-page":"749","DOI":"10.1007\/s00371-021-02075-9","volume":"38","author":"Z Cheng","year":"2022","unstructured":"Cheng, Z., Qu, A., He, X.: Contour-aware semantic segmentation network with spatial attention mechanism for medical image. Vis. Comput. 38, 749\u2013762 (2022)","journal-title":"Vis. Comput."},{"key":"3071_CR31","doi-asserted-by":"crossref","unstructured":"Wang, F., Jiang, M., Qian, C., Yang, S., Li, C., Zhang, H., Wang, X., Tang, X.: Residual attention network for image classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3156\u20133164 (2017)","DOI":"10.1109\/CVPR.2017.683"},{"key":"3071_CR32","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7132\u20137141 (2018)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"3071_CR33","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017)"},{"key":"3071_CR34","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part I 16, pp. 213\u2013229 (2020). Springer","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"3071_CR35","doi-asserted-by":"crossref","unstructured":"Tamura, M., Ohashi, H., Yoshinaga, T.: Qpic: Query-based pairwise human-object interaction detection with image-wide contextual information. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10410\u201310419 (2021)","DOI":"10.1109\/CVPR46437.2021.01027"},{"key":"3071_CR36","doi-asserted-by":"crossref","unstructured":"Sudhakaran, S., Escalera, S., Lanz, O.: Lsta: Long short-term attention for egocentric action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9954\u20139963 (2019)","DOI":"10.1109\/CVPR.2019.01019"},{"key":"3071_CR37","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"issue":"9","key":"3071_CR38","doi-asserted-by":"publisher","first-page":"1869","DOI":"10.1007\/s00371-019-01775-7","volume":"36","author":"P Xi","year":"2020","unstructured":"Xi, P., Guan, H., Shu, C., Borgeat, L., Goubran, R.: An integrated approach for medical abnormality detection using deep patch convolutional neural networks. Vis. Comput. 36(9), 1869\u20131882 (2020)","journal-title":"Vis. Comput."},{"key":"3071_CR39","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1007\/s00371-017-1424-3","volume":"35","author":"G Cao","year":"2019","unstructured":"Cao, G., Li, J., Chen, X., He, Z.: Patch-based self-adaptive matting for high-resolution image and video. Vis. Comput. 35, 133\u2013147 (2019)","journal-title":"Vis. Comput."},{"key":"3071_CR40","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"3071_CR41","doi-asserted-by":"crossref","unstructured":"Hu, H., Gu, J., Zhang, Z., Dai, J., Wei, Y.: Relation networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3588\u20133597 (2018)","DOI":"10.1109\/CVPR.2018.00378"},{"key":"3071_CR42","unstructured":"Tan, M., Le, Q.: Efficientnet: Rethinking model scaling for convolutional neural networks. In: International Conference on Machine Learning, pp. 6105\u20136114 (2019). PMLR"},{"key":"3071_CR43","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J.-Y., Kweon, I.S.: Cbam: Convolutional block attention module. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 3\u201319 (2018)","DOI":"10.1007\/978-3-030-01234-2_1"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-023-03071-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-023-03071-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-023-03071-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,6]],"date-time":"2024-06-06T11:13:34Z","timestamp":1717672414000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-023-03071-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,25]]},"references-count":43,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,6]]}},"alternative-id":["3071"],"URL":"https:\/\/doi.org\/10.1007\/s00371-023-03071-x","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,9,25]]},"assertion":[{"value":"12 August 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 September 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"There is no conflict of interest or competing interests to declare.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}