{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T16:53:56Z","timestamp":1776272036248,"version":"3.50.1"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2024,5,28]],"date-time":"2024-05-28T00:00:00Z","timestamp":1716854400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,5,28]],"date-time":"2024-05-28T00:00:00Z","timestamp":1716854400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2024,7]]},"DOI":"10.1007\/s00138-024-01558-8","type":"journal-article","created":{"date-parts":[[2024,5,28]],"date-time":"2024-05-28T22:01:43Z","timestamp":1716933703000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Human\u2013object interaction detection based on disentangled axial attention transformer"],"prefix":"10.1007","volume":"35","author":[{"given":"Limin","family":"Xia","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiyue","family":"Xiao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,5,28]]},"reference":[{"key":"1558_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2022.104617","volume":"130","author":"M Antoun","year":"2023","unstructured":"Antoun, M., Asmar, D.: Human\u2013object interaction detection: design and survey. Image Vis. Comput. 130, 104617 (2023). https:\/\/doi.org\/10.1016\/j.imavis.2022.104617","journal-title":"Image Vis. Comput."},{"key":"1558_CR2","doi-asserted-by":"publisher","unstructured":"Chen, M., Liao, Y., Liu, S., Chen, Z., Wang, F., Qian, C.: Reformulating hoi detection as adaptive set prediction. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 9000\u20139009 (2021). https:\/\/doi.org\/10.1109\/CVPR46437.2021.00889","DOI":"10.1109\/CVPR46437.2021.00889"},{"key":"1558_CR3","doi-asserted-by":"publisher","first-page":"4495","DOI":"10.1007\/s10489-020-01794-1","volume":"50","author":"L-M Xia","year":"2020","unstructured":"Xia, L.-M., Li, R.: Multi-stream neural network fused with local information and global information for hoi detection. Appl. Intell. 50, 4495\u20134505 (2020). https:\/\/doi.org\/10.1007\/s10489-020-01794-1","journal-title":"Appl. Intell."},{"key":"1558_CR4","doi-asserted-by":"publisher","unstructured":"Yan, S., Xiong, Y., Lin, D.: Spatial temporal graph convolutional networks for skeleton-based action recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32(1) (2018). https:\/\/doi.org\/10.1609\/aaai.v32i1.12328","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"1558_CR5","doi-asserted-by":"publisher","unstructured":"Xiao, T., Liu, Y., Zhou, B., Jiang, Y., Sun, J.: Unified perceptual parsing for scene understanding. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) Proceedings of the European Conference on Computer Vision (ECCV), pp. 432\u2013448. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01228-1_26","DOI":"10.1007\/978-3-030-01228-1_26"},{"key":"1558_CR6","doi-asserted-by":"publisher","unstructured":"Chen, S., Jin, Q., Wang, P., Wu, Q.: Say as you wish: Fine-grained control of image caption generation with abstract scene graphs. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 9959\u20139968 (2020).https:\/\/doi.org\/10.1109\/CVPR42600.2020.00998","DOI":"10.1109\/CVPR42600.2020.00998"},{"issue":"6","key":"1558_CR7","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: towards real-time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. 39(6), 1137\u20131149 (2017). https:\/\/doi.org\/10.1109\/TPAMI.2016.2577031","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1558_CR8","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L.u., Polosukhin, I.: Attention is all you need. In: Guyon, I., Luxburg, U.V., Bengio, S., Wallach, H., Fergus, R., Vishwanathan, S., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"1558_CR9","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) Proceedings of the European Conference on Computer Vision (ECCV), pp. 213\u2013229. Springer, Cham (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"1558_CR10","doi-asserted-by":"publisher","unstructured":"Kim, B., Lee, J., Kang, J., Kim, E.-S., Kim, H.J.: Hotr: end-to-end human\u2013object interaction detection with transformers. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 74\u201383 (2021). https:\/\/doi.org\/10.1109\/CVPR46437.2021.00014","DOI":"10.1109\/CVPR46437.2021.00014"},{"key":"1558_CR11","doi-asserted-by":"publisher","unstructured":"Kim, B., Mun, J., On, K.-W., Shin, M., Lee, J., Kim, E.-S.: Mstr: multi-scale transformer for end-to-end human\u2013object interaction detection. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 19556\u201319565 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01897","DOI":"10.1109\/CVPR52688.2022.01897"},{"key":"1558_CR12","doi-asserted-by":"publisher","unstructured":"Tamura, M., Ohashi, H., Yoshinaga, T.: Qpic: query-based pairwise human\u2013object interaction detection with image-wide contextual information. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10405\u201310414 (2021). https:\/\/doi.org\/10.1109\/CVPR46437.2021.01027","DOI":"10.1109\/CVPR46437.2021.01027"},{"key":"1558_CR13","unstructured":"Zhang, A., Liao, Y., Liu, S., Lu, M., Wang, Y., Gao, C., LI, X.: Mining the benefits of two-stage and one-stage hoi detection. In: Ranzato, M., Beygelzimer, A., Dauphin, Y., Liang, P.S., Vaughan, J.W. (eds.) Advances in Neural Information Processing Systems, vol. 34, pp. 17209\u201317220 (2021)"},{"key":"1558_CR14","doi-asserted-by":"publisher","unstructured":"Zhou, D., Liu, Z., Wang, J., Wang, L., Hu, T., Ding, E., Wang, J.: Human\u2013object interaction detection via disentangled transformer. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 19546\u201319555 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01896","DOI":"10.1109\/CVPR52688.2022.01896"},{"key":"1558_CR15","doi-asserted-by":"publisher","unstructured":"Zou, C., Wang, B., Hu, Y., Liu, J., Wu, Q., Zhao, Y., Li, B., Zhang, C., Zhang, C., Wei, Y., Sun, J.: End-to-end human object interaction detection with hoi transformer. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11820\u201311829 (2021). https:\/\/doi.org\/10.1109\/CVPR46437.2021.01165","DOI":"10.1109\/CVPR46437.2021.01165"},{"key":"1558_CR16","doi-asserted-by":"publisher","unstructured":"Kim, S., Jung, D., Cho, M.: Relational context learning for human\u2013object interaction detection. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2925\u20132934 (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.00286","DOI":"10.1109\/CVPR52729.2023.00286"},{"key":"1558_CR17","doi-asserted-by":"crossref","unstructured":"Wang, H., Zhu, Y., Green, B., Adam, H., Yuille, A., Chen, L.-C.: Axial-deeplab: stand-alone axial-attention for panoptic segmentation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) Proceedings of the European Conference on Computer Vision (ECCV), pp. 108\u2013126. Springer, Cham (2020)","DOI":"10.1007\/978-3-030-58548-8_7"},{"key":"1558_CR18","unstructured":"Gupta, S., Malik, J.: Visual semantic role labeling. arXiv preprint arXiv:1505.04474 (2015)"},{"key":"1558_CR19","doi-asserted-by":"publisher","unstructured":"Chao, Y.-W., Liu, Y., Liu, X., Zeng, H., Deng, J.: Learning to detect human\u2013object interactions. In: 2018 IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 381\u2013389 (2018). https:\/\/doi.org\/10.1109\/WACV.2018.00048","DOI":"10.1109\/WACV.2018.00048"},{"key":"1558_CR20","doi-asserted-by":"publisher","DOI":"10.1007\/s10586-023-04004-y","author":"L Xia","year":"2023","unstructured":"Xia, L., Ding, X.: Human\u2013object interaction recognition based on interactivity detection and multi-feature fusion. Cluster Comput. (2023). https:\/\/doi.org\/10.1007\/s10586-023-04004-y","journal-title":"Cluster Comput."},{"key":"1558_CR21","doi-asserted-by":"publisher","unstructured":"Chao, Y.-W., Wang, Z., He, Y., Wang, J., Deng, J.: Hico: a benchmark for recognizing human\u2013object interactions in images. In: 2015 IEEE International Conference on Computer Vision (ICCV), pp. 1017\u20131025 (2015). https:\/\/doi.org\/10.1109\/ICCV.2015.122","DOI":"10.1109\/ICCV.2015.122"},{"key":"1558_CR22","doi-asserted-by":"publisher","unstructured":"Hou, Z., Yu, B., Qiao, Y., Peng, X., Tao, D.: Detecting human\u2013object interaction via fabricated compositional learning. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 14641\u201314650 (2021). https:\/\/doi.org\/10.1109\/CVPR46437.2021.01441","DOI":"10.1109\/CVPR46437.2021.01441"},{"key":"1558_CR23","doi-asserted-by":"publisher","unstructured":"Liu, Y., Yuan, J., Chen, C.W.: Consnet: learning consistency graph for zero-shot human\u2013object interaction detection. In: Proceedings of the 28th ACM International Conference on Multimedia. MM \u201920, pp. 4235\u20134243. Association for Computing Machinery, New York, NY, USA (2020). https:\/\/doi.org\/10.1145\/3394171.3413600","DOI":"10.1145\/3394171.3413600"},{"key":"1558_CR24","doi-asserted-by":"publisher","unstructured":"Zhang, F.Z., Campbell, D., Gould, S.: Spatially conditioned graphs for detecting human\u2013object interactions. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 13299\u201313307 (2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.01307","DOI":"10.1109\/ICCV48922.2021.01307"},{"key":"1558_CR25","doi-asserted-by":"publisher","unstructured":"Gkioxari, G., Girshick, R., Doll\u00e1r, P., He, K.: Detecting and recognizing human\u2013object interactions. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8359\u20138367 (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00872","DOI":"10.1109\/CVPR.2018.00872"},{"issue":"7","key":"1558_CR26","doi-asserted-by":"publisher","first-page":"3870","DOI":"10.1109\/TPAMI.2021.3054048","volume":"44","author":"Y-L Li","year":"2022","unstructured":"Li, Y.-L., Liu, X., Wu, X., Huang, X., Xu, L., Lu, C.: Transferable interactiveness knowledge for human\u2013object interaction detection. IEEE Trans. Pattern Anal. Mach. Intell. 44(7), 3870\u20133882 (2022). https:\/\/doi.org\/10.1109\/TPAMI.2021.3054048","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1558_CR27","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01458-8","author":"C Ding","year":"2021","unstructured":"Ding, C., Tao, D.: Polysemy deciphering network for robust human\u2013object interaction detection. Int. J. Comput. Vis. (2021). https:\/\/doi.org\/10.1007\/s11263-021-01458-8","journal-title":"Int. J. Comput. Vis."},{"key":"1558_CR28","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)"},{"key":"1558_CR29","doi-asserted-by":"publisher","unstructured":"Liao, Y., Liu, S., Wang, F., Chen, Y., Qian, C., Feng, J.: Ppdm: parallel point detection and matching for real-time human\u2013object interaction detection. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 479\u2013487 (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.00056","DOI":"10.1109\/CVPR42600.2020.00056"},{"key":"1558_CR30","doi-asserted-by":"crossref","unstructured":"Kim, B., Choi, T., Kang, J., Kim, H.J.: Uniondet: union-level detector towards real-time human\u2013object interaction detection. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) Proceedings of the European Conference on Computer Vision (ECCV), pp. 498\u2013514. Springer, Cham (2020)","DOI":"10.1007\/978-3-030-58555-6_30"},{"key":"1558_CR31","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"1558_CR32","doi-asserted-by":"publisher","unstructured":"Zhang, F.Z., Yuan, Y., Campbell, D., Zhong, Z., Gould, S.: Exploring predicate visual context in detecting of human\u2013object interactions. In: 2023 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 10377\u201310387 (2023). https:\/\/doi.org\/10.1109\/ICCV51070.2023.00955","DOI":"10.1109\/ICCV51070.2023.00955"},{"key":"1558_CR33","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.110021","volume":"146","author":"Y Cheng","year":"2024","unstructured":"Cheng, Y., Duan, H., Wang, C., Chen, Z.: Parallel disentangling network for human\u2013object interaction detection. Pattern Recogn. 146, 110021 (2024). https:\/\/doi.org\/10.1016\/j.patcog.2023.110021","journal-title":"Pattern Recogn."},{"issue":"4","key":"1558_CR34","doi-asserted-by":"publisher","first-page":"2415","DOI":"10.1109\/TPAMI.2023.3331738","volume":"46","author":"S Ma","year":"2024","unstructured":"Ma, S., Wang, Y., Wang, S., Wei, Y.: Fgahoi: fine-grained anchors for human\u2013object interaction detection. IEEE Trans. Pattern Anal. Mach. Intell. 46(4), 2415\u20132429 (2024). https:\/\/doi.org\/10.1109\/TPAMI.2023.3331738","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1558_CR35","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.110872","volume":"278","author":"Y Su","year":"2023","unstructured":"Su, Y., Zhu, H., Tan, Y., An, S., Xing, M.: Prime: privacy-preserving video anomaly detection via motion exemplar guidance. Knowl. Based Syst. 278, 110872 (2023). https:\/\/doi.org\/10.1016\/j.knosys.2023.110872","journal-title":"Knowl. Based Syst."},{"key":"1558_CR36","unstructured":"Yuan, H., Jiang, J., Albanie, S., Feng, T., Huang, Z., Ni, D., Tang, M.: Rlip: relational language-image pre-training for human\u2013object interaction detection. In: Koyejo, S., Mohamed, S., Agarwal, A., Belgrave, D., Cho, K., Oh, A. (eds.) Advances in Neural Information Processing Systems, vol. 35, pp. 37416\u201337431 (2022)"},{"key":"1558_CR37","doi-asserted-by":"publisher","unstructured":"Liao, Y., Zhang, A., Lu, M., Wang, Y., Li, X., Liu, S.: Gen-vlkt: simplify association and enhance interaction understanding for hoi detection. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 20091\u201320100 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01949","DOI":"10.1109\/CVPR52688.2022.01949"},{"key":"1558_CR38","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., Krueger, G., Sutskever, I.: Learning transferable visual models from natural language supervision. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 139, pp. 8748\u20138763 (2021)"},{"key":"1558_CR39","doi-asserted-by":"publisher","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 770\u2013778 (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"1558_CR40","doi-asserted-by":"crossref","unstructured":"Qi, S., Wang, W., Jia, B., Shen, J., Zhu, S.-C.: Learning human\u2013object interactions by graph parsing neural networks. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) Proceedings of the European Conference on Computer Vision (ECCV), pp. 407\u2013423. Springer, Cham (2018)","DOI":"10.1007\/978-3-030-01240-3_25"},{"key":"1558_CR41","unstructured":"Gao, C., Zou, Y., Huang, J.-B.: ican: Instance-centric attention network for human\u2013object interaction detection. In: British Machine Vision Conference (2018)"},{"key":"1558_CR42","doi-asserted-by":"publisher","unstructured":"Li, Y.-L., Liu, X., Lu, H., Wang, S., Liu, J., Li, J., Lu, C.: Detailed 2d\u20133d joint representation for human\u2013object interaction. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10163\u201310172 (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.01018","DOI":"10.1109\/CVPR42600.2020.01018"},{"key":"1558_CR43","doi-asserted-by":"crossref","unstructured":"Hou, Z., Peng, X., Qiao, Y., Tao, D.: Visual compositional learning for human\u2013object interaction detection. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) Proceedings of the European Conference on Computer Vision (ECCV), pp. 584\u2013600. Springer, Cham (2020)","DOI":"10.1007\/978-3-030-58555-6_35"},{"key":"1558_CR44","unstructured":"Li, Y.-L., Liu, X., Wu, X., Li, Y., Lu, C.: Hoi analysis: Integrating and decomposing human\u2013object interaction. In: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M.F., Lin, H. (eds.) Advances in Neural Information Processing Systems, vol. 33, pp. 5011\u20135022 (2020)"},{"key":"1558_CR45","doi-asserted-by":"crossref","unstructured":"Gao, C., Xu, J., Zou, Y., Huang, J.-B.: Drg: dual relation graph for human\u2013object interaction detection. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) Proceedings of the European Conference on Computer Vision (ECCV), pp. 696\u2013712. Springer, Cham (2020)","DOI":"10.1007\/978-3-030-58610-2_41"},{"key":"1558_CR46","doi-asserted-by":"publisher","unstructured":"Zhang, F.Z., Campbell, D., Gould, S.: Efficient two-stage detection of human\u2013object interactions with a novel unary-pairwise transformer. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 20072\u201320080 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01947","DOI":"10.1109\/CVPR52688.2022.01947"},{"key":"1558_CR47","doi-asserted-by":"publisher","unstructured":"Zhang, Y., Pan, Y., Yao, T., Huang, R., Mei, T., Chen, C.: Exploring structure-aware transformer over interaction proposals for human\u2013object interaction detection. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 19526\u201319535 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01894","DOI":"10.1109\/CVPR52688.2022.01894"},{"key":"1558_CR48","doi-asserted-by":"publisher","unstructured":"Wang, T., Yang, T., Danelljan, M., Khan, F.S., Zhang, X., Sun, J.: Learning human\u2013object interaction detection using interaction points. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4115\u20134124 (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.00417","DOI":"10.1109\/CVPR42600.2020.00417"},{"key":"1558_CR49","doi-asserted-by":"publisher","first-page":"964","DOI":"10.1109\/TIP.2022.3231528","volume":"32","author":"J Lim","year":"2023","unstructured":"Lim, J., Baskaran, V.M., Lim, J.M.-Y., Wong, K., See, J., Tistarelli, M.: Ernet: an efficient and reliable human\u2013object interaction detection network. IEEE Trans. Image Process. 32, 964\u2013979 (2023). https:\/\/doi.org\/10.1109\/TIP.2022.3231528","journal-title":"IEEE Trans. Image Process."},{"key":"1558_CR50","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) Proceedings of the European Conference on Computer Vision (ECCV), pp. 740\u2013755. Springer, Cham (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1558_CR51","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"key":"1558_CR52","doi-asserted-by":"publisher","unstructured":"Ulutan, O., Iftekhar, A.S.M., Manjunath, B.S.: Vsgnet: spatial attention network for detecting human object interactions using graph convolutions. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 13614\u201313623 (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.01363","DOI":"10.1109\/CVPR42600.2020.01363"},{"key":"1558_CR53","doi-asserted-by":"publisher","unstructured":"Zhong, X., Qu, X., Ding, C., Tao, D.: Glance and gaze: inferring action-aware points for one-stage human\u2013object interaction detection. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 13229\u201313238 (2021). https:\/\/doi.org\/10.1109\/CVPR46437.2021.01303","DOI":"10.1109\/CVPR46437.2021.01303"}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01558-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-024-01558-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01558-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,31]],"date-time":"2024-07-31T19:17:39Z","timestamp":1722453459000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-024-01558-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,28]]},"references-count":53,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2024,7]]}},"alternative-id":["1558"],"URL":"https:\/\/doi.org\/10.1007\/s00138-024-01558-8","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"value":"0932-8092","type":"print"},{"value":"1432-1769","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,5,28]]},"assertion":[{"value":"17 November 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 May 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 May 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 May 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}}],"article-number":"72"}}