{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:11:00Z","timestamp":1750219860615,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,13]],"date-time":"2023-10-13T00:00:00Z","timestamp":1697155200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Research and application of key technology of management digitalization based on data center","award":["524623210007"],"award-info":[{"award-number":["524623210007"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,13]]},"DOI":"10.1145\/3607822.3616409","type":"proceedings-article","created":{"date-parts":[[2023,10,13]],"date-time":"2023-10-13T10:37:16Z","timestamp":1697193436000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Enhanced Transformer Interaction Components for Human-Object Interaction Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-2687-1985","authenticated-orcid":false,"given":"Jinhui","family":"Zhang","sequence":"first","affiliation":[{"name":"NARI Group Corporation (State Grid Electric Power Research Institute), China and China Realtime Database Co., Ltd., China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7709-1459","authenticated-orcid":false,"given":"Yuxiao","family":"Zhao","sequence":"additional","affiliation":[{"name":"NARI Group Corporation (State Grid Electric Power Research Institute), China and China Realtime Database Co., Ltd., China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2529-0896","authenticated-orcid":false,"given":"Xian","family":"Zhang","sequence":"additional","affiliation":[{"name":"NARI Group Corporation (State Grid Electric Power Research Institute), China and China Realtime Database Co., Ltd., China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0420-3179","authenticated-orcid":false,"given":"Xiang","family":"Wang","sequence":"additional","affiliation":[{"name":"NARI Group Corporation (State Grid Electric Power Research Institute), China and China Realtime Database Co., Ltd., China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7856-120X","authenticated-orcid":false,"given":"Yuxuan","family":"Zhao","sequence":"additional","affiliation":[{"name":"NARI Group Corporation (State Grid Electric Power Research Institute), China and China Realtime Database Co., Ltd., China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6556-075X","authenticated-orcid":false,"given":"Peng","family":"Wang","sequence":"additional","affiliation":[{"name":"NARI Group Corporation (State Grid Electric Power Research Institute), China and China Realtime Database Co., Ltd., China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1670-1122","authenticated-orcid":false,"given":"Jian","family":"Hu","sequence":"additional","affiliation":[{"name":"NARI Group Corporation (State Grid Electric Power Research Institute), China and China Realtime Database Co., Ltd., China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,13]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"volume-title":"Learning to detect human-object interactions. In 2018 ieee winter conference on applications of computer vision (wacv)","author":"Chao Yu-Wei","key":"e_1_3_2_1_2_1","unstructured":"Yu-Wei Chao, Yunfan Liu, Xieyang Liu, Huayi Zeng, and Jia Deng. 2018. Learning to detect human-object interactions. In 2018 ieee winter conference on applications of computer vision (wacv). IEEE, 381\u2013389."},{"key":"e_1_3_2_1_3_1","volume-title":"More Than Accuracy: An Empirical Study of Consistency Between Performance and Interpretability. In Pacific Rim International Conference on Artificial Intelligence. Springer, 579\u2013590","author":"Du Yun","year":"2022","unstructured":"Yun Du, Dong Liang, Rong Quan, Songlin Du, and Yaping Yan. 2022. More Than Accuracy: An Empirical Study of Consistency Between Performance and Interpretability. In Pacific Rim International Conference on Artificial Intelligence. Springer, 579\u2013590."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01249-6_4"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i2.16217"},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings, Part XII 16","author":"Gao Chen","year":"2020","unstructured":"Chen Gao, Jiarui Xu, Yuliang Zou, and Jia-Bin Huang. 2020. Drg: Dual relation graph for human-object interaction detection. In Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XII 16. Springer, 696\u2013712."},{"key":"e_1_3_2_1_7_1","volume-title":"ican: Instance-centric attention network for human-object interaction detection. arXiv preprint arXiv:1808.10437","author":"Gao Chen","year":"2018","unstructured":"Chen Gao, Yuliang Zou, and Jia-Bin Huang. 2018. ican: Instance-centric attention network for human-object interaction detection. arXiv preprint arXiv:1808.10437 (2018)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG52635.2021.9667044"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00872"},{"key":"e_1_3_2_1_10_1","volume-title":"Visual semantic role labeling. arXiv preprint arXiv:1505.04474","author":"Gupta Saurabh","year":"2015","unstructured":"Saurabh Gupta and Jitendra Malik. 2015. Visual semantic role labeling. arXiv preprint arXiv:1505.04474 (2015)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3161969"},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings, Part XV 16","author":"Kim Bumsoo","year":"2020","unstructured":"Bumsoo Kim, Taeho Choi, Jaewoo Kang, and Hyunwoo\u00a0J Kim. 2020. Uniondet: Union-level detector towards real-time human-object interaction detection. In Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XV 16. Springer, 498\u2013514."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00014"},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings, Part XXI 16","author":"Kim Dong-Jin","year":"2020","unstructured":"Dong-Jin Kim, Xiao Sun, Jinsoo Choi, Stephen Lin, and In\u00a0So Kweon. 2020. Detecting human-object interactions with action co-occurrence priors. In Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXI 16. Springer, 718\u2013736."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00370"},{"key":"e_1_3_2_1_17_1","first-page":"1","article-title":"Anchor retouching via model interaction for robust object detection in aerial images","volume":"60","author":"Liang Dong","year":"2021","unstructured":"Dong Liang, Qixiang Geng, Zongqi Wei, Dmitry\u00a0A Vorontsov, Ekaterina\u00a0L Kim, Mingqiang Wei, and Huiyu Zhou. 2021. Anchor retouching via model interaction for robust object detection in aerial images. IEEE Transactions on Geoscience and Remote Sensing 60 (2021), 1\u201313.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2014.10.020"},{"key":"e_1_3_2_1_19_1","volume-title":"CrossNet: Cross-scene Background Subtraction Network via 3D Optical Flow","author":"Liang Dong","year":"2023","unstructured":"Dong Liang, Dong Zhang, Qiong Wang, Zongqi Wei, and Liyan Zhang. 2023. CrossNet: Cross-scene Background Subtraction Network via 3D Optical Flow. IEEE Transactions on Multimedia (2023)."},{"key":"e_1_3_2_1_20_1","volume-title":"MUS-CDB: Mixed Uncertainty Sampling with Class Distribution Balancing for Active Annotation in Aerial Object Detection","author":"Liang Dong","year":"2023","unstructured":"Dong Liang, Jing-Wei Zhang, Ying-Peng Tang, and Sheng-Jun Huang. 2023. MUS-CDB: Mixed Uncertainty Sampling with Class Distribution Balancing for Active Annotation in Aerial Object Detection. IEEE Transactions on Geoscience and Remote Sensing (2023)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00056"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the Twenty-Ninth International Conference on International Joint Conferences on Artificial Intelligence. 1104\u20131110","author":"Lin Xue","year":"2021","unstructured":"Xue Lin, Qi Zou, and Xixia Xu. 2021. Action-guided attention mining and relation reasoning network for human-object interaction detection. In Proceedings of the Twenty-Ninth International Conference on International Joint Conferences on Artificial Intelligence. 1104\u20131110."},{"key":"e_1_3_2_1_24_1","volume-title":"Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101","author":"Loshchilov Ilya","year":"2017","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01240-3_25"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01027"},{"key":"e_1_3_2_1_27_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00956"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00579"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00417"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2022.3158903"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3568396"},{"key":"e_1_3_2_1_33_1","volume-title":"Balanced Mixup Loss for Long-Tailed Visual Recognition. In ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 1\u20135.","author":"Ye Haibo","year":"2023","unstructured":"Haibo Ye, Fangyu Zhou, Xinjie Li, and Qingheng Zhang. 2023. Balanced Mixup Loss for Long-Tailed Visual Recognition. In ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 1\u20135."},{"key":"e_1_3_2_1_34_1","first-page":"17209","article-title":"Mining the benefits of two-stage and one-stage hoi detection","volume":"34","author":"Zhang Aixi","year":"2021","unstructured":"Aixi Zhang, Yue Liao, Si Liu, Miao Lu, Yongliang Wang, Chen Gao, and Xiaobo Li. 2021. Mining the benefits of two-stage and one-stage hoi detection. Advances in Neural Information Processing Systems 34 (2021), 17209\u201317220.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.4108\/eai.23-12-2022.2329170"},{"key":"e_1_3_2_1_36_1","volume-title":"Mendam: Multi-Expert Network with Distribution-Aware Momentum for Long-Tailed Recognition. In ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","author":"Zhang Qingheng","year":"2023","unstructured":"Qingheng Zhang, Haibo Ye, and Kaicheng Yu. 2023. Mendam: Multi-Expert Network with Distribution-Aware Momentum for Long-Tailed Recognition. In ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 1\u20135."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01303"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00093"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01165"}],"event":{"name":"SUI '23: ACM Symposium on Spatial User Interaction","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques","SIGCHI ACM Special Interest Group on Computer-Human Interaction"],"location":"Sydney NSW Australia","acronym":"SUI '23"},"container-title":["Proceedings of the 2023 ACM Symposium on Spatial User Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3607822.3616409","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3607822.3616409","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:21Z","timestamp":1750178841000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3607822.3616409"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,13]]},"references-count":39,"alternative-id":["10.1145\/3607822.3616409","10.1145\/3607822"],"URL":"https:\/\/doi.org\/10.1145\/3607822.3616409","relation":{},"subject":[],"published":{"date-parts":[[2023,10,13]]},"assertion":[{"value":"2023-10-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}