{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T00:35:29Z","timestamp":1778891729179,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,8,21]],"date-time":"2021-08-21T00:00:00Z","timestamp":1629504000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001459","name":"Ministry of Education - Singapore","doi-asserted-by":"publisher","award":["251RES2029"],"award-info":[{"award-number":["251RES2029"]}],"id":[{"id":"10.13039\/501100001459","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,8,21]]},"DOI":"10.1145\/3463944.3469097","type":"proceedings-article","created":{"date-parts":[[2021,8,20]],"date-time":"2021-08-20T01:59:32Z","timestamp":1629424772000},"page":"9-17","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":24,"title":["ST-HOI: A Spatial-Temporal Baseline for Human-Object Interaction Detection in Videos"],"prefix":"10.1145","author":[{"given":"Meng-Jiun","family":"Chiou","sequence":"first","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chun-Yu","family":"Liao","sequence":"additional","affiliation":[{"name":"ASUS Intelligent Cloud Services, Taipei, Taiwan Roc"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Li-Wei","family":"Wang","sequence":"additional","affiliation":[{"name":"ASUS Intelligent Cloud Services, Taipei, Taiwan Roc"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Roger","family":"Zimmermann","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiashi","family":"Feng","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,8,21]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_2_3_1","volume-title":"Learning to detect human-object interactions. In 2018 ieee winter conference on applications of computer vision (wacv)","author":"Chao Yu-Wei","unstructured":"Yu-Wei Chao , Yunfan Liu , Xieyang Liu , Huayi Zeng , and Jia Deng . 2018. Learning to detect human-object interactions. In 2018 ieee winter conference on applications of computer vision (wacv) . IEEE , 381--389. Yu-Wei Chao, Yunfan Liu, Xieyang Liu, Huayi Zeng, and Jia Deng. 2018. Learning to detect human-object interactions. In 2018 ieee winter conference on applications of computer vision (wacv). IEEE, 381--389."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413856"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3069041"},{"key":"e_1_3_2_2_6_1","volume-title":"RMPE: Regional Multi-person Pose Estimation. In ICCV.","author":"Fang Hao-Shu","year":"2017","unstructured":"Hao-Shu Fang , Shuqin Xie , Yu-Wing Tai , and Cewu Lu . 2017 . RMPE: Regional Multi-person Pose Estimation. In ICCV. Hao-Shu Fang, Shuqin Xie, Yu-Wing Tai, and Cewu Lu. 2017. RMPE: Regional Multi-person Pose Estimation. In ICCV."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00630"},{"key":"e_1_3_2_2_8_1","volume-title":"iCAN: Instance-centric attention network for human-object interaction detection. arXiv preprint arXiv:1808.10437","author":"Gao Chen","year":"2018","unstructured":"Chen Gao , Yuliang Zou , and Jia-Bin Huang . 2018. iCAN: Instance-centric attention network for human-object interaction detection. arXiv preprint arXiv:1808.10437 ( 2018 ). Chen Gao, Yuliang Zou, and Jia-Bin Huang. 2018. iCAN: Instance-centric attention network for human-object interaction detection. arXiv preprint arXiv:1808.10437 (2018)."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00872"},{"key":"e_1_3_2_2_10_1","volume-title":"Deep Learning","author":"Goodfellow Ian","unstructured":"Ian Goodfellow , Yoshua Bengio , and Aaron Courville . 2016. Deep Learning . MIT Press . http:\/\/www.deeplearningbook.org. Ian Goodfellow, Yoshua Bengio, and Aaron Courville. 2016. Deep Learning .MIT Press. http:\/\/www.deeplearningbook.org."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00633"},{"key":"e_1_3_2_2_12_1","volume-title":"Visual Semantic Role Labeling. arXiv preprint arXiv:1505.04474","author":"Gupta Saurabh","year":"2015","unstructured":"Saurabh Gupta and Jitendra Malik . 2015. Visual Semantic Role Labeling. arXiv preprint arXiv:1505.04474 ( 2015 ). Saurabh Gupta and Jitendra Malik. 2015. Visual Semantic Role Labeling. arXiv preprint arXiv:1505.04474 (2015)."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00977"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"e_1_3_2_2_15_1","volume-title":"2015 IEEE International Conference on Computer Vision (ICCV). 1026--1034","author":"He K.","unstructured":"K. He , X. Zhang , S. Ren , and J. Sun . 2015. Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification . In 2015 IEEE International Conference on Computer Vision (ICCV). 1026--1034 . K. He, X. Zhang, S. Ren, and J. Sun. 2015. Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification. In 2015 IEEE International Conference on Computer Vision (ICCV). 1026--1034."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.620"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.573"},{"key":"e_1_3_2_2_19_1","unstructured":"Jingwei Ji. 2020. Question about the annotations \u00b7 Issue #2 \u00b7 JingweiJ\/ActionGenome. https:\/\/github.com\/JingweiJ\/ActionGenome\/issues\/2  Jingwei Ji. 2020. Question about the annotations \u00b7 Issue #2 \u00b7 JingweiJ\/ActionGenome. https:\/\/github.com\/JingweiJ\/ActionGenome\/issues\/2"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01025"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913478446"},{"key":"e_1_3_2_2_22_1","volume-title":"Anticipating human activities using object affordances for reactive robotic response","author":"Koppula Hema S","year":"2015","unstructured":"Hema S Koppula and Ashutosh Saxena . 2015. Anticipating human activities using object affordances for reactive robotic response . IEEE transactions on pattern analysis and machine intelligence, Vol. 38 , 1 ( 2015 ), 14--29. Hema S Koppula and Ashutosh Saxena. 2015. Anticipating human activities using object affordances for reactive robotic response. IEEE transactions on pattern analysis and machine intelligence, Vol. 38, 1 (2015), 14--29."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00046"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00370"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00056"},{"key":"e_1_3_2_2_26_1","volume-title":"Sustainability","volume":"11","author":"Lo Chi-Hung","year":"2019","unstructured":"Chi-Hung Lo and Yi-Wen Wang . 2019 . Constructing an Evaluation Model for User Experience in an Unmanned Store . Sustainability , Vol. 11 , 18 (2019). Chi-Hung Lo and Yi-Wen Wang. 2019. Constructing an Evaluation Model for User Experience in an Unmanned Store. Sustainability, Vol. 11, 18 (2019)."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_51"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01240-3_25"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3323873.3325056"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123380"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_31"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2501988.2501994"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3356076"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413778"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01363"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00956"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00579"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00417"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00813"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00037"},{"key":"e_1_3_2_2_44_1","unstructured":"Yonghui Wu Mike Schuster Zhifeng Chen Quoc V Le Mohammad Norouzi Wolfgang Macherey Maxim Krikun Yuan Cao Qin Gao Klaus Macherey etal 2016. Google's neural machine translation system: Bridging the gap between human and machine translation. arXiv preprint arXiv:1609.08144 (2016).  Yonghui Wu Mike Schuster Zhifeng Chen Quoc V Le Mohammad Norouzi Wolfgang Macherey Maxim Krikun Yuan Cao Qin Gao Klaus Macherey et al. 2016. Google's neural machine translation system: Bridging the gap between human and machine translation. arXiv preprint arXiv:1609.08144 (2016)."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.330"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3351090"},{"key":"e_1_3_2_2_47_1","volume-title":"Towards reaching human performance in pedestrian detection","author":"Zhang Shanshan","year":"2017","unstructured":"Shanshan Zhang , Rodrigo Benenson , Mohamed Omran , Jan Hosang , and Bernt Schiele . 2017. Towards reaching human performance in pedestrian detection . IEEE transactions on pattern analysis and machine intelligence, Vol. 40 , 4 ( 2017 ), 973--986. Shanshan Zhang, Rodrigo Benenson, Mohamed Omran, Jan Hosang, and Bernt Schiele. 2017. Towards reaching human performance in pedestrian detection. IEEE transactions on pattern analysis and machine intelligence, Vol. 40, 4 (2017), 973--986."},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58565-5_5"}],"event":{"name":"ICMR '21: International Conference on Multimedia Retrieval","location":"Taipei Taiwan","acronym":"ICMR '21","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2021 ACM Workshop on Intelligent Cross-Data Analysis and Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3463944.3469097","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3463944.3469097","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:12:15Z","timestamp":1750191135000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3463944.3469097"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,21]]},"references-count":48,"alternative-id":["10.1145\/3463944.3469097","10.1145\/3463944"],"URL":"https:\/\/doi.org\/10.1145\/3463944.3469097","relation":{},"subject":[],"published":{"date-parts":[[2021,8,21]]},"assertion":[{"value":"2021-08-21","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}