{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,8]],"date-time":"2026-07-08T11:38:19Z","timestamp":1783510699817,"version":"3.55.0"},"publisher-location":"Cham","reference-count":53,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030586096","type":"print"},{"value":"9783030586102","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58610-2_41","type":"book-chapter","created":{"date-parts":[[2020,10,6]],"date-time":"2020-10-06T13:02:49Z","timestamp":1601989369000},"page":"696-712","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":151,"title":["DRG: Dual Relation Graph for Human-Object Interaction Detection"],"prefix":"10.1007","author":[{"given":"Chen","family":"Gao","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jiarui","family":"Xu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuliang","family":"Zou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jia-Bin","family":"Huang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2020,10,7]]},"reference":[{"key":"41_CR1","doi-asserted-by":"crossref","unstructured":"Bansal, A., Rambhatla, S.S., Shrivastava, A., Chellappa, R.: Detecting human-object interactions via functional generalization. In: AAAI (2020)","DOI":"10.1609\/aaai.v34i07.6616"},{"key":"41_CR2","doi-asserted-by":"crossref","unstructured":"Bilen, H., Vedaldi, A.: Weakly supervised deep detection networks. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.311"},{"key":"41_CR3","doi-asserted-by":"crossref","unstructured":"Chao, Y.W., Liu, Y., Liu, X., Zeng, H., Deng, J.: Learning to detect human-object interactions. In: WACV (2017)","DOI":"10.1109\/WACV.2018.00048"},{"key":"41_CR4","doi-asserted-by":"crossref","unstructured":"Chao, Y.W., Wang, Z., He, Y., Wang, J., Deng, J.: HICO: A benchmark for recognizing human-object interactions in images. In: CVPR (2015)","DOI":"10.1109\/ICCV.2015.122"},{"key":"41_CR5","doi-asserted-by":"crossref","unstructured":"Dai, B., Zhang, Y., Lin, D.: Detecting visual relationships with deep relational networks. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.352"},{"issue":"1","key":"41_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11263-011-0439-x","volume":"95","author":"C Desai","year":"2011","unstructured":"Desai, C., Ramanan, D., Fowlkes, C.C.: Discriminative models for multi-class object layout. IJCV 95(1), 1\u201312 (2011)","journal-title":"IJCV"},{"key":"41_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1007\/978-3-030-01249-6_4","volume-title":"Computer Vision \u2013 ECCV 2018","author":"H-S Fang","year":"2018","unstructured":"Fang, H.-S., Cao, J., Tai, Y.-W., Lu, C.: Pairwise body-part attention for recognizing human-object interactions. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11214, pp. 52\u201368. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01249-6_4"},{"key":"41_CR8","doi-asserted-by":"crossref","unstructured":"Fouhey, D.F., Zitnick, C.L.: Predicting object dynamics in scenes. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.260"},{"key":"41_CR9","unstructured":"Gao, C., Zou, Y., Huang, J.B.: iCAN: instance-centric attention network for human-object interaction detection. In: BMVC (2018)"},{"key":"41_CR10","doi-asserted-by":"crossref","unstructured":"Girdhar, R., Carreira, J., Doersch, C., Zisserman, A.: Video action transformer network. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00033"},{"key":"41_CR11","unstructured":"Girshick, R., Radosavovic, I., Gkioxari, G., Doll\u00e1r, P., He, K.: Detectron (2018). https:\/\/github.com\/facebookresearch\/detectron"},{"key":"41_CR12","doi-asserted-by":"crossref","unstructured":"Gkioxari, G., Girshick, R., Doll\u00e1r, P., He, K.: Detecting and recognizing human-object interactions. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00872"},{"key":"41_CR13","unstructured":"Gupta, S., Malik, J.: Visual semantic role labeling. arXiv preprint arXiv:1505.04474 (2015)"},{"key":"41_CR14","doi-asserted-by":"crossref","unstructured":"Gupta, T., Schwing, A., Hoiem, D.: No-frills human-object interaction detection: factorization, appearance and layout encodings, and training techniques. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00977"},{"key":"41_CR15","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"41_CR16","doi-asserted-by":"crossref","unstructured":"Hu, H., Gu, J., Zhang, Z., Dai, J., Wei, Y.: Relation networks for object detection. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00378"},{"key":"41_CR17","doi-asserted-by":"crossref","unstructured":"Hu, R., Rohrbach, M., Andreas, J., Darrell, T., Saenko, K.: Modeling relationships in referential expressions with compositional modular networks. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.470"},{"key":"41_CR18","doi-asserted-by":"crossref","unstructured":"Johnson, J., Gupta, A., Fei-Fei, L.: Image generation from scene graphs. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00133"},{"key":"41_CR19","doi-asserted-by":"crossref","unstructured":"Johnson, J., et al.: Image retrieval using scene graphs. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"41_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1007\/978-3-030-01264-9_15","volume-title":"Computer Vision \u2013 ECCV 2018","author":"K Kato","year":"2018","unstructured":"Kato, K., Li, Y., Gupta, A.: Compositional learning for human object interaction. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) Computer Vision \u2013 ECCV 2018. LNCS, vol. 11218, pp. 247\u2013264. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01264-9_15"},{"key":"41_CR21","doi-asserted-by":"crossref","unstructured":"Kolesnikov, A., Lampert, C.H., Ferrari, V.: Detecting visual relationships using box attention. In: ICCV (2019)","DOI":"10.1109\/ICCVW.2019.00217"},{"key":"41_CR22","doi-asserted-by":"crossref","unstructured":"Li, Y., Ouyang, W., Wang, X., Tang, X.: VIP-CNN: visual phrase guided convolutional neural network. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.766"},{"key":"41_CR23","doi-asserted-by":"crossref","unstructured":"Li, Y., Ouyang, W., Zhou, B., Wang, K., Wang, X.: Scene graph generation from objects, phrases and region captions. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.142"},{"key":"41_CR24","doi-asserted-by":"crossref","unstructured":"Li, Y.L., et al.: Transferable interactiveness prior for human-object interaction detection. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00370"},{"key":"41_CR25","doi-asserted-by":"crossref","unstructured":"Liao, Y., Liu, S., Wang, F., Chen, Y., Qian, C., Feng, J.: PPDM: parallel point detection and matching for real-time human-object interaction detection. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00056"},{"key":"41_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"TY Lin","year":"2014","unstructured":"Lin, T.Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"41_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"852","DOI":"10.1007\/978-3-319-46448-0_51","volume-title":"Computer Vision \u2013 ECCV 2016","author":"C Lu","year":"2016","unstructured":"Lu, C., Krishna, R., Bernstein, M., Fei-Fei, L.: Visual relationship detection with language priors. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 852\u2013869. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_51"},{"key":"41_CR28","doi-asserted-by":"crossref","unstructured":"Mai, L., Jin, H., Lin, Z., Fang, C., Brandt, J., Liu, F.: Spatial-semantic image search by visual feature synthesis. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.125"},{"key":"41_CR29","unstructured":"Mikolov, T., Grave, E., Bojanowski, P., Puhrsch, C., Joulin, A.: Advances in pre-training distributed word representations. In: LREC (2018)"},{"key":"41_CR30","unstructured":"Newell, A., Deng, J.: Pixels to graphs by associative embedding. In: NeurIPS (2017)"},{"key":"41_CR31","doi-asserted-by":"crossref","unstructured":"Peyre, J., Laptev, I., Schmid, C., Sivic, J.: Weakly-supervised learning of visual relations. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.554"},{"key":"41_CR32","doi-asserted-by":"crossref","unstructured":"Peyre, J., Laptev, I., Schmid, C., Sivic, J.: Detecting rare visual relations using analogies. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00207"},{"key":"41_CR33","doi-asserted-by":"crossref","unstructured":"Plummer, B.A., Mallya, A., Cervantes, C.M., Hockenmaier, J., Lazebnik, S.: Phrase localization and visual relationship detection with comprehensive linguistic cues. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.213"},{"key":"41_CR34","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"407","DOI":"10.1007\/978-3-030-01240-3_25","volume-title":"Computer Vision \u2013 ECCV 2018","author":"S Qi","year":"2018","unstructured":"Qi, S., Wang, W., Jia, B., Shen, J., Zhu, S.-C.: Learning human-object interactions by graph parsing neural networks. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11213, pp. 407\u2013423. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01240-3_25"},{"key":"41_CR35","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: NeurIPS (2015)"},{"key":"41_CR36","doi-asserted-by":"crossref","unstructured":"Shen, L., Yeung, S., Hoffman, J., Mori, G., Fei-Fei, L.: Scaling human-object interaction recognition through zero-shot learning. In: WACV (2018)","DOI":"10.1109\/WACV.2018.00181"},{"key":"41_CR37","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/978-3-030-01252-6_20","volume-title":"Computer Vision \u2013 ECCV 2018","author":"C Sun","year":"2018","unstructured":"Sun, C., Shrivastava, A., Vondrick, C., Murphy, K., Sukthankar, R., Schmid, C.: Actor-centric relation network. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11215, pp. 335\u2013351. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01252-6_20"},{"key":"41_CR38","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NeurIPS (2017)"},{"key":"41_CR39","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Lin, X., Batra, T., Lawrence Zitnick, C., Parikh, D.: Learning common sense through visual abstraction. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.292"},{"key":"41_CR40","doi-asserted-by":"crossref","unstructured":"Wan, B., Zhou, D., Zhou, Y., Li, R., He, X.: Pose-aware multi-level feature network for human object interaction detection. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00956"},{"key":"41_CR41","doi-asserted-by":"crossref","unstructured":"Wang, T., et al.: Deep contextual attention for human-object interaction detection. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00579"},{"key":"41_CR42","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00813"},{"key":"41_CR43","doi-asserted-by":"crossref","unstructured":"Xu, D., Zhu, Y., Choy, C.B., Fei-Fei, L.: Scene graph generation by iterative message passing. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.330"},{"key":"41_CR44","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"690","DOI":"10.1007\/978-3-030-01246-5_41","volume-title":"Computer Vision \u2013 ECCV 2018","author":"J Yang","year":"2018","unstructured":"Yang, J., Lu, J., Lee, S., Batra, D., Parikh, D.: Graph R-CNN for scene graph generation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11205, pp. 690\u2013706. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01246-5_41"},{"key":"41_CR45","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1007\/978-3-030-01258-8_3","volume-title":"Computer Vision \u2013 ECCV 2018","author":"X Yang","year":"2018","unstructured":"Yang, X., Zhang, H., Cai, J.: Shuffle-then-assemble: learning object-agnostic visual relationship features. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11216, pp. 38\u201354. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01258-8_3"},{"key":"41_CR46","doi-asserted-by":"crossref","unstructured":"Yao, B., Fei-Fei, L.: Modeling mutual context of object and human pose in human-object interaction activities. In: CVPR (2010)","DOI":"10.1109\/CVPR.2010.5540235"},{"key":"41_CR47","unstructured":"Yi, K., Wu, J., Gan, C., Torralba, A., Kohli, P., Tenenbaum, J.B.: Neural-symbolic VQA: disentangling reasoning from vision and language understanding. In: NeurIPS (2018)"},{"key":"41_CR48","doi-asserted-by":"crossref","unstructured":"Yin, X., Ordonez, V.: Obj2text: generating visually descriptive language from object layouts. In: EMNLP (2017)","DOI":"10.18653\/v1\/D17-1017"},{"key":"41_CR49","doi-asserted-by":"crossref","unstructured":"Zellers, R., Yatskar, M., Thomson, S., Choi, Y.: Neural motifs: scene graph parsing with global context. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00611"},{"key":"41_CR50","doi-asserted-by":"crossref","unstructured":"Zhang, H., Kyaw, Z., Yu, J., Chang, S.F.: PPR-FCN: weakly supervised visual relation detection via parallel pairwise R-FCN. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.454"},{"key":"41_CR51","doi-asserted-by":"crossref","unstructured":"Zhou, P., Chi, M.: Relation parsing neural network for human-object interaction detection. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00093"},{"key":"41_CR52","doi-asserted-by":"crossref","unstructured":"Zhuang, B., Liu, L., Shen, C., Reid, I.: Towards context-aware interaction recognition for visual relationship detection. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.71"},{"key":"41_CR53","doi-asserted-by":"crossref","unstructured":"Zitnick, C.L., Parikh, D.: Bringing semantics into focus using visual abstraction. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.387"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58610-2_41","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,6]],"date-time":"2024-10-06T00:41:29Z","timestamp":1728175289000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58610-2_41"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030586096","9783030586102"],"references-count":53,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58610-2_41","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"7 October 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic. From the ECCV Workshops 249 full papers, 18 short papers, and 21 further contributions were published out of a total of 467 submissions.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}