{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T15:29:44Z","timestamp":1742916584434,"version":"3.40.3"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030336752"},{"type":"electronic","value":"9783030336769"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-33676-9_23","type":"book-chapter","created":{"date-parts":[[2019,10,25]],"date-time":"2019-10-25T17:20:30Z","timestamp":1572024030000},"page":"331-344","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Exploiting Attention for Visual Relationship Detection"],"prefix":"10.1007","author":[{"given":"Tongxin","family":"Hu","sequence":"first","affiliation":[]},{"given":"Wentong","family":"Liao","sequence":"additional","affiliation":[]},{"given":"Michael Ying","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Bodo","family":"Rosenhahn","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,10,25]]},"reference":[{"key":"23_CR1","doi-asserted-by":"crossref","unstructured":"Awiszus, M., Rosenhahn, B.: Markov chain neural networks. In: CVPR Workshops, pp. 2180\u20132187 (2018)","DOI":"10.1109\/CVPRW.2018.00293"},{"key":"23_CR2","doi-asserted-by":"crossref","unstructured":"Berg, A.C., et al.: Understanding and predicting importance in images. In: CVPR, pp. 3562\u20133569. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6248100"},{"key":"23_CR3","doi-asserted-by":"crossref","unstructured":"Choi, W., Chao, Y.W., Pantofaru, C., Savarese, S.: Understanding indoor scenes using 3D geometric phrases. In: CVPR, pp. 33\u201340 (2013)","DOI":"10.1109\/CVPR.2013.12"},{"key":"23_CR4","doi-asserted-by":"crossref","unstructured":"Dai, B., Zhang, Y., Lin, D.: Detecting visual relationships with deep relational networks. In: CVPR, pp. 3076\u20133086 (2017)","DOI":"10.1109\/CVPR.2017.352"},{"key":"23_CR5","doi-asserted-by":"crossref","unstructured":"Das, P., Xu, C., Doell, R.F., Corso, J.J.: A thousand frames in just a few words: lingual description of videos through latent topics and sparse object stitching. In: CVPR, pp. 2634\u20132641 (2013)","DOI":"10.1109\/CVPR.2013.340"},{"key":"23_CR6","doi-asserted-by":"crossref","unstructured":"Divvala, S.K., Farhadi, A., Guestrin, C.: Learning everything about anything: Webly-supervised visual concept learning. In: CVPR, pp. 3270\u20133277 (2014)","DOI":"10.1109\/CVPR.2014.412"},{"key":"23_CR7","doi-asserted-by":"crossref","unstructured":"Fang, H., et al.: From captions to visual concepts and back. In: CVPR, pp. 1473\u20131482 (2015)","DOI":"10.1109\/CVPR.2015.7298754"},{"key":"23_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1007\/978-3-642-15561-1_2","volume-title":"Computer Vision \u2013 ECCV 2010","author":"A Farhadi","year":"2010","unstructured":"Farhadi, A., et al.: Every picture tells a story: generating sentences from images. In: Daniilidis, K., Maragos, P., Paragios, N. (eds.) ECCV 2010, Part IV. LNCS, vol. 6314, pp. 15\u201329. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-15561-1_2"},{"issue":"12","key":"23_CR9","doi-asserted-by":"crossref","first-page":"3618","DOI":"10.1073\/pnas.1422953112","volume":"112","author":"D Geman","year":"2015","unstructured":"Geman, D., Geman, S., Hallonquist, N., Younes, L.: Visual turing test for computer vision systems. Proc. Natl. Acad. Sci. 112(12), 3618\u20133623 (2015)","journal-title":"Proc. Natl. Acad. Sci."},{"key":"23_CR10","doi-asserted-by":"crossref","unstructured":"Henschel, R., von Marcard, T., Rosenhahn, B.: Simultaneous identification and tracking of multiple people using video and IMUs. In: CVPR Workshops (2019)","DOI":"10.1109\/CVPRW.2019.00106"},{"key":"23_CR11","doi-asserted-by":"crossref","unstructured":"Izadinia, H., Sadeghi, F., Farhadi, A.: Incorporating scene context and object layout into appearance modeling. In: CVPR, pp. 232\u2013239 (2014)","DOI":"10.1109\/CVPR.2014.37"},{"key":"23_CR12","doi-asserted-by":"crossref","unstructured":"Jia, Z., Gallagher, A., Saxena, A., Chen, T.: 3D-based reasoning with blocks, support, and stability. In: CVPR, pp. 1\u20138 (2013)","DOI":"10.1109\/CVPR.2013.8"},{"key":"23_CR13","doi-asserted-by":"crossref","unstructured":"Kluger, F., et al.: Region-based cycle-consistent data augmentation for object detection. In: 2018 IEEE International Conference on Big Data (Big Data), pp. 5205\u20135211. IEEE (2018)","DOI":"10.1109\/BigData.2018.8622318"},{"issue":"1","key":"23_CR14","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123","author":"R Krishna","year":"2017","unstructured":"Krishna, R., et al.: Visual genome: connecting language and vision using crowdsourced dense image annotations. Int. J. Comput. Vis. 123(1), 32\u201373 (2017)","journal-title":"Int. J. Comput. Vis."},{"key":"23_CR15","doi-asserted-by":"crossref","unstructured":"Kulkarni, G., et al.: Baby talk: understanding and generating image descriptions. In: CVPR. Citeseer (2011)","DOI":"10.1109\/CVPR.2011.5995466"},{"key":"23_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1007\/978-3-319-59129-2_8","volume-title":"Image Analysis","author":"Z Laskar","year":"2017","unstructured":"Laskar, Z., Kannala, J.: Context aware query image representation for particular object retrieval. In: Sharma, P., Bianchi, F.M. (eds.) SCIA 2017, Part II. LNCS, vol. 10270, pp. 88\u201399. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-59129-2_8"},{"key":"23_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"346","DOI":"10.1007\/978-3-030-01246-5_21","volume-title":"Computer Vision \u2013 ECCV 2018","author":"Y Li","year":"2018","unstructured":"Li, Y., Ouyang, W., Zhou, B., Shi, J., Zhang, C., Wang, X.: Factorizable net: an efficient subgraph-based framework for scene graph generation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018, Part I. LNCS, vol. 11205, pp. 346\u2013363. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01246-5_21"},{"key":"23_CR18","doi-asserted-by":"crossref","unstructured":"Li, Y., Ouyang, W., Zhou, B., Wang, K., Wang, X.: Scene graph generation from objects, phrases and region captions. In: ICCV, pp. 1261\u20131270 (2017)","DOI":"10.1109\/ICCV.2017.142"},{"key":"23_CR19","doi-asserted-by":"crossref","unstructured":"Liao, W., Rosenhahn, B., Shuai, L., Ying Yang, M.: Natural language guided visual relationship detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops (2019)","DOI":"10.1109\/CVPRW.2019.00058"},{"key":"23_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"852","DOI":"10.1007\/978-3-319-46448-0_51","volume-title":"Computer Vision \u2013 ECCV 2016","author":"C Lu","year":"2016","unstructured":"Lu, C., Krishna, R., Bernstein, M., Fei-Fei, L.: Visual relationship detection with language priors. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016, Part I. LNCS, vol. 9905, pp. 852\u2013869. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_51"},{"key":"23_CR21","doi-asserted-by":"crossref","unstructured":"Mensink, T., Gavves, E., Snoek, C.G.: Costa: co-occurrence statistics for zero-shot classification. In: CVPR, pp. 2441\u20132448 (2014)","DOI":"10.1109\/CVPR.2014.313"},{"key":"23_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"792","DOI":"10.1007\/978-3-319-46493-0_48","volume-title":"Computer Vision \u2013 ECCV 2016","author":"VK Nagaraja","year":"2016","unstructured":"Nagaraja, V.K., Morariu, V.I., Davis, L.S.: Modeling context between objects for referring expression understanding. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016, Part IV. LNCS, vol. 9908, pp. 792\u2013807. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_48"},{"key":"23_CR23","doi-asserted-by":"crossref","unstructured":"Peyre, J., Sivic, J., Laptev, I., Schmid, C.: Weakly-supervised learning of visual relations. In: ICCV, pp. 5179\u20135188 (2017)","DOI":"10.1109\/ICCV.2017.554"},{"key":"23_CR24","doi-asserted-by":"crossref","unstructured":"Prabhu, N., Venkatesh Babu, R.: Attribute-graph: a graph based approach to image ranking. In: ICCV, pp. 1071\u20131079 (2015)","DOI":"10.1109\/ICCV.2015.128"},{"key":"23_CR25","doi-asserted-by":"crossref","unstructured":"Ramanathan, V., et al.: Learning semantic relationships for better action retrieval in images. In: CVPR, pp. 1100\u20131109 (2015)","DOI":"10.1109\/CVPR.2015.7298713"},{"key":"23_CR26","doi-asserted-by":"crossref","unstructured":"Reinders, C., Ackermann, H., Yang, M.Y., Rosenhahn, B.: Object recognition from very few training examples for enhancing bicycle maps. In: 2018 IEEE Intelligent Vehicles Symposium (IV), pp. 1\u20138. IEEE (2018)","DOI":"10.1109\/IVS.2018.8500469"},{"key":"23_CR27","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems, pp. 91\u201399 (2015)"},{"key":"23_CR28","doi-asserted-by":"crossref","unstructured":"Sadeghi, M.A., Farhadi, A.: Recognition using visual phrases. In: CVPR 2011, pp. 1745\u20131752. IEEE (2011)","DOI":"10.1109\/CVPR.2011.5995711"},{"issue":"7676","key":"23_CR29","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver, D., et al.: Mastering the game of go without human knowledge. Nature 550(7676), 354 (2017)","journal-title":"Nature"},{"key":"23_CR30","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint: arXiv:1409.1556 (2014)"},{"key":"23_CR31","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017)"},{"key":"23_CR32","doi-asserted-by":"crossref","unstructured":"Wandt, B., Rosenhahn, B.: RepNet: weakly supervised training of an adversarial reprojection network for 3D human pose estimation. In: CVPR, pp. 7782\u20137791 (2019)","DOI":"10.1109\/CVPR.2019.00797"},{"key":"23_CR33","unstructured":"Xiong, Y., Zhu, K., Lin, D., Tang, X.: Recognize complex events from static images by fusing deep channels. In: CVPR, pp. 1600\u20131609 (2015)"},{"key":"23_CR34","doi-asserted-by":"crossref","unstructured":"Xu, D., Zhu, Y., Choy, C.B., Fei-Fei, L.: Scene graph generation by iterative message passing. In: CVPR, pp. 5410\u20135419 (2017)","DOI":"10.1109\/CVPR.2017.330"},{"key":"23_CR35","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"690","DOI":"10.1007\/978-3-030-01246-5_41","volume-title":"Computer Vision \u2013 ECCV 2018","author":"J Yang","year":"2018","unstructured":"Yang, J., Lu, J., Lee, S., Batra, D., Parikh, D.: Graph R-CNN for scene graph\u00a0generation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018, Part I. LNCS, vol. 11205, pp. 690\u2013706. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01246-5_41"},{"key":"23_CR36","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1016\/j.isprsjprs.2017.07.010","volume":"131","author":"MY Yang","year":"2017","unstructured":"Yang, M.Y., Liao, W., Ackermann, H., Rosenhahn, B.: On support relations and semantic scene graphs. ISPRS J. Photogramm. Remote Sens. 131, 15\u201325 (2017)","journal-title":"ISPRS J. Photogramm. Remote Sens."},{"key":"23_CR37","doi-asserted-by":"crossref","unstructured":"Yu, R., Li, A., Morariu, V.I., Davis, L.S.: Visual relationship detection with internal and external linguistic knowledge distillation. In: ICCV, pp. 1974\u20131982 (2017)","DOI":"10.1109\/ICCV.2017.121"},{"key":"23_CR38","doi-asserted-by":"crossref","unstructured":"Zellers, R., Yatskar, M., Thomson, S., Choi, Y.: Neural motifs: scene graph parsing with global context. In: CVPR, pp. 5831\u20135840 (2018)","DOI":"10.1109\/CVPR.2018.00611"},{"key":"23_CR39","doi-asserted-by":"crossref","unstructured":"Zhang, H., Kyaw, Z., Chang, S.F., Chua, T.S.: Visual translation embedding network for visual relation detection. In: CVPR, pp. 5532\u20135540 (2017)","DOI":"10.1109\/CVPR.2017.331"},{"key":"23_CR40","doi-asserted-by":"crossref","unstructured":"Zhuang, B., Liu, L., Shen, C., Reid, I.: Towards context-aware interaction recognition for visual relationship detection. In: ICCV, pp. 589\u2013598 (2017)","DOI":"10.1109\/ICCV.2017.71"},{"key":"23_CR41","doi-asserted-by":"crossref","unstructured":"Zitnick, C.L., Parikh, D., Vanderwende, L.: Learning the visual interpretation of sentences. In: ICCV, pp. 1681\u20131688 (2013)","DOI":"10.1109\/ICCV.2013.211"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-33676-9_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,1,26]],"date-time":"2021-01-26T03:20:24Z","timestamp":1611631224000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-33676-9_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030336752","9783030336769"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-33676-9_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"25 October 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DAGM GCPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"German Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Dortmund","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 September 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 September 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"41","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dagm2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/gcpr2019.tu-dortmund.de\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"91","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"43","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"47% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}