{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T07:46:21Z","timestamp":1776152781597,"version":"3.50.1"},"publisher-location":"Cham","reference-count":51,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030012397","type":"print"},{"value":"9783030012403","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-01240-3_25","type":"book-chapter","created":{"date-parts":[[2018,10,6]],"date-time":"2018-10-06T04:36:08Z","timestamp":1538800568000},"page":"407-423","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":352,"title":["Learning Human-Object Interactions by Graph Parsing Neural Networks"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4070-733X","authenticated-orcid":false,"given":"Siyuan","family":"Qi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0802-9567","authenticated-orcid":false,"given":"Wenguan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Baoxiong","family":"Jia","sequence":"additional","affiliation":[]},{"given":"Jianbing","family":"Shen","sequence":"additional","affiliation":[]},{"given":"Song-Chun","family":"Zhu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,10,5]]},"reference":[{"key":"25_CR1","doi-asserted-by":"crossref","unstructured":"Chao, Y.W., Liu, Y., Liu, X., Zeng, H., Deng, J.: Learning to detect human-object interactions (2018)","DOI":"10.1109\/WACV.2018.00048"},{"key":"25_CR2","doi-asserted-by":"crossref","unstructured":"Chao, Y.W., Wang, Z., He, Y., Wang, J., Deng, J.: HICO: A benchmark for recognizing human-object interactions in images. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.122"},{"key":"25_CR3","unstructured":"Chen, L.C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs. PAMI (2016)"},{"key":"25_CR4","unstructured":"Chen, L.C., Schwing, A., Yuille, A., Urtasun, R.: Learning deep structured models. In: ICML (2015)"},{"key":"25_CR5","doi-asserted-by":"crossref","unstructured":"Cho, K., Van Merri\u00ebnboer, B., Bahdanau, D., Bengio, Y.: On the properties of neural machine translation: encoder-decoder approaches. In: Syntax, Semantics and Structure in Statistical Translation, p. 103 (2014)","DOI":"10.3115\/v1\/W14-4012"},{"key":"25_CR6","doi-asserted-by":"crossref","unstructured":"Dai, J., et al.: Deformable convolutional networks. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.89"},{"key":"25_CR7","unstructured":"Defferrard, M., Bresson, X., Vandergheynst, P.: Convolutional neural networks on graphs with fast localized spectral filtering. In: NIPS (2016)"},{"key":"25_CR8","unstructured":"Delaitre, V., Sivic, J., Laptev, I.: Learning person-object interactions for action recognition in still images. In: NIPS (2011)"},{"key":"25_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"158","DOI":"10.1007\/978-3-642-33765-9_12","volume-title":"Computer Vision \u2013 ECCV 2012","author":"C Desai","year":"2012","unstructured":"Desai, C., Ramanan, D.: Detecting actions, poses, and objects with relational phraselets. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7575, pp. 158\u2013172. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33765-9_12"},{"key":"25_CR10","doi-asserted-by":"crossref","unstructured":"Elman, J.L.: Finding structure in time. Cogn. Sci. (1990)","DOI":"10.1207\/s15516709cog1402_1"},{"key":"25_CR11","doi-asserted-by":"crossref","unstructured":"Fang, H.S., Xu, Y., Wang, W., Zhu, S.C.: Learning pose grammar to encode human body configuration for 3D pose estimation. In: AAAI (2018)","DOI":"10.1609\/aaai.v32i1.12270"},{"key":"25_CR12","unstructured":"Gilmer, J., Schoenholz, S.S., Riley, P.F., Vinyals, O., Dahl, G.E.: Neural message passing for quantum chemistry. In: ICML (2017)"},{"key":"25_CR13","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast R-CNN. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"25_CR14","doi-asserted-by":"crossref","unstructured":"Gkioxari, G., Girshick, R., Doll\u00e1r, P., He, K.: Detecting and recognizing human-object interactions. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00872"},{"key":"25_CR15","doi-asserted-by":"crossref","unstructured":"Gupta, A., Davis, L.S.: Objects in action: an approach for combining action understanding and object perception. In: CVPR (2007)","DOI":"10.1109\/CVPR.2007.383331"},{"key":"25_CR16","doi-asserted-by":"crossref","unstructured":"Gupta, A., Kembhavi, A., Davis, L.S.: Observing human-object interactions: using spatial and functional compatibility for recognition. PAMI (2009)","DOI":"10.1109\/TPAMI.2009.83"},{"key":"25_CR17","unstructured":"Gupta, S., Malik, J.: Visual semantic role labeling. arXiv preprint arXiv:1505.04474 (2015)"},{"key":"25_CR18","doi-asserted-by":"crossref","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. (1997)","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"25_CR19","doi-asserted-by":"crossref","unstructured":"Hu, J.F., Zheng, W.S., Lai, J., Gong, S., Xiang, T.: Recognising human-object interaction via exemplar based modelling. In: ICCV (2013)","DOI":"10.1109\/ICCV.2013.390"},{"key":"25_CR20","doi-asserted-by":"crossref","unstructured":"Jain, A., Zamir, A.R., Savarese, S., Saxena, A.: Structural-RNN: deep learning on spatio-temporal graphs. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.573"},{"key":"25_CR21","unstructured":"Kipf, T.N., Welling, M.: Semi-supervised classification with graph convolutional networks. In: ICLR (2017)"},{"key":"25_CR22","doi-asserted-by":"crossref","unstructured":"Koppula, H.S., Saxena, A.: Anticipating human activities using object affordances for reactive robotic response. PAMI (2016)","DOI":"10.1109\/TPAMI.2015.2430335"},{"key":"25_CR23","doi-asserted-by":"crossref","unstructured":"Koppula, H.S., Gupta, R., Saxena, A.: Learning human activities and object affordances from RGB-D videos. Int. J. Robot. Res. (2013)","DOI":"10.1177\/0278364913478446"},{"key":"25_CR24","doi-asserted-by":"crossref","unstructured":"Li, R., Tapaswi, M., Liao, R., Jia, J., Urtasun, R., Fidler, S.: Situation recognition with graph neural networks. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.448"},{"key":"25_CR25","unstructured":"Li, Y., Tarlow, D., Brockschmidt, M., Zemel, R.: Gated graph sequence neural networks. In: ICLR (2016)"},{"key":"25_CR26","doi-asserted-by":"crossref","unstructured":"Liang, X., Lin, L., Shen, X., Feng, J., Yan, S., Xing, E.P.: Interpretable structure-evolving LSTM. In: ICCV (2017)","DOI":"10.1109\/CVPR.2017.234"},{"key":"25_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1007\/978-3-319-46448-0_8","volume-title":"Computer Vision \u2013 ECCV 2016","author":"X Liang","year":"2016","unstructured":"Liang, X., Shen, X., Feng, J., Lin, L., Yan, S.: Semantic object parsing with graph LSTM. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 125\u2013143. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_8"},{"key":"25_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"25_CR29","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"414","DOI":"10.1007\/978-3-319-46448-0_25","volume-title":"Computer Vision \u2013 ECCV 2016","author":"A Mallya","year":"2016","unstructured":"Mallya, A., Lazebnik, S.: Learning models for actions and person-object interactions with transfer to question answering. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 414\u2013428. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_25"},{"key":"25_CR30","doi-asserted-by":"crossref","unstructured":"Marino, K., Salakhutdinov, R., Gupta, A.: The more you know: using knowledge graphs for image classification. In: CVPR (2016)","DOI":"10.1109\/CVPR.2017.10"},{"key":"25_CR31","doi-asserted-by":"crossref","unstructured":"Monti, F., Boscaini, D., Masci, J., Rodol\u00e0, E., Svoboda, J., Bronstein, M.M.: Geometric deep learning on graphs and manifolds using mixture model CNNs. In: CVPR (2016)","DOI":"10.1109\/CVPR.2017.576"},{"key":"25_CR32","unstructured":"Niepert, M., Ahmed, M., Kutzkov, K.: Learning convolutional neural networks for graphs. In: ICML (2016)"},{"key":"25_CR33","doi-asserted-by":"crossref","unstructured":"Park, S., Nie, X., Zhu, S.C.: Attribute and-or grammar for joint parsing of human pose, parts and attributes. PAMI (2017)","DOI":"10.1109\/TPAMI.2017.2731842"},{"key":"25_CR34","doi-asserted-by":"crossref","unstructured":"Qi, S., Huang, S., Wei, P., Zhu, S.C.: Predicting human activities using stochastic grammar. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.132"},{"key":"25_CR35","unstructured":"Qi, S., Jia, B., Zhu, S.C.: Generalized earley parser: bridging symbolic grammars and sequence data for future prediction. In: ICML (2018)"},{"key":"25_CR36","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: NIPS (2015)"},{"key":"25_CR37","unstructured":"Seo, Y., Defferrard, M., Vandergheynst, P., Bresson, X.: Structured sequence modeling with graph convolutional recurrent networks. arXiv preprint arXiv:1612.07659 (2016)"},{"key":"25_CR38","doi-asserted-by":"crossref","unstructured":"Shen, L., Yeung, S., Hoffman, J., Mori, G., Fei-Fei, L.: Scaling human-object interaction recognition through zero-shot learning (2018)","DOI":"10.1109\/WACV.2018.00181"},{"key":"25_CR39","unstructured":"Shi, X., Chen, Z., Wang, H., Yeung, D.Y., Wong, W.K., Woo, W.c.: Convolutional LSTM network: a machine learning approach for precipitation nowcasting. In: NIPS (2015)"},{"key":"25_CR40","doi-asserted-by":"crossref","unstructured":"Simonovsky, M., Komodakis, N.: Dynamic edge-conditioned filters in convolutional neural networks on graphs. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.11"},{"key":"25_CR41","doi-asserted-by":"crossref","unstructured":"Teney, D., Liu, L., van den Hengel, A.: Graph-structured representations for visual question answering. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.344"},{"key":"25_CR42","unstructured":"Tompson, J.J., Jain, A., LeCun, Y., Bregler, C.: Joint training of a convolutional network and a graphical model for human pose estimation. In: NIPS (2014)"},{"key":"25_CR43","doi-asserted-by":"crossref","unstructured":"Wang, W., Xu, Y., Shen, J., Zhu, S.C.: Attentive fashion grammar network for fashion landmark detection and clothing category classification. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00449"},{"key":"25_CR44","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1007\/978-3-319-46484-8_18","volume-title":"Computer Vision \u2013 ECCV 2016","author":"Z Wu","year":"2016","unstructured":"Wu, Z., Lin, D., Tang, X.: Deep Markov random field for image modeling. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 295\u2013312. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_18"},{"key":"25_CR45","doi-asserted-by":"crossref","unstructured":"Xia, F., Zhu, J., Wang, P., Yuille, A.L.: Pose-guided human parsing by an And\/Or graph using pose-context features. In: AAAI (2016)","DOI":"10.1609\/aaai.v30i1.10460"},{"key":"25_CR46","doi-asserted-by":"crossref","unstructured":"Xu, D., Zhu, Y., Choy, C.B., Fei-Fei, L.: Scene graph generation by iterative message passing. In: ICCV (2017)","DOI":"10.1109\/CVPR.2017.330"},{"key":"25_CR47","doi-asserted-by":"crossref","unstructured":"Yao, B., Fei-Fei, L.: Grouplet: a structured image representation for recognizing human and object interactions. In: CVPR (2010)","DOI":"10.1109\/CVPR.2010.5540234"},{"key":"25_CR48","doi-asserted-by":"crossref","unstructured":"Yao, B., Fei-Fei, L.: Modeling mutual context of object and human pose in human-object interaction activities. In: CVPR (2010)","DOI":"10.1109\/CVPR.2010.5540235"},{"key":"25_CR49","doi-asserted-by":"crossref","unstructured":"Yao, B., Jiang, X., Khosla, A., Lin, A.L., Guibas, L., Fei-Fei, L.: Human action recognition by learning bases of action attributes and parts. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126386"},{"key":"25_CR50","doi-asserted-by":"crossref","unstructured":"Yuan, Y., Liang, X., Wang, X., Yeung, D.Y., Gupta, A.: Temporal dynamic graph LSTM for action-driven video object detection. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.200"},{"key":"25_CR51","doi-asserted-by":"crossref","unstructured":"Zheng, S., et al.: Conditional random fields as recurrent neural networks. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.179"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-01240-3_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,6]],"date-time":"2022-10-06T01:30:30Z","timestamp":1665019830000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-01240-3_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030012397","9783030012403"],"references-count":51,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-01240-3_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"5 October 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}