{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T05:19:44Z","timestamp":1755926384874,"version":"3.40.3"},"publisher-location":"Cham","reference-count":39,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030336752"},{"type":"electronic","value":"9783030336769"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-33676-9_30","type":"book-chapter","created":{"date-parts":[[2019,10,25]],"date-time":"2019-10-25T17:20:30Z","timestamp":1572024030000},"page":"428-441","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["DynGraph: Visual Question Answering via Dynamic Scene Graphs"],"prefix":"10.1007","author":[{"given":"Monica","family":"Haurilet","sequence":"first","affiliation":[]},{"given":"Ziad","family":"Al-Halah","sequence":"additional","affiliation":[]},{"given":"Rainer","family":"Stiefelhagen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,10,25]]},"reference":[{"key":"30_CR1","unstructured":"Agrawal, A., et al.: VQA: visual question answering. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2425\u20132433 (2015)"},{"key":"30_CR2","doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Bottom-up and top-down attention for image captioning and visual question answering. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00636"},{"key":"30_CR3","doi-asserted-by":"crossref","unstructured":"Andreas, J., Rohrbach, M., Darrell, T., Klein, D.: Neural module networks. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.12"},{"key":"30_CR4","unstructured":"Bouchard, G., Singh, S., Trouillon, T.: On approximate reasoning capabilities of low-rank vector spaces. In: AAAI (2015)"},{"key":"30_CR5","doi-asserted-by":"crossref","unstructured":"Conneau, A., Kiela, D., Schwenk, H., Barrault, L., Bordes, A.: Supervised learning of universal sentence representations from natural language inference data. In: EMNLP (2017)","DOI":"10.18653\/v1\/D17-1070"},{"key":"30_CR6","doi-asserted-by":"crossref","unstructured":"Harzig, P., Eggert, C., Lienhart, R.: Visual question answering with a hybrid convolution recurrent model. In: Proceedings of the 2018 ACM on International Conference on Multimedia Retrieval, pp. 318\u2013325. ACM (2018)","DOI":"10.1145\/3206025.3206054"},{"key":"30_CR7","doi-asserted-by":"crossref","unstructured":"Deng, Z., Vahdat, A., Hu, H., Mori, G.: Structure inference machines: recurrent neural networks for analyzing relations in group activity recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.516"},{"key":"30_CR8","doi-asserted-by":"crossref","unstructured":"Hu, R., Andreas, J., Rohrbach, M., Darrell, T., Saenko, K.: Learning to reason: end-to-end module networks for visual question answering. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.93"},{"key":"30_CR9","doi-asserted-by":"crossref","unstructured":"Johnson, J., Hariharan, B., van der Maaten, L., Fei-Fei, L., Zitnick, C.L., Girshick, R.: CLEVR: a diagnostic dataset for compositional language and elementary visual reasoning. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.215"},{"key":"30_CR10","doi-asserted-by":"crossref","unstructured":"Johnson, J., et al.: Image retrieval using scene graphs. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"30_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1007\/978-3-319-46493-0_15","volume-title":"Computer Vision \u2013 ECCV 2016","author":"A Kembhavi","year":"2016","unstructured":"Kembhavi, A., Salvato, M., Kolve, E., Seo, M., Hajishirzi, H., Farhadi, A.: A diagram is worth a dozen images. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 235\u2013251. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_15"},{"key":"30_CR12","unstructured":"Kipf, T.N., Welling, M.: Semi-supervised classification with graph convolutional networks. In: ICLR (2017)"},{"key":"30_CR13","doi-asserted-by":"crossref","unstructured":"Kok, S., Domingos, P.: Statistical predicate invention. In: ICML (2007)","DOI":"10.1145\/1273496.1273551"},{"issue":"1","key":"30_CR14","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123","author":"R Krishna","year":"2017","unstructured":"Krishna, R., et al.: Visual genome: connecting language and vision using crowdsourced dense image annotations. Int. J. Comput. Vision 123(1), 32\u201373 (2017)","journal-title":"Int. J. Comput. Vision"},{"key":"30_CR15","doi-asserted-by":"crossref","unstructured":"Kuhlmann, M., Oepen, S.: Towards a catalogue of linguistic graph banks. Computational Linguistics (2016)","DOI":"10.1162\/COLI_a_00280"},{"key":"30_CR16","unstructured":"Lu, J., Yang, J., Batra, D., Parikh, D.: Hierarchical question-image co-attention for visual question answering. In: NIPS (2016)"},{"key":"30_CR17","doi-asserted-by":"crossref","unstructured":"Ma, L., Lu, Z., Li, H.: Learning to answer questions from image using convolutional neural network. In: AAAI (2016)","DOI":"10.1609\/aaai.v30i1.10442"},{"key":"30_CR18","unstructured":"Mahdisoltani, F., Biega, J., Suchanek, F.M.: YAGO3: a knowledge base from multilingual wikipedias. In: CIDR (2013)"},{"issue":"1\u20133","key":"30_CR19","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1007\/s11263-017-1038-2","volume":"125","author":"M Malinowski","year":"2017","unstructured":"Malinowski, M., Rohrbach, M., Fritz, M.: Ask your neurons: a deep learning approach to visual question answering. Int. J. Comput. Vision 125(1\u20133), 110\u2013135 (2017)","journal-title":"Int. J. Comput. Vision"},{"key":"30_CR20","unstructured":"Manessi, F., Rozza, A., Manzo, M.: Dynamic graph convolutional networks. arXiv preprint arXiv:1704.06199 (2017)"},{"key":"30_CR21","doi-asserted-by":"crossref","unstructured":"Manning, C., Surdeanu, M., Bauer, J., Finkel, J., Bethard, S., McClosky, D.: The Stanford CoreNLP natural language processing toolkit. In: Proceedings of 52nd Annual Meeting of the Association for Computational Linguistics: System Demonstrations (2014)","DOI":"10.3115\/v1\/P14-5010"},{"key":"30_CR22","unstructured":"Mikolov, T., Sutskever, I., Chen, K., Corrado, G.S., Dean, J.: Distributed representations of words and phrases and their compositionality. In: NIPS (2013)"},{"key":"30_CR23","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1038\/nmeth.2340","volume":"10","author":"P Radivojac","year":"2013","unstructured":"Radivojac, P., et al.: A large-scale evaluation of computational protein function prediction. Nat. Methods 10, 221\u2013227 (2013)","journal-title":"Nat. Methods"},{"key":"30_CR24","unstructured":"Ren, M., Kiros, R., Zemel, R.: Exploring models and data for image question answering. In: NIPS (2015)"},{"key":"30_CR25","unstructured":"Santoro, A., et al.: A simple neural network module for relational reasoning. In: NIPS (2017)"},{"issue":"3","key":"30_CR26","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1609\/aimag.v29i3.2157","volume":"29","author":"P Sen","year":"2008","unstructured":"Sen, P., Namata, G., Bilgic, M., Getoor, L., Galligher, B., Eliassi-Rad, T.: Collective classification in network data. AI Mag. 29(3), 93\u2013106 (2008)","journal-title":"AI Mag."},{"key":"30_CR27","doi-asserted-by":"crossref","unstructured":"Simonovsky, M., Komodakis, N.: Dynamic edge-conditioned filters in convolutional neural networks on graphs. arXiv (2017)","DOI":"10.1109\/CVPR.2017.11"},{"key":"30_CR28","doi-asserted-by":"crossref","unstructured":"Teney, D., Anderson, P., He, X., van den Hengel, A.: Tips and tricks for visual question answering: learnings from the 2017 challenge. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00444"},{"key":"30_CR29","doi-asserted-by":"crossref","unstructured":"Teney, D., Liu, L., van den Hengel, A.: Graph-structured representations for visual question answering. In: CVPR (2016)","DOI":"10.1109\/CVPR.2017.344"},{"key":"30_CR30","doi-asserted-by":"crossref","unstructured":"Toutanova, K., Chen, D., Pantel, P., Poon, H., Choudhury, P., Gamon, M.: Representing text for joint embedding of text and knowledge bases. In: EMNLP (2015)","DOI":"10.18653\/v1\/D15-1174"},{"key":"30_CR31","unstructured":"Veli\u010dkovi\u0107, P., Cucurull, G., Casanova, A., Romero, A., Lio, P., Bengio, Y.: Graph attention networks. arXiv preprint arXiv:1710.10903 (2017)"},{"key":"30_CR32","doi-asserted-by":"crossref","unstructured":"Wang, P., Wu, Q., Shen, C., van den Hengel, A.: The VQA-machine: learning how to use existing vision algorithms to answer new questions. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.416"},{"key":"30_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"413","DOI":"10.1007\/978-3-030-01228-1_25","volume-title":"Computer Vision \u2013 ECCV 2018","author":"X Wang","year":"2018","unstructured":"Wang, X., Gupta, A.: Videos as space-time region graphs. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11209, pp. 413\u2013431. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01228-1_25"},{"key":"30_CR34","doi-asserted-by":"crossref","unstructured":"Wang, X., Ye, Y., Gupta, A.: Zero-shot recognition via semantic embeddings and knowledge graphs. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00717"},{"key":"30_CR35","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"451","DOI":"10.1007\/978-3-319-46478-7_28","volume-title":"Computer Vision \u2013 ECCV 2016","author":"H Xu","year":"2016","unstructured":"Xu, H., Saenko, K.: Ask, attend and answer: exploring question-guided spatial attention for visual question answering. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9911, pp. 451\u2013466. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46478-7_28"},{"key":"30_CR36","doi-asserted-by":"crossref","unstructured":"Yang, S.H., Long, B., Smola, A., Sadagopan, N., Zheng, Z., Zha, H.: Like like alike: joint friendship and interest propagation in social networks. In: Proceedings of the 20th International Conference on World Wide Web (2011)","DOI":"10.1145\/1963405.1963481"},{"key":"30_CR37","doi-asserted-by":"crossref","unstructured":"Yang, Z., He, X., Gao, J., Deng, L., Smola, A.: Stacked attention networks for image question answering. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.10"},{"key":"30_CR38","doi-asserted-by":"crossref","unstructured":"Yu, D., Fu, J., Mei, T., Rui, Y.: Multi-level attention networks for visual question answering. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.446"},{"key":"30_CR39","doi-asserted-by":"crossref","unstructured":"Zitnick, C.L., Parikh, D.: Bringing semantics into focus using visual abstraction. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.387"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-33676-9_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,2]],"date-time":"2022-10-02T17:04:16Z","timestamp":1664730256000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-33676-9_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030336752","9783030336769"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-33676-9_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"25 October 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DAGM GCPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"German Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Dortmund","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 September 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 September 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"41","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dagm2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/gcpr2019.tu-dortmund.de\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"91","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"43","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"47% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}