{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T16:24:53Z","timestamp":1771950293842,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,9,22]],"date-time":"2023-09-22T00:00:00Z","timestamp":1695340800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Deutsche Forschungsgemeinschaft (DFG, German Research Foundation)","award":["390740016"],"award-info":[{"award-number":["390740016"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,9,22]]},"DOI":"10.1145\/3615522.3615547","type":"proceedings-article","created":{"date-parts":[[2023,10,20]],"date-time":"2023-10-20T18:06:54Z","timestamp":1697825214000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Visual Analysis of Scene-Graph-Based Visual Question Answering"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-4034-3453","authenticated-orcid":false,"given":"Noel","family":"Sch\u00e4fer","sequence":"first","affiliation":[{"name":"VISUS, University of Stuttgart, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0799-4293","authenticated-orcid":false,"given":"Sebastian","family":"K\u00fcnzel","sequence":"additional","affiliation":[{"name":"VISUS, University of Stuttgart, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3960-3290","authenticated-orcid":false,"given":"Tanja","family":"Munz-K\u00f6rner","sequence":"additional","affiliation":[{"name":"VISUS, University of Stuttgart, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-7503-1413","authenticated-orcid":false,"given":"Pascal","family":"Tilli","sequence":"additional","affiliation":[{"name":"IMS, University of Stuttgart, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3595-5221","authenticated-orcid":false,"given":"Sandeep","family":"Vidyapu","sequence":"additional","affiliation":[{"name":"VISUS, University of Stuttgart, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7893-9147","authenticated-orcid":false,"given":"Ngoc","family":"Thang Vu","sequence":"additional","affiliation":[{"name":"IMS, University of Stuttgart, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1174-1026","authenticated-orcid":false,"given":"Daniel","family":"Weiskopf","sequence":"additional","affiliation":[{"name":"VISUS, University of Stuttgart, Germany"}]}],"member":"320","published-online":{"date-parts":[[2023,10,20]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Proceedings of the IEEE International Conference on Computer Vision. 2425\u20132433","author":"Antol S.","unstructured":"S. Antol , A. Agrawal , J. Lu , M. Mitchell , D. Batra , C.\u00a0 L. Zitnick , and D. Parikh . 2015. VQA: Visual question answering . In Proceedings of the IEEE International Conference on Computer Vision. 2425\u20132433 . S. Antol, A. Agrawal, J. Lu, M. Mitchell, D. Batra, C.\u00a0L. Zitnick, and D. Parikh. 2015. VQA: Visual question answering. In Proceedings of the IEEE International Conference on Computer Vision. 2425\u20132433."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3137605"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCG.2018.042731661"},{"key":"e_1_3_2_2_5_1","unstructured":"V. Damodaran S. Chakravarthy A. Kumar A. Umapathy T. Mitamura Y. Nakashima N. Garcia and C. Chu. 2021. Understanding the role of scene graphs in visual question answering. arXiv 2101.05479 (2021).  V. Damodaran S. Chakravarthy A. Kumar A. Umapathy T. Mitamura Y. Nakashima N. Garcia and C. Chu. 2021. Understanding the role of scene graphs in visual question answering. arXiv 2101.05479 (2021)."},{"key":"e_1_3_2_2_6_1","unstructured":"M. Danilevsky K. Qian R. Aharonov Y. Katsis B. Kawas and P. Sen. 2020. A survey of the state of explainable AI for natural language processing. In Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing. Association for Computational Linguistics 447\u2013459.  M. Danilevsky K. Qian R. Aharonov Y. Katsis B. Kawas and P. Sen. 2020. A survey of the state of explainable AI for natural language processing. In Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing. Association for Computational Linguistics 447\u2013459."},{"key":"e_1_3_2_2_7_1","volume-title":"2018 41st International Convention on Information and Communication Technology, Electronics and Microelectronics. IEEE, 0210\u20130215","author":"Do\u0161ilovi\u0107 K.","unstructured":"F.\u00a0 K. Do\u0161ilovi\u0107 , M. Br\u010di\u0107 , and N. Hlupi\u0107 . 2018. Explainable artificial intelligence: A survey . In 2018 41st International Convention on Information and Communication Technology, Electronics and Microelectronics. IEEE, 0210\u20130215 . F.\u00a0K. Do\u0161ilovi\u0107, M. Br\u010di\u0107, and N. Hlupi\u0107. 2018. Explainable artificial intelligence: A survey. In 2018 41st International Convention on Information and Communication Technology, Electronics and Microelectronics. IEEE, 0210\u20130215."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1186\/s42492-021-00090-0"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cag.2018.09.018"},{"key":"e_1_3_2_2_10_1","volume-title":"New Trends in Computer Graphics: Proceedings of CG International\u201988","author":"Gervautz M.","unstructured":"M. Gervautz and W. Purgathofer . 1988. A simple method for color quantization: Octree quantization . In New Trends in Computer Graphics: Proceedings of CG International\u201988 . Springer, 219\u2013231. M. Gervautz and W. Purgathofer. 1988. A simple method for color quantization: Octree quantization. In New Trends in Computer Graphics: Proceedings of CG International\u201988. Springer, 219\u2013231."},{"key":"e_1_3_2_2_11_1","volume-title":"Generating Natural Language Explanations for Visual Question Answering using Scene Graphs and Visual Attention. arXiv","author":"Ghosh S.","year":"1902","unstructured":"S. Ghosh , G. Burachas , A. Ray , and A.Ziskind. 2019. Generating Natural Language Explanations for Visual Question Answering using Scene Graphs and Visual Attention. arXiv 1902 .05715 (2019). S. Ghosh, G. Burachas, A. Ray, and A.Ziskind. 2019. Generating Natural Language Explanations for Visual Question Answering using Scene Graphs and Visual Attention. arXiv 1902.05715 (2019)."},{"key":"e_1_3_2_2_12_1","unstructured":"Y. Goyal A. Mohapatra D. Parikh and D. Batra. 2016. Towards transparent AI systems: Interpreting visual question answering models. arXiv: 1608.08974 (2016).  Y. Goyal A. Mohapatra D. Parikh and D. Batra. 2016. Towards transparent AI systems: Interpreting visual question answering models. arXiv: 1608.08974 (2016)."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2018.2843369"},{"key":"e_1_3_2_2_14_1","unstructured":"Z. Huang Z. Zeng B. Liu D. Fu and J. Fu. 2020. Pixel-BERT: Aligning image pixels with text by deep multi-modal transformers. arXiv 2004.00849 (2020).  Z. Huang Z. Zeng B. Liu D. Fu and J. Fu. 2020. Pixel-BERT: Aligning image pixels with text by deep multi-modal transformers. arXiv 2004.00849 (2020)."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00686"},{"key":"e_1_3_2_2_16_1","volume-title":"Proceedings of the 2015 IEEE Conference on Computer Vision and Pattern Recognition. Institute of Electrical and Electronics Engineers (IEEE), 3668\u20133678","author":"Johnson J.","unstructured":"J. Johnson , R. Krishna , M. Stark , L. Li , D.\u00a0 A. Shamma , M.\u00a0 S. Bernstein , and L. Fei-Fei . 2015. Image retrieval using scene graphs . In Proceedings of the 2015 IEEE Conference on Computer Vision and Pattern Recognition. Institute of Electrical and Electronics Engineers (IEEE), 3668\u20133678 . J. Johnson, R. Krishna, M. Stark, L. Li, D.\u00a0A. Shamma, M.\u00a0S. Bernstein, and L. Fei-Fei. 2015. Image retrieval using scene graphs. In Proceedings of the 2015 IEEE Conference on Computer Vision and Pattern Recognition. Institute of Electrical and Electronics Engineers (IEEE), 3668\u20133678."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2017.2744718"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"crossref","unstructured":"D. Keim F. Mansmann J. Schneidewind J. Thomas and H. Ziegler. 2008. Visual analytics: Scope and challenges. In Visual Data Mining: Theory Techniques and Tools for Visual Analytics S.\u00a0J. Simoff M.\u00a0H. B\u00f6hlen and A.\u00a0Mazeika (Eds.). Springer Berlin Heidelberg 76\u201390.  D. Keim F. Mansmann J. Schneidewind J. Thomas and H. Ziegler. 2008. Visual analytics: Scope and challenges. In Visual Data Mining: Theory Techniques and Tools for Visual Analytics S.\u00a0J. Simoff M.\u00a0H. B\u00f6hlen and A.\u00a0Mazeika (Eds.). Springer Berlin Heidelberg 76\u201390.","DOI":"10.1007\/978-3-540-71080-6_6"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.3390\/s21217164"},{"key":"e_1_3_2_2_20_1","volume-title":"Proceedings of the Third Workshop on Multimodal Artificial Intelligence. Association for Computational Linguistics, 79\u201386","author":"Liang W.","unstructured":"W. Liang , Y. Jiang , and Z. Liu . 2021. GraphVQA: Language-Guided Graph Neural Networks for Graph-based Visual Question Answering . In Proceedings of the Third Workshop on Multimodal Artificial Intelligence. Association for Computational Linguistics, 79\u201386 . W. Liang, Y. Jiang, and Z. Liu. 2021. GraphVQA: Language-Guided Graph Neural Networks for Graph-based Visual Question Answering. In Proceedings of the Third Workshop on Multimodal Artificial Intelligence. Association for Computational Linguistics, 79\u201386."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.visinf.2017.01.006"},{"key":"e_1_3_2_2_22_1","volume-title":"Proceedings of the 2017 IEEE Conference on Visual Analytics Science and Technology. 13\u201324","author":"Ming Y.","unstructured":"Y. Ming , S. Cao , R. Zhang , Z. Li , Y. Chen , Y. Song , and H. Qu . 2017. Understanding hidden memories of recurrent neural networks . In Proceedings of the 2017 IEEE Conference on Visual Analytics Science and Technology. 13\u201324 . Y. Ming, S. Cao, R. Zhang, Z. Li, Y. Chen, Y. Song, and H. Qu. 2017. Understanding hidden memories of recurrent neural networks. In Proceedings of the 2017 IEEE Conference on Visual Analytics Science and Technology. 13\u201324."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cag.2021.12.003"},{"key":"e_1_3_2_2_24_1","unstructured":"W. Norcliffe-Brown S. Vafeias and S. Parisot. 2018. Learning conditioned graph structures for interpretable visual question answering. In Advances in Neural Information Processing Systems S.\u00a0Bengio H.\u00a0Wallach H.\u00a0Larochelle K.\u00a0Grauman N.\u00a0Cesa-Bianchi and R.\u00a0Garnett (Eds.). Vol.\u00a031. Curran Associates Inc.  W. Norcliffe-Brown S. Vafeias and S. Parisot. 2018. Learning conditioned graph structures for interpretable visual question answering. In Advances in Neural Information Processing Systems S.\u00a0Bengio H.\u00a0Wallach H.\u00a0Larochelle K.\u00a0Grauman N.\u00a0Cesa-Bianchi and R.\u00a0Garnett (Eds.). Vol.\u00a031. Curran Associates Inc."},{"key":"e_1_3_2_2_25_1","volume-title":"Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics, 1532\u20131543","author":"Pennington J.","unstructured":"J. Pennington , R. Socher , and C. Manning . 2014. GloVe: Global Vectors for Word Representation . In Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics, 1532\u20131543 . J. Pennington, R. Socher, and C. Manning. 2014. GloVe: Global Vectors for Word Representation. In Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics, 1532\u20131543."},{"key":"e_1_3_2_2_26_1","volume-title":"Proceedings of the NIPS 2017 Workshop on Visually-Grounded Interaction and Language.","author":"Rajani F.","year":"2017","unstructured":"N.\u00a0 F. Rajani and R.\u00a0 J. Mooney . 2017 . Ensembling Visual Explanations for VQA . In Proceedings of the NIPS 2017 Workshop on Visually-Grounded Interaction and Language. N.\u00a0F. Rajani and R.\u00a0J. Mooney. 2017. Ensembling Visual Explanations for VQA. In Proceedings of the NIPS 2017 Workshop on Visually-Grounded Interaction and Language."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1002\/ail2.51"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.18419\/darus-3589"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2017.2744158"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2020.2978284"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-32236-6_51"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-020-0191-7"},{"key":"e_1_3_2_2_33_1","unstructured":"R. Yusuf J. Owusu H. Wang K. Qin Z. Lawal and Y. Dong. 2022. VQA and visual reasoning: An overview of recent datasets methods and challenges. arXiv 2212.13296 (2022).  R. Yusuf J. Owusu H. Wang K. Qin Z. Lawal and Y. Dong. 2022. VQA and visual reasoning: An overview of recent datasets methods and challenges. arXiv 2212.13296 (2022)."}],"event":{"name":"VINCI 2023: The 16th International Symposium on Visual Information Communication and Interaction","location":"Guangzhou China","acronym":"VINCI 2023"},"container-title":["Proceedings of the 16th International Symposium on Visual Information Communication and Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3615522.3615547","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3615522.3615547","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:45:46Z","timestamp":1750178746000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3615522.3615547"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,22]]},"references-count":32,"alternative-id":["10.1145\/3615522.3615547","10.1145\/3615522"],"URL":"https:\/\/doi.org\/10.1145\/3615522.3615547","relation":{},"subject":[],"published":{"date-parts":[[2023,9,22]]},"assertion":[{"value":"2023-10-20","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}