{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T08:24:26Z","timestamp":1774599866112,"version":"3.50.1"},"publisher-location":"Cham","reference-count":48,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030585884","type":"print"},{"value":"9783030585891","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58589-1_23","type":"book-chapter","created":{"date-parts":[[2020,11,11]],"date-time":"2020-11-11T06:18:04Z","timestamp":1605075484000},"page":"379-396","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":39,"title":["VQA-LOL: Visual Question Answering Under the Lens of Logic"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5593-2804","authenticated-orcid":false,"given":"Tejas","family":"Gokhale","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5634-410X","authenticated-orcid":false,"given":"Pratyay","family":"Banerjee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7549-723X","authenticated-orcid":false,"given":"Chitta","family":"Baral","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0126-8976","authenticated-orcid":false,"given":"Yezhou","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,11,12]]},"reference":[{"key":"23_CR1","doi-asserted-by":"crossref","unstructured":"Aditya, S., Yang, Y., Baral, C.: Integrating knowledge and reasoning in image understanding. In: Proceedings of the 28th International Joint Conference on Artificial Intelligence, IJCAI 2019, pp. 6252\u20136259. AAAI Press (2019). http:\/\/dl.acm.org\/citation.cfm?id=3367722.3367926","DOI":"10.24963\/ijcai.2019\/873"},{"key":"23_CR2","doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Bottom-up and top-down attention for image captioning and visual question answering. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00636"},{"key":"23_CR3","doi-asserted-by":"crossref","unstructured":"Antol, S., et al.: VQA: visual question answering. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2425\u20132433 (2015)","DOI":"10.1109\/ICCV.2015.279"},{"key":"23_CR4","doi-asserted-by":"crossref","unstructured":"Asai, A., Hajishirzi, H.: Logic-guided data augmentation and regularization for consistent question answering. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 5642\u20135650. Association for Computational Linguistics (2020). https:\/\/www.aclweb.org\/anthology\/2020.acl-main.499","DOI":"10.18653\/v1\/2020.acl-main.499"},{"key":"23_CR5","doi-asserted-by":"crossref","unstructured":"Bhattacharya, N., Li, Q., Gurari, D.: Why does a visual question have different answers? In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4271\u20134280 (2019)","DOI":"10.1109\/ICCV.2019.00437"},{"key":"23_CR6","unstructured":"Bobrow, D.G.: Natural language input for a computer problem solving system (1964)"},{"key":"23_CR7","doi-asserted-by":"crossref","unstructured":"Boole, G.: An investigation of the laws of thought: on which are founded themathematical theories of logic and probabilities. Dover Publications (1854)","DOI":"10.5962\/bhl.title.29413"},{"key":"23_CR8","unstructured":"Bordes, A., Usunier, N., Garcia-Duran, A., Weston, J., Yakhnenko, O.: Translating embeddings for modeling multi-relational data. In: Advances in Neural Information Processing Systems, pp. 2787\u20132795 (2013)"},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"Bowman, S.R., Potts, C., Manning, C.D.: Recursive neural networks can learn logical semantics. arXiv preprint arXiv:1406.1827 (2014)","DOI":"10.18653\/v1\/W15-4002"},{"key":"23_CR10","volume-title":"Conceptual Change in Childhood","author":"S Carey","year":"1985","unstructured":"Carey, S.: Conceptual Change in Childhood. MIT Press, Cambridge (1985)"},{"key":"23_CR11","doi-asserted-by":"publisher","unstructured":"Cesana-Arlotti, N., Mart\u00edn, A., T\u00e9gl\u00e1s, E., Vorobyova, L., Cetnarski, R., Bonatti, L.L.: Precursors of logical reasoning in preverbal human infants. Science 359(6381), 1263\u20131266 (2018).https:\/\/doi.org\/10.1126\/science.aao3539. https:\/\/science.sciencemag.org\/content\/359\/6381\/1263","DOI":"10.1126\/science.aao3539"},{"issue":"4","key":"23_CR12","doi-asserted-by":"publisher","first-page":"696","DOI":"10.2307\/2272415","volume":"37","author":"J Corcoran","year":"1972","unstructured":"Corcoran, J.: Completeness of an ancient logic. J. Symb. Logic 37(4), 696\u2013702 (1972)","journal-title":"J. Symb. Logic"},{"key":"23_CR13","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"23_CR14","doi-asserted-by":"crossref","unstructured":"Fang, Z., Gokhale, T., Banerjee, P., Baral, C., Yang, Y.: Video2commonsense: generating commonsense descriptions to enrich video captioning. arXiv preprint arXiv:2003.05162 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.61"},{"issue":"25","key":"23_CR15","doi-asserted-by":"publisher","first-page":"379","DOI":"10.4064\/fm-25-1-379-387","volume":"1","author":"M Fr\u00e9chet","year":"1935","unstructured":"Fr\u00e9chet, M.: G\u00e9n\u00e9ralisation du th\u00e9oreme des probabilit\u00e9s totales. Fundamenta Mathematicae 1(25), 379\u2013387 (1935)","journal-title":"Fundamenta Mathematicae"},{"key":"23_CR16","unstructured":"Gopnik, A., Meltzoff, A.N., Kuhl, P.K.: The Scientist in the Crib: Minds, Brains, and How Children Learn. William Morrow & Co (1999)"},{"key":"23_CR17","doi-asserted-by":"crossref","unstructured":"Goyal, Y., Khot, T., Summers-Stay, D., Batra, D., Parikh, D.: Making the V in VQA matter: elevating the role of image understanding in visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6904\u20136913 (2017)","DOI":"10.1109\/CVPR.2017.670"},{"key":"23_CR18","unstructured":"Hegel, G.W.F.: Hegel\u2019s science of logic (1929)"},{"key":"23_CR19","doi-asserted-by":"crossref","unstructured":"Horn, L.R., Kato, Y.: Negation and Polarity: Syntactic and Semantic Perspectives. OUP, Oxford (2000)","DOI":"10.1093\/oso\/9780198238744.001.0001"},{"key":"23_CR20","unstructured":"Hudson, D.A., Manning, C.D.: GQA: a new dataset for compositional question answering over real-world images. arXiv preprint arXiv:1902.09506 (2019)"},{"key":"23_CR21","doi-asserted-by":"crossref","unstructured":"Johnson, J., Hariharan, B., van der Maaten, L., Fei-Fei, L., Lawrence Zitnick, C., Girshick, R.: Clevr: a diagnostic dataset for compositional language and elementary visual reasoning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2901\u20132910 (2017)","DOI":"10.1109\/CVPR.2017.215"},{"key":"23_CR22","unstructured":"Kassner, N., Sch\u00fctze, H.: Negated lama: birds cannot fly. arXiv preprint arXiv:1911.03343 (2019)"},{"key":"23_CR23","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"23_CR24","doi-asserted-by":"publisher","unstructured":"Lewis, M., Steedman, M.: Combined distributional and logical semantics. Trans. Assoc. Comput. Linguist. 1, 179\u2013192 (2013). https:\/\/doi.org\/10.1162\/tacl_a_00219. https:\/\/www.aclweb.org\/anthology\/Q13-1015","DOI":"10.1162\/tacl_a_00219"},{"key":"23_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"23_CR26","unstructured":"Liu, Y., et al.: Roberta: a robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"key":"23_CR27","unstructured":"Lu, J., Batra, D., Parikh, D., Lee, S.: Vilbert: pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. In: Advances in Neural Information Processing Systems, pp. 13\u201323 (2019)"},{"key":"23_CR28","unstructured":"Malinowski, M., Fritz, M.: A multi-world approach to question answering about real-world scenes based on uncertain input. In: Advances in Neural Information Processing Systems, pp. 1682\u20131690 (2014)"},{"key":"23_CR29","unstructured":"Mao, J., Gan, C., Kohli, P., Tenenbaum, J.B., Wu, J.: The neuro-symbolic concept learner: interpreting scenes, words, and sentences from natural supervision. In: International Conference on Learning Representations (2019). https:\/\/openreview.net\/forum?id=rJgMlhRctm"},{"key":"23_CR30","doi-asserted-by":"crossref","unstructured":"Mintz, M., Bills, S., Snow, R., Jurafsky, D.: Distant supervision for relation extraction without labeled data. In: Proceedings of the Joint Conference of the 47th Annual Meeting of the ACL and the 4th International Joint Conference on Natural Language Processing of the AFNLP, vol. 2, pp. 1003\u20131011. Association for Computational Linguistics (2009)","DOI":"10.3115\/1690219.1690287"},{"issue":"2","key":"23_CR31","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1162\/COLI_a_00095","volume":"38","author":"R Morante","year":"2012","unstructured":"Morante, R., Sporleder, C.: Modality and negation: an introduction to the special issue. Comput. Linguist. 38(2), 223\u2013260 (2012)","journal-title":"Comput. Linguist."},{"key":"23_CR32","doi-asserted-by":"crossref","unstructured":"Neelakantan, A., Roth, B., McCallum, A.: Compositional vector space models for knowledge base completion. arXiv preprint arXiv:1504.06662 (2015)","DOI":"10.3115\/v1\/P15-1016"},{"key":"23_CR33","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.: Glove: global vectors for word representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1532\u20131543 (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"23_CR34","unstructured":"Piattelli-Palmarini, M.: Language and learning: the debate between jean piaget and noam chomsky (1980)"},{"key":"23_CR35","unstructured":"Raju, P.: The principle of four-cornered negation in Indian philosophy. Rev. Metaphys. 694\u2013713 (1954)"},{"key":"23_CR36","unstructured":"Ramshaw, L., Marcus, M.: Text chunking using transformation-based learning. In: Third Workshop on Very Large Corpora (1995). https:\/\/www.aclweb.org\/anthology\/W95-0107"},{"key":"23_CR37","unstructured":"Ren, M., Kiros, R., Zemel, R.: Exploring models and data for image question answering. In: Advances in Neural Information Processing Systems, pp. 2953\u20132961 (2015)"},{"key":"23_CR38","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems, pp. 91\u201399 (2015)"},{"key":"23_CR39","unstructured":"Riedel, S., Yao, L., McCallum, A., Marlin, B.M.: Relation extraction with matrix factorization and universal schemas. In: Proceedings of the 2013 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Atlanta, Georgia, pp. 74\u201384. Association for Computational Linguistics, June 2013. https:\/\/www.aclweb.org\/anthology\/N13-1008"},{"key":"23_CR40","doi-asserted-by":"crossref","unstructured":"Rockt\u00e4schel, T., Bo\u0161njak, M., Singh, S., Riedel, S.: Low-dimensional embeddings of logic. In: Proceedings of the ACL 2014 Workshop on Semantic Parsing, pp. 45\u201349 (2014)","DOI":"10.3115\/v1\/W14-2409"},{"key":"23_CR41","unstructured":"Socher, R., Chen, D., Manning, C.D., Ng, A.: Reasoning with neural tensor networks for knowledge base completion. In: Advances in Neural Information Processing Systems, pp. 926\u2013934 (2013)"},{"key":"23_CR42","unstructured":"Spinoza, B.D.: Ethics, translated by andrew boyle, introduction by ts gregory (1934)"},{"key":"23_CR43","doi-asserted-by":"crossref","unstructured":"Tan, H., Bansal, M.: Lxmert: learning cross-modality encoder representations from transformers. arXiv preprint arXiv:1908.07490 (2019)","DOI":"10.18653\/v1\/D19-1514"},{"key":"23_CR44","unstructured":"Wittgenstein, L.: Tractatus Logico-Philosophicus. Routledge (1921)"},{"key":"23_CR45","doi-asserted-by":"crossref","unstructured":"Yu, Z., Yu, J., Cui, Y., Tao, D., Tian, Q.: Deep modular co-attention networks for visual question answering. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2019","DOI":"10.1109\/CVPR.2019.00644"},{"key":"23_CR46","doi-asserted-by":"crossref","unstructured":"Zellers, R., Bisk, Y., Farhadi, A., Choi, Y.: From recognition to cognition: visual commonsense reasoning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6720\u20136731 (2019)","DOI":"10.1109\/CVPR.2019.00688"},{"key":"23_CR47","unstructured":"Zettlemoyer, L.S., Collins, M.: Learning to map sentences to logical form: structured classification with probabilistic categorial grammars. arXiv preprint arXiv:1207.1420 (2012)"},{"key":"23_CR48","doi-asserted-by":"crossref","unstructured":"Zhou, L., Palangi, H., Zhang, L., Hu, H., Corso, J.J., Gao, J.: Unified vision-language pre-training for image captioning and VQA. In: AAAI, pp. 13041\u201313049 (2020)","DOI":"10.1609\/aaai.v34i07.7005"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58589-1_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,11]],"date-time":"2024-11-11T00:20:43Z","timestamp":1731284443000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58589-1_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585884","9783030585891"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58589-1_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"12 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}