{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T19:09:38Z","timestamp":1771960178438,"version":"3.50.1"},"publisher-location":"Cham","reference-count":44,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031703409","type":"print"},{"value":"9783031703416","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70341-6_26","type":"book-chapter","created":{"date-parts":[[2024,8,30]],"date-time":"2024-08-30T20:26:39Z","timestamp":1725049599000},"page":"440-460","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["CircuitVQA: A Visual Question Answering Dataset for\u00a0Electrical Circuit Images"],"prefix":"10.1007","author":[{"given":"Rahul","family":"Mehta","sequence":"first","affiliation":[]},{"given":"Bhavyajeet","family":"Singh","sequence":"additional","affiliation":[]},{"given":"Vasudeva","family":"Varma","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2843-3110","authenticated-orcid":false,"given":"Manish","family":"Gupta","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,22]]},"reference":[{"key":"26_CR1","doi-asserted-by":"crossref","unstructured":"Alberti, C., Ling, J., Collins, M., Reitter, D.: Fusion of 
detected objects in text for visual question answering. In: EMNLP-IJCNLP, pp. 2131\u20132140 (2019)","DOI":"10.18653\/v1\/D19-1219"},{"key":"26_CR2","doi-asserted-by":"crossref","unstructured":"Andreas, J., Rohrbach, M., Darrell, T., Klein, D.: Neural module networks. In: CVPR, pp. 39\u201348 (2016)","DOI":"10.1109\/CVPR.2016.12"},{"key":"26_CR3","doi-asserted-by":"crossref","unstructured":"Antol, S., et al.: VQA: visual question answering. In: ICCV, pp. 2425\u20132433 (2015)","DOI":"10.1109\/ICCV.2015.279"},{"key":"26_CR4","doi-asserted-by":"crossref","unstructured":"Babkin, P., et al.: BizGraphQA: a dataset for image-based inference over graph-structured diagrams from business domains. In: SIGIR, pp. 2691\u20132700 (2023)","DOI":"10.1145\/3539618.3591875"},{"key":"26_CR5","doi-asserted-by":"crossref","unstructured":"Chaudhry, R., Shekhar, S., Gupta, U., Maneriker, P., Bansal, P., Joshi, A.: Leaf-QA: locate, encode & attend for figure question answering. In: WACV, pp. 3512\u20133521 (2020)","DOI":"10.1109\/WACV45572.2020.9093269"},{"key":"26_CR6","unstructured":"Chiang, W.L., et al.: Vicuna: an open-source chatbot impressing GPT-4 with 90%* ChatGPT quality (2023). https:\/\/lmsys.org\/blog\/2023-03-30-vicuna\/"},{"key":"26_CR7","unstructured":"Dai, W., et al.: InstructBLIP: towards general-purpose vision-language models with instruction tuning (2023)"},{"key":"26_CR8","doi-asserted-by":"crossref","unstructured":"Das, A., et al.: Visual dialog. In: CVPR, pp. 326\u2013335 (2017)","DOI":"10.1109\/CVPR.2017.121"},{"key":"26_CR9","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. In: NAACL-HLT, vol.\u00a01, p.\u00a02 (2019)"},{"key":"26_CR10","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. 
In: ICLR (2020)"},{"key":"26_CR11","doi-asserted-by":"crossref","unstructured":"Garcia, N., Otani, M., Chu, C., Nakashima, Y.: Knowit VQA: answering knowledge-based questions about videos. In: AAAI, vol.\u00a034, pp. 10826\u201310834 (2020)","DOI":"10.1609\/aaai.v34i07.6713"},{"key":"26_CR12","doi-asserted-by":"crossref","unstructured":"Geman, D., Geman, S., Hallonquist, N., Younes, L.: Visual Turing test for computer vision systems. Proc. Natl. Acad. Sci. 112(12), 3618\u20133623 (2015)","DOI":"10.1073\/pnas.1422953112"},{"key":"26_CR13","doi-asserted-by":"crossref","unstructured":"Gupta, P., Gupta, M.: NewsKVQA: knowledge-aware news video question answering. In: Pacific-Asia Conference on Knowledge Discovery and Data Mining, pp. 3\u201315 (2022)","DOI":"10.1007\/978-3-031-05981-0_1"},{"key":"26_CR14","doi-asserted-by":"crossref","unstructured":"Hu, R., Andreas, J., Rohrbach, M., Darrell, T., Saenko, K.: Learning to reason: end-to-end module networks for visual question answering. In: ICCV, pp. 804\u2013813 (2017)","DOI":"10.1109\/ICCV.2017.93"},{"key":"26_CR15","unstructured":"Huang, G., et\u00a0al.: Machine learning for electronic design automation: a survey. Trans. Des. Autom. Electron. Syst. (TODAES) 26(5), 1\u201346 (2021)"},{"issue":"12","key":"26_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3571730","volume":"55","author":"Z Ji","year":"2023","unstructured":"Ji, Z., et al.: Survey of hallucination in natural language generation. ACM Comput. Surv. 55(12), 1\u201338 (2023)","journal-title":"ACM Comput. Surv."},{"key":"26_CR17","doi-asserted-by":"crossref","unstructured":"Kafle, K., Price, B., Cohen, S., Kanan, C.: DVQA: understanding data visualizations via question answering. In: CVPR, pp. 5648\u20135656 (2018)","DOI":"10.1109\/CVPR.2018.00592"},{"key":"26_CR18","unstructured":"Kahou, S.E., Michalski, V., Atkinson, A., K\u00e1d\u00e1r, \u00c1., Trischler, A., Bengio, Y.: FigureQA: an annotated figure dataset for visual reasoning. 
arXiv:1710.07300 (2017)"},{"key":"26_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1007\/978-3-319-46493-0_15","volume-title":"Computer Vision \u2013 ECCV 2016","author":"A Kembhavi","year":"2016","unstructured":"Kembhavi, A., Salvato, M., Kolve, E., Seo, M., Hajishirzi, H., Farhadi, A.: A diagram is worth a dozen images. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 235\u2013251. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_15"},{"key":"26_CR20","doi-asserted-by":"crossref","unstructured":"Kembhavi, A., Seo, M., Schwenk, D., Choi, J., Farhadi, A., Hajishirzi, H.: Are you smarter than a sixth grader? Textbook question answering for multimodal machine comprehension. In: CVPR, pp. 4999\u20135007 (2017)","DOI":"10.1109\/CVPR.2017.571"},{"issue":"1","key":"26_CR21","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123","author":"R Krishna","year":"2017","unstructured":"Krishna, R., et al.: Visual genome: connecting language and vision using crowdsourced dense image annotations. IJCV 123(1), 32\u201373 (2017)","journal-title":"IJCV"},{"key":"26_CR22","unstructured":"Lee, K., et al.: Pix2struct: screenshot parsing as pretraining for visual language understanding. In: ICML, pp. 18893\u201318912 (2023)"},{"key":"26_CR23","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: Blip: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: ICML, pp. 12888\u201312900. PMLR (2022)"},{"key":"26_CR24","doi-asserted-by":"crossref","unstructured":"Li, Y., Du, Y., Zhou, K., Wang, J., Zhao, W.X., Wen, J.R.: Evaluating object hallucination in large vision-language models. In: EMNLP, pp. 
292\u2013305 (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.20"},{"key":"26_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"26_CR26","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning. In: NeuRIPS, vol. 36 (2024)"},{"key":"26_CR27","first-page":"2507","volume":"35","author":"P Lu","year":"2022","unstructured":"Lu, P., et al.: Learn to explain: multimodal reasoning via thought chains for science question answering. NeuRIPS 35, 2507\u20132521 (2022)","journal-title":"NeuRIPS"},{"key":"26_CR28","doi-asserted-by":"crossref","unstructured":"Masry, A., Do, X.L., Tan, J.Q., Joty, S., Hoque, E.: ChartQA: a benchmark for question answering about charts with visual and logical reasoning. In: ACL, pp. 2263\u20132279 (2022)","DOI":"10.18653\/v1\/2022.findings-acl.177"},{"key":"26_CR29","doi-asserted-by":"crossref","unstructured":"Methani, N., Ganguly, P., Khapra, M.M., Kumar, P.: PlotQA: reasoning over scientific plots. In: WACV, pp. 1527\u20131536 (2020)","DOI":"10.1109\/WACV45572.2020.9093523"},{"key":"26_CR30","unstructured":"OpenAI: ChatGPT. https:\/\/chat.openai.com\/"},{"key":"26_CR31","doi-asserted-by":"publisher","unstructured":"OpenAI, et\u00a0al.: GPT-4 technical report. arXiv preprint arXiv:2303.08774 (2023). https:\/\/doi.org\/10.48550\/arXiv.2303.08774","DOI":"10.48550\/arXiv.2303.08774"},{"key":"26_CR32","doi-asserted-by":"crossref","unstructured":"Penamakuri, A.S., Gupta, M., Gupta, M.D., Mishra, A.: Answer mining from a pool of images: towards retrieval-based visual question answering. 
In: Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence, pp. 1312\u20131321 (2023)","DOI":"10.24963\/ijcai.2023\/146"},{"key":"26_CR33","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: ICML, pp. 8748\u20138763. PMLR (2021)"},{"key":"26_CR34","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: unified, real-time object detection. In: CVPR, pp. 779\u2013788 (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"26_CR35","doi-asserted-by":"crossref","unstructured":"Rohrbach, A., Hendricks, L.A., Burns, K., Darrell, T., Saenko, K.: Object hallucination in image captioning. In: EMNLP, pp. 4035\u20134045 (2018)","DOI":"10.18653\/v1\/D18-1437"},{"key":"26_CR36","doi-asserted-by":"crossref","unstructured":"Singh, A., et al.: Towards VQA models that can read. In: CVPR, pp. 8317\u20138326 (2019)","DOI":"10.1109\/CVPR.2019.00851"},{"key":"26_CR37","doi-asserted-by":"crossref","unstructured":"Singh, A., Pang, G., Toh, M., Huang, J., Galuba, W., Hassner, T.: TextOCR: towards large-scale end-to-end reasoning for arbitrary-shaped scene text. In: CVPR, pp. 8802\u20138812 (2021)","DOI":"10.1109\/CVPR46437.2021.00869"},{"key":"26_CR38","doi-asserted-by":"crossref","unstructured":"Thoma, F., Bayer, J., Li, Y., Dengel, A.: A public ground-truth dataset for handwritten circuit diagram images. In: ICDAR, pp. 20\u201327 (2021)","DOI":"10.1007\/978-3-030-86198-8_2"},{"key":"26_CR39","unstructured":"Touvron, H., et\u00a0al.: LLAMA: open and efficient foundation language models. arXiv:2302.13971 (2023)"},{"key":"26_CR40","unstructured":"Wang, J., et al.: GIT: a generative image-to-text transformer for vision and language. TMLR (2022)"},{"key":"26_CR41","unstructured":"Yang, Z., Li, L., Lin, K., Wang, J., Lin, C.C., Liu, Z., Wang, L.: The dawn of LMMS: preliminary explorations with GPT-4v (ision). 
arXiv preprint arXiv:2309.17421 9(1), 1 (2023)"},{"key":"26_CR42","doi-asserted-by":"crossref","unstructured":"Yang, Z., He, X., Gao, J., Deng, L., Smola, A.: Stacked attention networks for image question answering. In: CVPR, pp. 21\u201329 (2016)","DOI":"10.1109\/CVPR.2016.10"},{"key":"26_CR43","unstructured":"Zauner, C.: Implementation and benchmarking of perceptual image hash functions. https:\/\/phash.org\/docs\/pubs\/thesis_zauner.pdf. Accessed 19 Feb 2024"},{"key":"26_CR44","doi-asserted-by":"crossref","unstructured":"Zeng, K.H., Chen, T.H., Chuang, C.Y., Liao, Y.H., Niebles, J.C., Sun, M.: Leveraging video descriptions to learn video question answering. In: AAAI, vol.\u00a031 (2017)","DOI":"10.1609\/aaai.v31i1.11238"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70341-6_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,30]],"date-time":"2024-08-30T20:31:09Z","timestamp":1725049869000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70341-6_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031703409","9783031703416"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70341-6_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"22 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference 
Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vilnius","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lithuania","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2024.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}