{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T20:16:07Z","timestamp":1780344967779,"version":"3.54.1"},"publisher-location":"Cham","reference-count":66,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031200588","type":"print"},{"value":"9783031200595","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-20059-5_9","type":"book-chapter","created":{"date-parts":[[2022,10,28]],"date-time":"2022-10-28T16:02:50Z","timestamp":1666972970000},"page":"148-166","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":24,"title":["Reliable Visual Question Answering: Abstain Rather Than Answer Incorrectly"],"prefix":"10.1007","author":[{"given":"Spencer","family":"Whitehead","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Suzanne","family":"Petryk","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Vedaad","family":"Shakib","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Joseph","family":"Gonzalez","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Trevor","family":"Darrell","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Anna","family":"Rohrbach","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Marcus","family":"Rohrbach","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2022,10,29]]},"reference":[{"key":"9_CR1","doi-asserted-by":"crossref","unstructured":"Agrawal, A., Batra, D., Parikh, D., Kembhavi, A.: Don\u2019t just assume; look and answer: overcoming priors for visual question answering. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00522"},{"key":"9_CR2","doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Bottom-up and top-down attention for image captioning and visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6077\u20136086 (2018)","DOI":"10.1109\/CVPR.2018.00636"},{"key":"9_CR3","doi-asserted-by":"crossref","unstructured":"Antol, S., et al.: VQA: visual question answering. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2425\u20132433 (2015)","DOI":"10.1109\/ICCV.2015.279"},{"issue":"6","key":"9_CR4","doi-asserted-by":"publisher","first-page":"e15154","DOI":"10.2196\/15154","volume":"22","author":"O Asan","year":"2020","unstructured":"Asan, O., Bayrak, A.E., Choudhury, A., et al.: Artificial intelligence and human trust in healthcare: focus on clinicians. J. Med. Internet Res. 22(6), e15154 (2020)","journal-title":"J. Med. Internet Res."},{"key":"9_CR5","doi-asserted-by":"crossref","unstructured":"Bhattacharya, N., Li, Q., Gurari, D.: Why does a visual question have different answers? In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4271\u20134280 (2019)","DOI":"10.1109\/ICCV.2019.00437"},{"key":"9_CR6","unstructured":"Black, E., Leino, K., Fredrikson, M.: Selective ensembles for consistent predictions. In: International Conference on Learning Representations (2022)"},{"key":"9_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1007\/978-3-030-58577-8_7","volume-title":"Computer Vision \u2013 ECCV 2020","author":"YC Chen","year":"2020","unstructured":"Chen, Y.C., et al.: UNITER: UNiversal Image-TExt Representation Learning. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12375, pp. 104\u2013120. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58577-8_7"},{"key":"9_CR8","doi-asserted-by":"crossref","unstructured":"Chiu, T.Y., Zhao, Y., Gurari, D.: Assessing image quality issues for real-world problems. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3646\u20133656 (2020)","DOI":"10.1109\/CVPR42600.2020.00370"},{"issue":"1","key":"9_CR9","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1109\/TIT.1970.1054406","volume":"16","author":"C Chow","year":"1970","unstructured":"Chow, C.: On optimum recognition error and reject tradeoff. IEEE Trans. Inf. Theory 16(1), 41\u201346 (1970)","journal-title":"IEEE Trans. Inf. Theory"},{"issue":"4","key":"9_CR10","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1109\/TEC.1957.5222035","volume":"6","author":"CK Chow","year":"1957","unstructured":"Chow, C.K.: An optimum character recognition system using decision functions. IRE Trans. Electron. Comput. EC 6(4), 247\u2013254 (1957)","journal-title":"IRE Trans. Electron. Comput. EC"},{"key":"9_CR11","unstructured":"Corbi\u00e8re, C., Thome, N., Bar-Hen, A., Cord, M., P\u00e9rez, P.: Addressing failure prediction by learning model confidence. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"9_CR12","doi-asserted-by":"publisher","first-page":"51","DOI":"10.3389\/frai.2020.00051","volume":"3","author":"E Davis","year":"2020","unstructured":"Davis, E.: Unanswerable questions about images and texts. Front. Artif. Intell. 3, 51 (2020)","journal-title":"Front. Artif. Intell."},{"key":"9_CR13","doi-asserted-by":"publisher","unstructured":"De Stefano, C., Sansone, C., Vento, M.: To reject or not to reject: that is the question-an answer in case of neural classifiers. IEEE Trans. Syst. Man, Cybern. Part C (Applications and Reviews) 30(1), 84\u201394 (2000). https:\/\/doi.org\/10.1109\/5326.827457","DOI":"10.1109\/5326.827457"},{"key":"9_CR14","doi-asserted-by":"publisher","unstructured":"Dong, L., Quirk, C., Lapata, M.: Confidence modeling for neural semantic parsing. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 743\u2013753. Association for Computational Linguistics, Melbourne, Australia (2018). https:\/\/doi.org\/10.18653\/v1\/P18-1069, https:\/\/aclanthology.org\/P18-1069","DOI":"10.18653\/v1\/P18-1069"},{"key":"9_CR15","first-page":"1605","volume":"11","author":"R El-Yaniv","year":"2010","unstructured":"El-Yaniv, R., Wiener, Y.: On the foundations of noise-free selective classification. J. Mach. Learn. Res. 11, 1605\u20131641 (2010)","journal-title":"J. Mach. Learn. Res."},{"key":"9_CR16","doi-asserted-by":"crossref","unstructured":"Fukui, A., Park, D.H., Yang, D., Rohrbach, A., Darrell, T., Rohrbach, M.: Multimodal compact bilinear pooling for visual question answering and visual grounding. In: EMNLP (2016)","DOI":"10.18653\/v1\/D16-1044"},{"key":"9_CR17","doi-asserted-by":"crossref","unstructured":"Gao, P., et al.: Dynamic fusion with intra-and inter-modality attention flow for visual question answering. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00680"},{"key":"9_CR18","unstructured":"Geifman, Y., El-Yaniv, R.: Selective classification for deep neural networks. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"9_CR19","unstructured":"Geifman, Y., El-Yaniv, R.: SelectiveNet: a deep neural network with an integrated reject option. In: International Conference on Machine Learning, pp. 2151\u20132159. PMLR (2019)"},{"key":"9_CR20","doi-asserted-by":"crossref","unstructured":"Goyal, Y., Khot, T., Summers-Stay, D., Batra, D., Parikh, D.: Making the V in VQA matter: elevating the role of image understanding in visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6904\u20136913 (2017)","DOI":"10.1109\/CVPR.2017.670"},{"key":"9_CR21","doi-asserted-by":"crossref","unstructured":"Guillory, D., Shankar, V., Ebrahimi, S., Darrell, T., Schmidt, L.: Predicting with confidence on unseen distributions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00117"},{"issue":"22","key":"9_CR22","doi-asserted-by":"publisher","first-page":"2402","DOI":"10.1001\/jama.2016.17216","volume":"316","author":"V Gulshan","year":"2016","unstructured":"Gulshan, V., et al.: Development and validation of a deep learning algorithm for detection of diabetic retinopathy in retinal fundus photographs. JAMA 316(22), 2402\u20132410 (2016). https:\/\/doi.org\/10.1001\/jama.2016.17216","journal-title":"JAMA"},{"key":"9_CR23","unstructured":"Guo, C., Pleiss, G., Sun, Y., Weinberger, K.Q.: On calibration of modern neural networks. In: International Conference on Machine Learning, pp. 1321\u20131330. PMLR (2017)"},{"key":"9_CR24","doi-asserted-by":"crossref","unstructured":"Gurari, D., et al.: VizWiz grand challenge: answering visual questions from blind people. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3608\u20133617 (2018)","DOI":"10.1109\/CVPR.2018.00380"},{"issue":"17","key":"9_CR25","doi-asserted-by":"publisher","first-page":"1889","DOI":"10.1093\/bioinformatics\/btn349","volume":"24","author":"B Hanczar","year":"2008","unstructured":"Hanczar, B., Dougherty, E.R.: Classification with reject option in gene expression data. Bioinformatics 24(17), 1889\u20131895 (2008)","journal-title":"Bioinformatics"},{"key":"9_CR26","doi-asserted-by":"crossref","unstructured":"Hendricks, L.A., Burns, K., Saenko, K., Darrell, T., Rohrbach, A.: Women also snowboard: overcoming bias in captioning models. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 771\u2013787 (2018)","DOI":"10.1007\/978-3-030-01219-9_47"},{"key":"9_CR27","unstructured":"Hendrycks, D., Gimpel, K.: A baseline for detecting misclassified and out-of-distribution examples in neural networks. In: Proceedings of International Conference on Learning Representations (2017)"},{"key":"9_CR28","doi-asserted-by":"crossref","unstructured":"Hudson, D.A., Manning, C.D.: GQA: a new dataset for real-world visual reasoning and compositional question answering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6700\u20136709 (2019)","DOI":"10.1109\/CVPR.2019.00686"},{"key":"9_CR29","unstructured":"Jiang, H., Kim, B., Guan, M., Gupta, M.: To trust or not to trust a classifier. In: Bengio, S., Wallach, H., Larochelle, H., Grauman, K., Cesa-Bianchi, N., Garnett, R. (eds.) Advances in Neural Information Processing Systems. vol.\u00a031. Curran Associates, Inc. (2018), https:\/\/proceedings.neurips.cc\/paper\/2018\/file\/7180cffd6a8e829dacfc2a31b3f72ece-Paper.pdf"},{"key":"9_CR30","doi-asserted-by":"crossref","unstructured":"Jiang, H., Misra, I., Rohrbach, M., Learned-Miller, E., Chen, X.: In defense of grid features for visual question answering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10267\u201310276 (2020)","DOI":"10.1109\/CVPR42600.2020.01028"},{"key":"9_CR31","unstructured":"Jiang, Y., Natarajan, V., Chen, X., Rohrbach, M., Batra, D., Parikh, D.: Pythia v0. 1: the winning entry to the VQA challenge 2018. arXiv preprint arXiv:1807.09956 (2018)"},{"key":"9_CR32","unstructured":"Kadavath, S., et al.: Language models (mostly) know what they know. arXiv preprint arXiv:2207.05221 (2022)"},{"key":"9_CR33","doi-asserted-by":"crossref","unstructured":"Kafle, K., Kanan, C.: An analysis of visual question answering algorithms. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.217"},{"key":"9_CR34","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.cviu.2017.06.005","volume":"163","author":"K Kafle","year":"2017","unstructured":"Kafle, K., Kanan, C.: Visual question answering: Datasets, algorithms, and future challenges. Comput. Vis. Image Underst. 163, 3\u201320 (2017)","journal-title":"Comput. Vis. Image Underst."},{"key":"9_CR35","doi-asserted-by":"publisher","unstructured":"Kamath, A., Jia, R., Liang, P.: Selective question answering under domain shift. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 5684\u20135696. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.503, https:\/\/aclanthology.org\/2020.acl-main.503","DOI":"10.18653\/v1\/2020.acl-main.503"},{"key":"9_CR36","doi-asserted-by":"publisher","unstructured":"Karamcheti, S., Krishna, R., Fei-Fei, L., Manning, C.: Mind your outliers! investigating the negative impact of outliers on active learning for visual question answering. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 7265\u20137281. Association for Computational Linguistics, Online (2021). https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.564, https:\/\/aclanthology.org\/2021.acl-long.564","DOI":"10.18653\/v1\/2021.acl-long.564"},{"issue":"6","key":"9_CR37","doi-asserted-by":"publisher","first-page":"673","DOI":"10.1038\/89044","volume":"7","author":"J Khan","year":"2001","unstructured":"Khan, J., et al.: Classification and diagnostic prediction of cancers using gene expression profiling and artificial neural networks. Nat. Med. 7(6), 673\u2013679 (2001)","journal-title":"Nat. Med."},{"key":"9_CR38","doi-asserted-by":"crossref","unstructured":"Khani, F., Rinard, M., Liang, P.: Unanimous prediction for 100% precision with application to learning semantic mappings. arXiv preprint arXiv:1606.06368 (2016)","DOI":"10.18653\/v1\/P16-1090"},{"key":"9_CR39","unstructured":"Lakshminarayanan, B., Pritzel, A., Blundell, C.: Simple and scalable predictive uncertainty estimation using deep ensembles. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"9_CR40","unstructured":"Li, L.H., Yatskar, M., Yin, D., Hsieh, C.J., Chang, K.W.: VisualBERT: a simple and performant baseline for vision and language. In: Arxiv (2019)"},{"key":"9_CR41","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"786","DOI":"10.1007\/978-3-030-61616-8_63","volume-title":"Artificial Neural Networks and Machine Learning \u2013 ICANN 2020","author":"M Li","year":"2020","unstructured":"Li, M., Weber, C., Wermter, S.: Neural networks for detecting irrelevant questions during visual question answering. In: Farka\u0161, I., Masulli, P., Wermter, S. (eds.) ICANN 2020. LNCS, vol. 12397, pp. 786\u2013797. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-61616-8_63"},{"key":"9_CR42","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1007\/978-3-030-58577-8_8","volume-title":"Computer Vision \u2013 ECCV 2020","author":"X Li","year":"2020","unstructured":"Li, X., et al.: Oscar: object-semantics aligned pre-training for vision-language tasks. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12375, pp. 121\u2013137. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58577-8_8"},{"key":"9_CR43","unstructured":"Lu, J., Batra, D., Parikh, D., Lee, S.: ViLBERT: pretraining task-agnostic Visiolinguistic representations for vision-and-language tasks. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"issue":"11","key":"9_CR44","doi-asserted-by":"publisher","first-page":"e79315","DOI":"10.1371\/journal.pone.0079315","volume":"8","author":"B L\u00fctkenh\u00f6ner","year":"2013","unstructured":"L\u00fctkenh\u00f6ner, B., Basel, T.: Predictive modeling for diagnostic tests with high specificity, but low sensitivity: a study of the glycerol test in patients with suspected meniere\u2019s disease. PLoS ONE 8(11), e79315 (2013)","journal-title":"PLoS ONE"},{"key":"9_CR45","doi-asserted-by":"crossref","unstructured":"Mahendru, A., Prabhu, V., Mohapatra, A., Batra, D., Lee, S.: The promise of premise: harnessing question premises in visual question answering. In: EMNLP (2017)","DOI":"10.18653\/v1\/D17-1097"},{"key":"9_CR46","doi-asserted-by":"publisher","unstructured":"Mcknight, D.H., Carter, M., Thatcher, J.B., Clay, P.F.: Trust in a specific technology: an investigation of its components and measures. ACM Trans. Manage. Inf. Syst. 2(2), 1\u201325 (2011). https:\/\/doi.org\/10.1145\/1985347.1985353","DOI":"10.1145\/1985347.1985353"},{"key":"9_CR47","unstructured":"Nguyen, D.K., Goswami, V., Chen, X.: Movie: revisiting modulated convolutions for visual counting and beyond. In: Proceedings of the International Conference on Learning Representations (2021)"},{"key":"9_CR48","doi-asserted-by":"crossref","unstructured":"Niculescu-Mizil, A., Caruana, R.: Predicting good probabilities with supervised learning. In: Proceedings of the 22nd International Conference on Machine learning, pp. 625\u2013632 (2005)","DOI":"10.1145\/1102351.1102430"},{"issue":"3","key":"9_CR49","first-page":"61","volume":"10","author":"J Platt","year":"1999","unstructured":"Platt, J., et al.: Probabilistic outputs for support vector machines and comparisons to regularized likelihood methods. Adv. Large Margin Classifiers 10(3), 61\u201374 (1999)","journal-title":"Adv. Large Margin Classifiers"},{"key":"9_CR50","doi-asserted-by":"publisher","unstructured":"Pudil, P., Novovicova, J., Blaha, S., Kittler, J.: Multistage pattern recognition with reject option. In: Proceedings., 11th IAPR International Conference on Pattern Recognition. Vol. II. Conference B: Pattern Recognition Methodology and Systems, pp. 92\u201395 (1992). https:\/\/doi.org\/10.1109\/ICPR.1992.201729","DOI":"10.1109\/ICPR.1992.201729"},{"key":"9_CR51","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"9_CR52","doi-asserted-by":"crossref","unstructured":"Ray, A., Christie, G., Bansal, M., Batra, D., Parikh, D.: Question relevance in VQA: identifying non-visual and false-premise questions. In: Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing, pp. 919\u2013924 (2016)","DOI":"10.18653\/v1\/D16-1090"},{"key":"9_CR53","doi-asserted-by":"crossref","unstructured":"Shah, M., Chen, X., Rohrbach, M., Parikh, D.: Cycle-consistency for robust visual question answering. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00681"},{"key":"9_CR54","doi-asserted-by":"publisher","first-page":"104327","DOI":"10.1016\/j.imavis.2021.104327","volume":"116","author":"H Sharma","year":"2021","unstructured":"Sharma, H., Jalal, A.S.: A survey of methods, datasets and evaluation metrics for visual question answering. Image Vis. Comput. 116, 104327 (2021)","journal-title":"Image Vis. Comput."},{"key":"9_CR55","unstructured":"Shen, S., et al.: How much can clip benefit vision-and-language tasks? arXiv preprint arXiv:2107.06383 (2021)"},{"key":"9_CR56","unstructured":"Singh, A., Goswami, V., Parikh, D.: Are we pretraining it right? digging deeper into visio-linguistic pretraining. arXiv preprint arXiv:2004.08744 (2020)"},{"key":"9_CR57","doi-asserted-by":"crossref","unstructured":"Singh, A., et al.: Towards VQA models that can read. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8317\u20138326 (2019)","DOI":"10.1109\/CVPR.2019.00851"},{"key":"9_CR58","doi-asserted-by":"crossref","unstructured":"Teney, D., Liu, L., van Den Hengel, A.: Graph-structured representations for visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1\u20139 (2017)","DOI":"10.1109\/CVPR.2017.344"},{"key":"9_CR59","doi-asserted-by":"publisher","unstructured":"Tran, D., et al.: Plex: towards reliability using pretrained large model extensions (2022). https:\/\/doi.org\/10.48550\/ARXIV.2207.07411, https:\/\/arxiv.org\/abs\/2207.07411","DOI":"10.48550\/ARXIV.2207.07411"},{"key":"9_CR60","doi-asserted-by":"publisher","unstructured":"Varshney, N., Mishra, S., Baral, C.: Investigating selective prediction approaches across several tasks in IID, OOD, and adversarial settings. In: Findings of the Association for Computational Linguistics: ACL 2022, pp. 1995\u20132002 (2022). https:\/\/doi.org\/10.18653\/v1\/2022.findings-acl.158, https:\/\/aclanthology.org\/2022.findings-acl.158","DOI":"10.18653\/v1\/2022.findings-acl.158"},{"key":"9_CR61","unstructured":"Wang, X., Luo, Y., Crankshaw, D., Tumanov, A., Yu, F., Gonzalez, J.E.: Idk cascades: fast deep learning by learning not to overthink. arXiv preprint arXiv:1706.00885 (2017)"},{"key":"9_CR62","doi-asserted-by":"crossref","unstructured":"Whitehead, S., Wu, H., Ji, H., Feris, R., Saenko, K.: Separating skills and concepts for novel visual question answering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5632\u20135641 (2021)","DOI":"10.1109\/CVPR46437.2021.00558"},{"key":"9_CR63","doi-asserted-by":"crossref","unstructured":"Xin, J., Tang, R., Yu, Y., Lin, J.: The art of abstention: selective prediction and error regularization for natural language processing. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 1040\u20131051 (2021)","DOI":"10.18653\/v1\/2021.acl-long.84"},{"key":"9_CR64","doi-asserted-by":"crossref","unstructured":"Yang, Z., He, X., Gao, J., Deng, L., Smola, A.: Stacked attention networks for image question answering. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.10"},{"key":"9_CR65","doi-asserted-by":"crossref","unstructured":"Yu, Z., Yu, J., Cui, Y., Tao, D., Tian, Q.: Deep modular co-attention networks for visual question answering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6281\u20136290 (2019)","DOI":"10.1109\/CVPR.2019.00644"},{"key":"9_CR66","doi-asserted-by":"crossref","unstructured":"Zhang, P., et al.: VinVL: revisiting visual representations in vision-language models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5579\u20135588 (2021)","DOI":"10.1109\/CVPR46437.2021.00553"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-20059-5_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,28]],"date-time":"2022-10-28T16:06:02Z","timestamp":1666973162000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-20059-5_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031200588","9783031200595"],"references-count":66,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-20059-5_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"29 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}