{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T05:29:25Z","timestamp":1740806965073,"version":"3.38.0"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,1,6]],"date-time":"2025-01-06T00:00:00Z","timestamp":1736121600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,6]],"date-time":"2025-01-06T00:00:00Z","timestamp":1736121600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1007\/s00530-024-01629-w","type":"journal-article","created":{"date-parts":[[2025,1,6]],"date-time":"2025-01-06T11:18:28Z","timestamp":1736162308000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["ReDiT: re-evaluating large visual question answering model confidence by defining input scenario difficulty and applying temperature mapping"],"prefix":"10.1007","volume":"31","author":[{"given":"Modafar","family":"Al-Shouha","sequence":"first","affiliation":[]},{"given":"G\u00e1bor","family":"Sz\u0171cs","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,6]]},"reference":[{"issue":"1","key":"1629_CR1","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1186\/s13040-023-00339-9","volume":"16","author":"JG Meyer","year":"2023","unstructured":"Meyer, J.G., Urbanowicz, R.J., Martin, P.C., O\u2019Connor, K., Li, R., Peng, P.-C., Bright, T.J., Tatonetti, N., Won, K.J., Gonzalez-Hernandez, G., et al.: ChatGPT and large language models in academia: opportunities and challenges. BioData Mining 16(1), 20 (2023). https:\/\/doi.org\/10.1186\/s13040-023-00339-9","journal-title":"BioData Mining"},{"key":"1629_CR2","doi-asserted-by":"publisher","DOI":"10.1038\/s41587-022-01618-2","author":"A Madani","year":"2023","unstructured":"Madani, A., Krause, B., Greene, E.R., Subramanian, S., Mohr, B.P., Holton, J.M., Olmos, J.L., Jr., Xiong, C., Sun, Z.Z., Socher, R., et al.: Large language models generate functional protein sequences across diverse families. Nat. Biotechnol. (2023). https:\/\/doi.org\/10.1038\/s41587-022-01618-2","journal-title":"Nat. Biotechnol."},{"key":"1629_CR3","unstructured":"Li, Z., Wallace, E., Shen, S., Lin, K., Keutzer, K., Klein, D., Gonzalez, J.: Train big, then compress: Rethinking model size for efficient training and inference of transformers. In: Proceedings of the 37th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 119, pp. 5958\u20135968 . https:\/\/proceedings.mlr.press\/v119\/li20m.html (2020). Accessed 17 Oct 2024"},{"key":"1629_CR4","unstructured":"Russell, S., Norvig, P.: Artificial intelligence, global edition a modern approach, p. 1168. https:\/\/elibrary.pearson.de\/book\/99.150005\/9781292401171 (2021). Accessed 17 Oct 2024"},{"key":"1629_CR5","doi-asserted-by":"publisher","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13, pp. 740\u2013755 (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48 . Springer","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1629_CR6","doi-asserted-by":"publisher","unstructured":"Yang, J., Zhou, K., Li, Y., Liu, Z.: Generalized out-of-distribution detection: A survey. arXiv preprint arXiv:2110.11334 (2021). https:\/\/doi.org\/10.48550\/arXiv.2110.11334","DOI":"10.48550\/arXiv.2110.11334"},{"key":"1629_CR7","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.126287","volume":"546","author":"X Liming","year":"2023","unstructured":"Liming, X., Tang, Q., Lv, J., Zheng, B., Zeng, X., Li, W.: Deep image captioning: A review of methods, trends and future challenges. Neurocomputing 546, 126287 (2023). https:\/\/doi.org\/10.1016\/j.neucom.2023.126287","journal-title":"Neurocomputing"},{"key":"1629_CR8","doi-asserted-by":"publisher","unstructured":"Lee, D., Cheon, Y., Han, W.-S.: Regularizing attention networks for anomaly detection in visual question answering. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, pp. 1845\u20131853 (2021). https:\/\/doi.org\/10.1609\/aaai.v35i3.16279","DOI":"10.1609\/aaai.v35i3.16279"},{"key":"1629_CR9","doi-asserted-by":"publisher","unstructured":"Gokhale, T., Banerjee, P., Baral, C., Yang, Y.: Mutant: A training paradigm for out-of-distribution generalization in visual question answering. arXiv preprint arXiv:2009.08566 (2020). https:\/\/doi.org\/10.48550\/arXiv.2009.08566","DOI":"10.48550\/arXiv.2009.08566"},{"key":"1629_CR10","doi-asserted-by":"publisher","unstructured":"Liang, S., Li, Y., Srikant, R.: Enhancing the reliability of out-of-distribution image detection in neural networks. arXiv preprint arXiv:1706.02690 (2017). https:\/\/doi.org\/10.48550\/arXiv.1706.02690","DOI":"10.48550\/arXiv.1706.02690"},{"key":"1629_CR11","doi-asserted-by":"publisher","DOI":"10.14232\/actacyb.298356","author":"M Al-Shouha","year":"2024","unstructured":"Al-Shouha, M., Sz\u0171cs, G.: Single and combined algorithms for open set classification on image datasets. Acta Cybernetica (2024). https:\/\/doi.org\/10.14232\/actacyb.298356","journal-title":"Acta Cybernetica"},{"key":"1629_CR12","doi-asserted-by":"publisher","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763 (2021). https:\/\/doi.org\/10.48550\/arXiv.2103.00020 . PMLR","DOI":"10.48550\/arXiv.2103.00020"},{"key":"1629_CR13","doi-asserted-by":"publisher","unstructured":"Wang, J., Yang, Z., Hu, X., Li, L., Lin, K., Gan, Z., Liu, Z., Liu, C., Wang, L.: GIT: A Generative Image-to-text Transformer for Vision and Language (2022). https:\/\/doi.org\/10.48550\/arXiv.2205.14100","DOI":"10.48550\/arXiv.2205.14100"},{"key":"1629_CR14","doi-asserted-by":"publisher","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. arXiv (2022). https:\/\/doi.org\/10.48550\/ARXIV.2201.12086","DOI":"10.48550\/ARXIV.2201.12086"},{"key":"1629_CR15","doi-asserted-by":"publisher","unstructured":"Kim, W., Son, B., Kim, I.: ViLT: Vision-and-Language Transformer Without Convolution or Region Supervision (2021). https:\/\/doi.org\/10.48550\/arXiv.2102.03334","DOI":"10.48550\/arXiv.2102.03334"},{"key":"1629_CR16","unstructured":"Jia, C., Yang, Y., Xia, Y., Chen, Y.-T., Parekh, Z., Pham, H., Le, Q., Sung, Y.-H., Li, Z., Duerig, T.: Scaling up visual and vision-language representation learning with noisy text supervision. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning, vol. 139, pp. 4904\u20134916. https:\/\/proceedings.mlr.press\/v139\/jia21b.html (2021). Accessed 17 Oct 2024"},{"key":"1629_CR17","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., Krueger, G., Sutskever, I.: Learning transferable visual models from natural language supervision. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning, vol. 139, pp. 8748\u20138763. https:\/\/proceedings.mlr.press\/v139\/radford21a.html (2021). Accessed 17 Oct 2024"},{"key":"1629_CR18","doi-asserted-by":"publisher","unstructured":"Yuan, L., Chen, D., Chen, Y.-L., Codella, N., Dai, X., Gao, J., Hu, H., Huang, X., Li, B., Li, C., et al.: Florence: A new foundation model for computer vision. arXiv preprint arXiv:2111.11432 (2021). https:\/\/doi.org\/10.48550\/arXiv.2111.11432","DOI":"10.48550\/arXiv.2111.11432"},{"key":"1629_CR19","doi-asserted-by":"publisher","unstructured":"Salehi, M., Mirzaei, H., Hendrycks, D., Li, Y., Rohban, M.H., Sabokrou, M.: A unified survey on anomaly, novelty, open-set, and out-of-distribution detection: Solutions and future challenges. arXiv preprint arXiv:2110.14051 (2021). https:\/\/doi.org\/10.48550\/arXiv.2110.14051","DOI":"10.48550\/arXiv.2110.14051"},{"key":"1629_CR20","doi-asserted-by":"crossref","unstructured":"Shi, X., Lee, S.: Benchmarking out-of-distribution detection in visual question answering. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 5485\u20135495. https:\/\/openaccess.thecvf.com\/content\/WACV2024\/papers\/Shi_Benchmarking_Out-of-Distribution_Detection_in_Visual_Question_Answering_WACV_2024_paper.pdf (2024). Accessed 17 Oct 2024","DOI":"10.1109\/WACV57701.2024.00540"},{"key":"1629_CR21","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3269429","author":"X Zhang","year":"2023","unstructured":"Zhang, X., Zhang, F., Xu, C.: Next-ood: Overcoming dual multiple-choice vqa biases. IEEE Trans. Pattern Anal. Mach. Intell. (2023). https:\/\/doi.org\/10.1109\/TPAMI.2023.3269429","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1629_CR22","doi-asserted-by":"publisher","unstructured":"Zeng, Y., Zhang, X., Li, H.: Multi-Grained Vision Language Pre-Training: Aligning Texts with Visual Concepts (2022). https:\/\/doi.org\/10.48550\/arXiv.2111.08276","DOI":"10.48550\/arXiv.2111.08276"},{"key":"1629_CR23","doi-asserted-by":"publisher","unstructured":"Zhang, X., Li, J., Chu, W., Hai, J., Xu, R., Yang, Y., Guan, S., Xu, J., Cui, P.: On the Out-Of-Distribution Generalization of Multimodal Large Language Models (2024). https:\/\/doi.org\/10.48550\/arXiv.2402.06599","DOI":"10.48550\/arXiv.2402.06599"},{"key":"1629_CR24","doi-asserted-by":"publisher","unstructured":"Zhang, M., Press, O., Merrill, W., Liu, A., Smith, N.A.: How Language Model Hallucinations Can Snowball (2023). https:\/\/doi.org\/10.48550\/arXiv.2305.13534","DOI":"10.48550\/arXiv.2305.13534"},{"key":"1629_CR25","doi-asserted-by":"publisher","unstructured":"Zheng, S., Huang, J., Chang, K.C.-C.: Why Does ChatGPT Fall Short in Providing Truthful Answers? (2023). https:\/\/doi.org\/10.48550\/arXiv.2304.10513","DOI":"10.48550\/arXiv.2304.10513"},{"issue":"6","key":"1629_CR26","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1109\/MSP.2012.2211477","volume":"29","author":"L Deng","year":"2012","unstructured":"Deng, L.: The mnist database of handwritten digit images for machine learning research [best of the web]. IEEE Signal Process. Mag. 29(6), 141\u2013142 (2012). https:\/\/doi.org\/10.1109\/MSP.2012.2211477","journal-title":"IEEE Signal Process. Mag."},{"key":"1629_CR27","doi-asserted-by":"publisher","unstructured":"Serr\u00e0, J., \u00c1lvarez, D., G\u00f3mez, V., Slizovskaia, O., N\u00fa\u00f1ez, J.F., Luque, J.: Input complexity and out-of-distribution detection with likelihood-based generative models. arXiv preprint arXiv:1909.11480 (2019). https:\/\/doi.org\/10.48550\/arXiv.1909.11480","DOI":"10.48550\/arXiv.1909.11480"},{"key":"1629_CR28","doi-asserted-by":"publisher","unstructured":"Xiao, H., Rasul, K., Vollgraf, R.: Fashion-mnist: a novel image dataset for benchmarking machine learning algorithms. arXiv preprint arXiv:1708.07747 (2017). https:\/\/doi.org\/10.48550\/arXiv.1708.07747","DOI":"10.48550\/arXiv.1708.07747"},{"key":"1629_CR29","unstructured":"Krizhevsky, A., Hinton, G.: Learning multiple layers of features from tiny images. University of Toronto. http:\/\/www.cs.toronto.edu\/~kriz\/cifar.html (2009)"},{"key":"1629_CR30","unstructured":"Itseez: Open Source Computer Vision Library. https:\/\/github.com\/itseez\/opencv (2015). Accessed 17 Oct 2024"},{"key":"1629_CR31","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2023.104478","volume":"145","author":"J Frei","year":"2023","unstructured":"Frei, J., Kramer, F.: Annotated dataset creation through large language models for non-english medical nlp. J. Biomed. Inform. 145, 104478 (2023). https:\/\/doi.org\/10.1016\/j.jbi.2023.104478","journal-title":"J. Biomed. Inform."},{"key":"1629_CR32","doi-asserted-by":"publisher","first-page":"1061","DOI":"10.1109\/TASLP.2023.3267618","volume":"32","author":"Z Chi","year":"2024","unstructured":"Chi, Z., Huang, H., Liu, L., Bai, Y., Gao, X., Mao, X.-L.: Can pretrained english language models benefit non-english nlp systems in low-resource scenarios? IEEE\/ACM Trans. Audio Speech Lang. Process. 32, 1061\u20131074 (2024). https:\/\/doi.org\/10.1109\/TASLP.2023.3267618","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"1629_CR33","doi-asserted-by":"publisher","unstructured":"Tran, K.: From english to foreign languages: transferring pre-trained language models (2020). https:\/\/doi.org\/10.48550\/arXiv.2002.07306","DOI":"10.48550\/arXiv.2002.07306"},{"key":"1629_CR34","doi-asserted-by":"publisher","unstructured":"Wang, J., Zhou, Y., Xu, G., Shi, P., Zhao, C., Xu, H., Ye, Q., Yan, M., Zhang, J., Zhu, J., Sang, J., Tang, H.: Evaluation and analysis of hallucination in large vision-language models (2023). https:\/\/doi.org\/10.48550\/arXiv.2308.15126","DOI":"10.48550\/arXiv.2308.15126"},{"key":"1629_CR35","unstructured":"Guo, C., Pleiss, G., Sun, Y., Weinberger, K.Q.: On calibration of modern neural networks. In: Precup, D., Teh, Y.W. (eds.) Proceedings of the 34th International Conference on Machine Learning, vol. 70, pp. 1321\u20131330. https:\/\/proceedings.mlr.press\/v70\/guo17a.html (2017). Accessed 17 Oct 2024"},{"key":"1629_CR36","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695. https:\/\/openaccess.thecvf.com\/content\/CVPR2022\/papers\/Rombach_High-Resolution_Image_Synthesis_With_Latent_Diffusion_Models_CVPR_2022_paper.pdf (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"1629_CR37","unstructured":"Niels Rogge: comparing-VQA-models. Hugging Face. https:\/\/huggingface.co\/spaces\/nielsr\/comparing-VQA-models. Accessed 17 Oct 2024"},{"issue":"1","key":"1629_CR38","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1002\/1097-0142(1950)3:1<32::AID-CNCR2820030106>3.0.CO;2-3","volume":"3","author":"WJ Youden","year":"1950","unstructured":"Youden, W.J.: Index for rating diagnostic tests. Cancer 3(1), 32\u201335 (1950). https:\/\/doi.org\/10.1002\/1097-0142(1950)3:1<32::AID-CNCR2820030106>3.0.CO;2-3","journal-title":"Cancer"},{"key":"1629_CR39","doi-asserted-by":"publisher","first-page":"2825","DOI":"10.48550\/arXiv.1201.0490","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., Blondel, M., Prettenhofer, P., Weiss, R., Dubourg, V., Vanderplas, J., Passos, A., Cournapeau, D., Brucher, M., Perrot, M., Duchesnay, E.: Scikit-learn: machine learning in Python. J. Mach. Learn. Res. 12, 2825\u20132830 (2011). https:\/\/doi.org\/10.48550\/arXiv.1201.0490","journal-title":"J. Mach. Learn. Res."}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01629-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-024-01629-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01629-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T11:03:58Z","timestamp":1740740638000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-024-01629-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,6]]},"references-count":39,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,2]]}},"alternative-id":["1629"],"URL":"https:\/\/doi.org\/10.1007\/s00530-024-01629-w","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"type":"print","value":"0942-4962"},{"type":"electronic","value":"1432-1882"}],"subject":[],"published":{"date-parts":[[2025,1,6]]},"assertion":[{"value":"14 February 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 December 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 January 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"45"}}