{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T17:47:23Z","timestamp":1768412843059,"version":"3.49.0"},"reference-count":16,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2023,8,30]],"date-time":"2023-08-30T00:00:00Z","timestamp":1693353600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,8,30]],"date-time":"2023-08-30T00:00:00Z","timestamp":1693353600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Mach. Intell. Res."],"published-print":{"date-parts":[[2023,10]]},"DOI":"10.1007\/s11633-023-1469-x","type":"journal-article","created":{"date-parts":[[2023,8,30]],"date-time":"2023-08-30T13:03:14Z","timestamp":1693400594000},"page":"605-613","source":"Crossref","is-referenced-by-count":13,"title":["How Good is Google Bard\u2019s Visual Understanding? 
An Empirical Study on Open Challenges"],"prefix":"10.1007","volume":"20","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7391-7539","authenticated-orcid":false,"given":"Haotong","family":"Qin","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7092-2877","authenticated-orcid":false,"given":"Ge-Peng","family":"Ji","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9502-1749","authenticated-orcid":false,"given":"Salman","family":"Khan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5245-7518","authenticated-orcid":false,"given":"Deng-Ping","family":"Fan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4263-3143","authenticated-orcid":false,"given":"Fahad Shahbaz","family":"Khan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3445-5711","authenticated-orcid":false,"given":"Luc Van","family":"Gool","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,8,30]]},"reference":[{"key":"1469_CR1","unstructured":"R. Thoppilan, D. De Freitas, J. Hall, N. Shazeer, A. Kulshreshtha, H. T. Cheng, A. Jin, T. Bos, L. Baker, Y. Du, Y. G. Li, H. Lee, H. S. Zheng, A. Ghafouri, M. Menegali, Y. P. Huang, M. Krikun, D. Lepikhin, J. Qin, D. H. Chen, Y. Z. Xu, Z. F. Chen, A. Roberts, M. Bosma, V. Zhao, Y. Q. Zhou, C. C. Chang, I. Krivokon, W. Rusch, M. Pickett, P. Srinivasan, L. Man, K. Meier-Hellstern, M. R. Morris, T. Doshi, R. D. Santos, T. Duke, J. Soraker, B. Zevenbergen, V. Prabhakaran, M. Diaz, B. Hutchinson, K. Olson, A. Molina, E. Hoffman-John, J. Lee, L. Aroyo, R. 
Rajakumar, A. Butryna, M. Lamm, V. Kuzmina, J. Fenton, A. Cohen, R. Bernstein, R. Kurzweil, B. Aguera-Arcas, C. Cui, M. Croak, E. Chi, Q. Le. LaMDA: Language models for dialog applications. [Online], Available: https:\/\/arxiv.org\/abs\/2201.08239, 2022."},{"key":"1469_CR2","unstructured":"A. Chowdhery, S. Narang, J. Devlin, M. Bosma, G. Mishra, A. Roberts, P. Barham, H. W. Chung, C. Sutton, S. Gehrmann, P. Schuh, K. S. Shi, S. Tsvyashchenko, J. Maynez, A. Rao, P. Barnes, Y. Tay, N. Shazeer, V. Prabhakaran, E. Reif, N. Du, B. Hutchinson, R. Pope, J. Bradbury, J. Austin, M. Isard, G. Gur-Ari, P. C. Yin, T. Duke, A. Levskaya, S. Ghemawat, S. Dev, H. Michalewski, X. Garcia, V. Misra, K. Robinson, L. Fedus, D. Zhou, D. Ippolito, D. Luan, H. Lim, B. Zoph, A. Spiridonov, R. Sepassi, D. Dohan, S. Agrawal, M. Omernick, A. M. Dai, T. S. Pillai, M. Pellat, A. Lewkowycz, E. Moreira, R. Child, O. Polozov, K. Lee, Z. W. Zhou, X. Z. Wang, B. Saeta, M. Diaz, O. Firat, M. Catasta, J. Wei, K. Meier-Hellstern, D. Eck, J. Dean, S. Petrov, N. Fiedel. PaLM: Scaling language modeling with pathways. [Online], Available: https:\/\/arxiv.org\/abs\/2204.02311, 2022."},{"key":"1469_CR3","unstructured":"OpenAI. GPT-4 technical report. [Online], Available: https:\/\/arxiv.org\/abs\/2303.08774, 2023"},{"key":"1469_CR4","unstructured":"Microsoft. Bing chat enterprise announced, multimodal visual search rolling out to bing chat, [Online], Available: https:\/\/blogs.bing.com\/search\/july-2023\/Bing-Chat-Enterprise-announced,-multimodal-Visual-Search-rolling-out-to-Bing-Chat, 2023."},{"key":"1469_CR5","unstructured":"LLaVA. LLaVA-Bench, [Online], Available: https:\/\/github.com\/haotian-liu\/LLaVA\/blob\/main\/docs\/LLaVA_Bench.md, 2023."},{"key":"1469_CR6","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Microsoft coco: Common objects in context","author":"L T. Y","year":"2014","unstructured":"T. Y. Lin, M. Maire, S. Belongie, J. Hays, P. 
Perona, D. Ramanan, P. Doll\u00e1r, C. L. Zitnick. Microsoft coco: Common objects in context. In Proceedings of the 13th European Conference on Computer Vision, Springer, Z\u00fcrich, Switzerland, pp. 740\u2013755, 2014. DOI: https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48."},{"key":"1469_CR7","unstructured":"D. Hendrycks, T. D. Dietterich. Benchmarking neural network robustness to common corruptions and perturbations. In Proceedings of the 7th International Conference on Learning Representations, New Orleans, USA, 2019."},{"key":"1469_CR8","doi-asserted-by":"publisher","first-page":"3833","DOI":"10.1109\/CVPR.2019.00396","volume-title":"Single image deraining: A comprehensive benchmark analysis","author":"L S. Y","year":"2019","unstructured":"S. Y. Li, I. B. Araujo, W. Q. Ren, Z. Y. Wang, E. K. Tokuda, R. H. Junior, R. Cesar-Junior, J. W. Zhang, X. J. Guo, X. C. Cao. Single image deraining: A comprehensive benchmark analysis. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, IEEE, Long Beach, USA, pp. 3833\u20133842, 2019. DOI: https:\/\/doi.org\/10.1109\/CVPR.2019.00396."},{"issue":"10","key":"1469_CR9","doi-asserted-by":"publisher","DOI":"10.3390\/s22103628","volume":"22","year":"2022","unstructured":"S. Z. Hassan, K. Ahmad, S. Hicks, P. Halvorsen, A. Al-Fuqaha, N. Conci, M. Riegler. Visual sentiment analysis from disaster images in social media. Sensors, vol. 22, no. 10, Article number 3628, 2022. DOI: https:\/\/doi.org\/10.3390\/s22103628.","journal-title":"Sensors"},{"key":"1469_CR10","doi-asserted-by":"publisher","unstructured":"S. Maji, E. Rahtu, J. Kannala, M. Blaschko, A. Vedaldi, Fine-grained visual classification of aircraft. arXiv: 1306.5151, 2013. DOI: https:\/\/doi.org\/10.48550\/arXiv.1306.5151.","DOI":"10.48550\/arXiv.1306.5151"},{"issue":"10","key":"1469_CR11","doi-asserted-by":"publisher","first-page":"6024","DOI":"10.1109\/TPAMI.2021.3085766","volume":"44","author":"F D. P","year":"2022","unstructured":"D. 
P. Fan, G. P. Ji, M. M. Cheng, L. Shao. Concealed object detection. IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 44, no. 10, pp. 6024\u20136042, 2022. DOI: https:\/\/doi.org\/10.1109\/TPAMI.2021.3085766.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1469_CR12","doi-asserted-by":"crossref","unstructured":"G. L. Sun, Z. C. An, Y. Liu, C. Liu, C. Sakaridis, D. P. Fan, L. Van Gool. Indiscernible object counting in underwater scenes. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Vancouver, Canada, pp. 13791\u201313801, 2023.","DOI":"10.1109\/CVPR52729.2023.01325"},{"issue":"1","key":"1469_CR13","doi-asserted-by":"publisher","DOI":"10.1007\/s44267-023-00019-6","volume":"1","year":"2023","unstructured":"D. P. Fan, G. P. Ji, P. Xu, M. M. Cheng, C. Sakaridis, L. Van Gool. Advances in deep concealed scene understanding. Visual Intelligence, vol. 1, no. 1, Article number 16, 2023. DOI: https:\/\/doi.org\/10.1007\/s44267-023-00019-6.","journal-title":"Visual Intelligence"},{"key":"1469_CR14","doi-asserted-by":"publisher","first-page":"8309","DOI":"10.1109\/CVPR.2019.00851","volume-title":"Towards vqa models that can read","author":"S A","year":"2019","unstructured":"A. Singh, V. Natarajan, M. Shah, Y. Jiang, X. L. Chen, D. Batra, D. Parikh, M. Rohrbach. Towards vqa models that can read. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, IEEE, Long Beach, USA, pp. 8309\u20138318, 2019. DOI: https:\/\/doi.org\/10.1109\/CVPR.2019.00851."},{"issue":"6","key":"1469_CR15","doi-asserted-by":"publisher","first-page":"531","DOI":"10.1007\/s11633-022-1371-y","volume":"19","author":"J G. P","year":"2022","unstructured":"G. P. Ji, G. B. Xiao, Y. C. Chou, D. P. Fan, K. Zhao, G. Chen, L. Van Gool. Video polyp segmentation: A deep learning perspective. Machine Intelligence Research, vol. 19, no. 6, pp. 531\u2013549, 2022. 
DOI: https:\/\/doi.org\/10.1007\/s11633-022-1371-y.","journal-title":"Machine Intelligence Research"},{"issue":"12","key":"1469_CR16","doi-asserted-by":"publisher","first-page":"8555","DOI":"10.1109\/TGRS.2020.2988782","volume":"58","author":"L S","year":"2020","unstructured":"S. Lobry, D. Marcos, J. Murray, D. Tuia. RSVQA: Visual question answering for remote sensing data. IEEE Transactions on Geoscience and Remote Sensing, vol. 58, no. 12, pp. 8555\u20138566, 2020. DOI: https:\/\/doi.org\/10.1109\/tgrs.2020.2988782.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"}],"container-title":["Machine Intelligence Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-023-1469-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11633-023-1469-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-023-1469-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,15]],"date-time":"2023-09-15T12:12:30Z","timestamp":1694779950000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11633-023-1469-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,30]]},"references-count":16,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2023,10]]}},"alternative-id":["1469"],"URL":"https:\/\/doi.org\/10.1007\/s11633-023-1469-x","relation":{},"ISSN":["2731-538X","2731-5398"],"issn-type":[{"value":"2731-538X","type":"print"},{"value":"2731-5398","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8,30]]}}}