{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,28]],"date-time":"2026-06-28T03:26:42Z","timestamp":1782617202527,"version":"3.54.5"},"reference-count":52,"publisher":"Informa UK Limited","issue":"12","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["42171466"],"award-info":[{"award-number":["42171466"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Key Laboratory of Earth Surface System and Human-Earth Relations, Ministry of Natural Resources of China","award":["LBXT2023YB03"],"award-info":[{"award-number":["LBXT2023YB03"]}]},{"name":"Open Fund of Key Laboratory of Urban Land Resources Monitoring and Simulation, Ministry of Natural Resources","award":["KF-2023-08-04"],"award-info":[{"award-number":["KF-2023-08-04"]}]},{"name":"CUG Scholar\u2019 Scientific Research Funds at China University of Geosciences","award":["2022034"],"award-info":[{"award-number":["2022034"]}]}],"content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["International Journal of Geographical Information Science"],"published-print":{"date-parts":[[2025,12,2]]},"DOI":"10.1080\/13658816.2025.2506533","type":"journal-article","created":{"date-parts":[[2025,5,20]],"date-time":"2025-05-20T05:52:13Z","timestamp":1747720333000},"page":"2862-2885","update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":3,"title":["LandGPT: a multimodal large language model for parcel-level land use classification with multi-source data"],"prefix":"10.1080","volume":"39","author":[{"given":"Geyuan","family":"Zhu","sequence":"first","affiliation":[{"name":"School of Geography and Information Engineering, China University of Geosciences","place":["Wuhan, China"]},{"name":"LocationMind Institution, LocationMind Inc","place":["Chiyoda, Japan"]}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mi","family":"Tang","sequence":"additional","affiliation":[{"name":"School of Geography and Information Engineering, China University of Geosciences","place":["Wuhan, China"]}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yueheng","family":"Ma","sequence":"additional","affiliation":[{"name":"School of Geography and Information Engineering, China University of Geosciences","place":["Wuhan, China"]}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhihui","family":"Hu","sequence":"additional","affiliation":[{"name":"School of Geography and Information Engineering, China University of Geosciences","place":["Wuhan, China"]}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chenglong","family":"Yu","sequence":"additional","affiliation":[{"name":"School of Geography and Information Engineering, China University of Geosciences","place":["Wuhan, China"]},{"name":"LocationMind Institution, LocationMind Inc","place":["Chiyoda, Japan"]}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiang","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Geography and Information Engineering, China University of Geosciences","place":["Wuhan, China"]},{"name":"LocationMind Institution, LocationMind Inc","place":["Chiyoda, Japan"]}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Huanjun","family":"Hu","sequence":"additional","affiliation":[{"name":"School of Mathematics and Computer Science, Wuhan Polytechnic University","place":["Wuhan, China"]}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Qingfeng","family":"Guan","sequence":"additional","affiliation":[{"name":"School of Geography and Information Engineering, China University of Geosciences","place":["Wuhan, China"]},{"name":"National Engineering Research Center of Geographic Information System, China University of Geosciences","place":["Wuhan, China"]}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yao","family":"Yao","sequence":"additional","affiliation":[{"name":"School of Geography and Information Engineering, China University of Geosciences","place":["Wuhan, China"]},{"name":"LocationMind Institution, LocationMind Inc","place":["Chiyoda, Japan"]},{"name":"National Engineering Research Center of Geographic Information System, China University of Geosciences","place":["Wuhan, China"]},{"name":"Hitotsubashi Institute for Advanced Study, Hitotsubashi University","place":["Kunitachi, Japan"]},{"name":"Faculty of Engineering, Reitaku University","place":["Kashiwa, Japan"]},{"name":"Ministry of Land and Resources of China, Key Laboratory of Urban Land Resources Monitoring and Simulation","place":["Shenzhen, China"]}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"301","published-online":{"date-parts":[[2025,5,20]]},"reference":[{"key":"e_1_3_3_2_1","unstructured":"Cao J. and Xiao J. 2022. An augmented benchmark dataset for geometric question answering through dual parallel text encoding. In: N. Calzolari et\u00a0al. eds. Proceedings of the 29th international conference on computational linguistics Gyeongju Republic of Korea. International Committee on Computational Linguistics 1511\u20131520."},{"key":"e_1_3_3_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.isprsjprs.2020.02.014"},{"key":"e_1_3_3_4_1","doi-asserted-by":"crossref","unstructured":"Chen Z. et\u00a0al. 2024a. How far are we to GPT-4V? Closing the gap to commercial multimodal models with open-source suites. Science China Information Sciences 67 (12) 220101.","DOI":"10.1007\/s11432-024-4231-5"},{"key":"e_1_3_3_5_1","doi-asserted-by":"crossref","unstructured":"Chen Z. et\u00a0al. 2024b. InternVL: scaling up vision foundation models and aligning for generic visual-linguistic tasks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition 24185\u201324198.","DOI":"10.1109\/CVPR52733.2024.02283"},{"key":"e_1_3_3_6_1","unstructured":"Chen Z. et\u00a0al. 2024c. Expanding performance boundaries of open-source multimodal models with model data and test-time scaling. arXiv preprint arXiv:2412.05271."},{"key":"e_1_3_3_7_1","doi-asserted-by":"crossref","unstructured":"Cherti M. et\u00a0al. 2023. Reproducible scaling laws for contrastive language-image learning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition 2818\u20132829.","DOI":"10.1109\/CVPR52729.2023.00276"},{"key":"e_1_3_3_8_1","doi-asserted-by":"crossref","unstructured":"Ding X. et\u00a0al. 2024. UniRepLKNet: a universal perception large-kernel ConvNet for audio video point cloud time-series and image recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition 5513\u20135524.","DOI":"10.1109\/CVPR52733.2024.00527"},{"key":"e_1_3_3_9_1","unstructured":"Dong X. et\u00a0al. 2024. InternLM-XComposer2: mastering free-form text-image composition and comprehension in vision-language large model. arXiv preprint arXiv:2401.16420."},{"key":"e_1_3_3_10_1","doi-asserted-by":"publisher","DOI":"10.1080\/15481603.2020.1724707"},{"key":"e_1_3_3_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.scitotenv.2022.154967"},{"key":"e_1_3_3_12_1","doi-asserted-by":"crossref","unstructured":"Gao Z. et\u00a0al. 2024. Mini-InternVL: a flexible-transfer pocket multimodal model with 5% parameters and 90% performance. Visual Intelligence 2 (1) 1\u201317.","DOI":"10.1007\/s44267-024-00067-6"},{"key":"e_1_3_3_13_1","doi-asserted-by":"crossref","unstructured":"Guo L. et\u00a0al. 2024. MKGL: mastery of a three-word language. Advances in Neural Information Processing Systems 37 140509\u2013140534.","DOI":"10.52202\/079017-4460"},{"key":"e_1_3_3_14_1","doi-asserted-by":"crossref","unstructured":"He K. et\u00a0al. 2016. Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_15_1","doi-asserted-by":"crossref","unstructured":"Hudson D.A. and Manning C.D. 2019. GQA: a new dataset for real-world visual reasoning and compositional question answering. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition 6700\u20136709.","DOI":"10.1109\/CVPR.2019.00686"},{"key":"e_1_3_3_16_1","doi-asserted-by":"crossref","unstructured":"Jain P. et\u00a0al. 2025. SenCLIP: enhancing zero-shot land-use mapping for sentinel-2 with ground-level prompting. In: 2025 IEEE\/CVF winter conference on applications of computer vision (WACV). IEEE 5656\u20135665.","DOI":"10.1109\/WACV61041.2025.00552"},{"key":"e_1_3_3_17_1","unstructured":"Jin M. et\u00a0al. 2023. Time-LLM: time series forecasting by reprogramming large language models. arXiv preprint arXiv:2310.01728."},{"key":"e_1_3_3_18_1","doi-asserted-by":"crossref","unstructured":"Kafle K. et\u00a0al. 2018. DVQA: understanding data visualizations via question answering. In: Proceedings of the IEEE conference on computer vision and pattern recognition 5648\u20135656.","DOI":"10.1109\/CVPR.2018.00592"},{"key":"e_1_3_3_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_15"},{"key":"e_1_3_3_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19815-1_29"},{"key":"e_1_3_3_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.habitatint.2021.102437"},{"key":"e_1_3_3_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"e_1_3_3_23_1","doi-asserted-by":"crossref","unstructured":"Kudo T. and Richardson J. 2018. SentencePiece: a simple and language independent subword tokenizer and detokenizer for neural text processing. arXiv preprint arXiv:1808.06226.","DOI":"10.18653\/v1\/D18-2012"},{"key":"e_1_3_3_24_1","unstructured":"Lee H.L. et\u00a0al. 2024. LLaVA-NeXT: improved reasoning OCR and world knowledge. LLaVA. https:\/\/llava-vl.github.io\/blog\/2024-01-30-llava-next\/"},{"key":"e_1_3_3_25_1","doi-asserted-by":"crossref","unstructured":"Lin T.-Y. et\u00a0al. 2014. Microsoft COCO: common objects in context. In: Computer vision \u2013 ECCV 2014: 13th European conference proceedings part v 13 6\u201312 September 2014 Zurich Switzerland. Springer 740\u2013755.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_3_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2024.3390838"},{"key":"e_1_3_3_27_1","doi-asserted-by":"crossref","unstructured":"Liu H. et\u00a0al. 2024. Improved baselines with visual instruction tuning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition 26296\u201326306.","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"e_1_3_3_28_1","doi-asserted-by":"crossref","unstructured":"Liu Z. et\u00a0al. 2015. Deep learning face attributes in the wild. In: Proceedings of the IEEE international conference on computer vision 3730\u20133738.","DOI":"10.1109\/ICCV.2015.425"},{"key":"e_1_3_3_29_1","doi-asserted-by":"crossref","unstructured":"Liu Z. et\u00a0al. 2021. Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF international conference on computer vision 10012\u201310022.","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_3_30_1","unstructured":"Loshchilov I. and Hutter F. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101."},{"key":"e_1_3_3_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.rse.2021.112830"},{"key":"e_1_3_3_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.scitotenv.2021.149915"},{"key":"e_1_3_3_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-99-7962-2_30"},{"key":"e_1_3_3_34_1","doi-asserted-by":"crossref","unstructured":"Masry A. et\u00a0al. 2022. ChartQA: a benchmark for question answering about charts with visual and logical reasoning. arXiv preprint arXiv:2203.10244.","DOI":"10.18653\/v1\/2022.findings-acl.177"},{"key":"e_1_3_3_35_1","doi-asserted-by":"crossref","unstructured":"Mathew M. Karatzas D. and Jawahar C.V. 2021. DocVQA: a dataset for VQA on document images. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision 2200\u20132209.","DOI":"10.1109\/WACV48630.2021.00225"},{"key":"e_1_3_3_36_1","doi-asserted-by":"crossref","unstructured":"Mishra A. et\u00a0al. 2019. OCR-VQA: visual question answering by reading text in images. In: 2019 International conference on document analysis and recognition (ICDAR). IEEE 947\u2013952.","DOI":"10.1109\/ICDAR.2019.00156"},{"key":"e_1_3_3_37_1","unstructured":"Radford A. et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In: International conference on machine learning. PmLR 8748\u20138763."},{"key":"e_1_3_3_38_1","unstructured":"Saleh B. and Elgammal A. 2015. Large-scale classification of fine-art paintings: learning the right metric on the right feature. arXiv preprint arXiv:1505.00855."},{"key":"e_1_3_3_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cities.2016.03.013"},{"key":"e_1_3_3_40_1","doi-asserted-by":"crossref","unstructured":"Singh A. et\u00a0al. 2019. Towards VQA models that can read. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition 8317\u20138326.","DOI":"10.1109\/CVPR.2019.00851"},{"key":"e_1_3_3_41_1","unstructured":"Steiner A. et\u00a0al. 2024. PaliGemma 2: a family of versatile VLMs for transfer. arXiv preprint arXiv:2412.03555."},{"key":"e_1_3_3_42_1","doi-asserted-by":"crossref","unstructured":"Vielzeuf V. et\u00a0al. 2018. CentralNet: a multilayer approach for multimodal fusion. In: Proceedings of the European conference on computer vision (ECCV) workshops.","DOI":"10.1007\/978-3-030-11024-6_44"},{"key":"e_1_3_3_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2009.187"},{"key":"e_1_3_3_44_1","doi-asserted-by":"crossref","unstructured":"Weyand T. et\u00a0al. 2020. Google landmarks dataset v2 \u2013 a large-scale benchmark for instance-level recognition and retrieval. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition 2575\u20132584.","DOI":"10.1109\/CVPR42600.2020.00265"},{"key":"e_1_3_3_45_1","unstructured":"White J. et\u00a0al. 2023. A prompt pattern catalog to enhance prompt engineering with ChatGPT. arXiv preprint arXiv:2302.11382."},{"key":"e_1_3_3_46_1","doi-asserted-by":"publisher","DOI":"10.1080\/13658816.2024.2387200"},{"key":"e_1_3_3_47_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.landurbplan.2019.103669"},{"key":"e_1_3_3_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jag.2024.103805"},{"key":"e_1_3_3_49_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jag.2021.102664"},{"key":"e_1_3_3_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2025.3542628"},{"key":"e_1_3_3_51_1","doi-asserted-by":"crossref","unstructured":"Ye J. et\u00a0al. 2023. UReader: universal OCR-free visually-situated language understanding with multimodal large language model. arXiv preprint arXiv:2310.05126.","DOI":"10.18653\/v1\/2023.findings-emnlp.187"},{"key":"e_1_3_3_52_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.rse.2019.111458"},{"key":"e_1_3_3_53_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.rse.2022.112916"}],"container-title":["International Journal of Geographical Information Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/13658816.2025.2506533","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,10]],"date-time":"2025-11-10T10:59:04Z","timestamp":1762772344000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/13658816.2025.2506533"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,20]]},"references-count":52,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2025,12,2]]}},"alternative-id":["10.1080\/13658816.2025.2506533"],"URL":"https:\/\/doi.org\/10.1080\/13658816.2025.2506533","relation":{},"ISSN":["1365-8816","1362-3087"],"issn-type":[{"value":"1365-8816","type":"print"},{"value":"1362-3087","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,20]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tgis20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tgis20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2025-01-07","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-05-12","order":1,"name":"revised","label":"Revised","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-05-12","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-05-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}