{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T06:05:23Z","timestamp":1780380323881,"version":"3.54.1"},"publisher-location":"Singapore","reference-count":33,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819698486","type":"print"},{"value":"9789819698493","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-9849-3_24","type":"book-chapter","created":{"date-parts":[[2025,7,18]],"date-time":"2025-07-18T08:59:09Z","timestamp":1752829149000},"page":"282-293","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Multi-level Dynamic Gated Interaction Fusion Network for Remote Sensing Visual Question Answering"],"prefix":"10.1007","author":[{"given":"Ke","family":"Hu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wenzhen","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shichao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,7,19]]},"reference":[{"key":"24_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TGRS.2022.3192460","volume":"60","author":"Y Bazi","year":"2022","unstructured":"Bazi, Y., Al Rahhal, M.M., Mekhalfi, M.L., Al Zuair, M.A., Melgani, F.: Bi-modal transformer-based approach for visual question answering in remote sensing imagery. IEEE Trans. Geosci. Remote Sens. 60, 1\u201311 (2022)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"24_CR2","doi-asserted-by":"crossref","unstructured":"Cubuk, E.D., Zoph, B., Shlens, J., Le, Q.: RandAugment: practical automated data augmentation with a reduced search space. In: NeurIPS 2020 (2020)","DOI":"10.1109\/CVPRW50498.2020.00359"},{"key":"24_CR3","unstructured":"Devlin, J.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"24_CR4","doi-asserted-by":"crossref","unstructured":"Fan, et al., Z.: Saliency-guided unsupervised feature learning for scene classification. IEEE Trans. Geosci. Remote. Sens. 53(4), 2175\u20132184 (2015)","DOI":"10.1109\/TGRS.2014.2357078"},{"key":"24_CR5","volume":"122","author":"J Feng","year":"2023","unstructured":"Feng, J., Tang, E., Zeng, M., Gu, Z., Kou, P., Zheng, W.: Improving visual question answering for remote sensing via alternate-guided attention and combined loss. Int. J. Appl. Earth Obs. Geoinf. 122, 103427 (2023)","journal-title":"Int. J. Appl. Earth Obs. Geoinf."},{"key":"24_CR6","volume":"126","author":"J Feng","year":"2024","unstructured":"Feng, J., Wang, H.: A multi-scale contextual attention network for remote sensing visual question answering. Int. J. Appl. Earth Obs. Geoinf. 126, 103641 (2024)","journal-title":"Int. J. Appl. Earth Obs. Geoinf."},{"key":"24_CR7","doi-asserted-by":"crossref","unstructured":"Gui-Song, et al., X.: AID: A benchmark data set for performance evaluation of aerial scene classification. IEEE Trans. Geosci. Remote. Sens. 55(7), 3965\u20133981 (2017)","DOI":"10.1109\/TGRS.2017.2685945"},{"key":"24_CR8","doi-asserted-by":"crossref","unstructured":"Guisong, et al., X.: DOTA: A large-scale dataset for object detection in aerial images. In: 2018 IEEE Conference on Computer Vision and Pattern Recognition, CVPR, pp. 3974\u20133983 (2018)","DOI":"10.1109\/CVPR.2018.00418"},{"key":"24_CR9","doi-asserted-by":"crossref","unstructured":"He, J., et al.: PERS: Parameter-efficient multi-modal transfer learning for remote sensing visual question answering. IEEE J. Sel. Top. Appl. Earth Observ. Remote Sens. 17, 14823 (2024)","DOI":"10.1109\/JSTARS.2024.3447086"},{"key":"24_CR10","doi-asserted-by":"crossref","unstructured":"Li, P., He, J., Liu, G., Zhong, S.: PECR: parameter-efficient transfer learning with cross-modal representation learning for remote sensing visual question answering. In: ICASSP 2024\u20132024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6740\u20136744. IEEE (2024)","DOI":"10.1109\/ICASSP48485.2024.10446146"},{"key":"24_CR11","doi-asserted-by":"crossref","unstructured":"Li, P., Liu, G., He, J., Meng, X., Zhong, S., Chen, X.: RSMoDM: multimodal momentum distillation model for remote sensing visual question answering. IEEE J. Sel. Top. Appl. Earth Observ. Remote Sens. 17, 16799\u201316814 (2024)","DOI":"10.1109\/JSTARS.2024.3419035"},{"key":"24_CR12","doi-asserted-by":"crossref","unstructured":"Li, P., Liu, G., He, J., Zhao, Z., Zhong, S.: Masked vision and language pre-training with unimodal and multimodal contrastive losses for medical visual question answering. In: International Conference on Medical Image Computing and Computer- Assisted Intervention, pp. 374\u2013383 (2023)","DOI":"10.1007\/978-3-031-43907-0_36"},{"key":"24_CR13","doi-asserted-by":"crossref","unstructured":"Li, P., Liu, G., Tan, L., Liao, J., Zhong, S.: Self-supervised vision-language pre-training for medial visual question answering. In: 2023 IEEE 20th International Symposium on Biomedical Imaging (ISBI), pp. 1\u20135. IEEE (2023)","DOI":"10.1109\/ISBI53787.2023.10230743"},{"issue":"22","key":"24_CR14","doi-asserted-by":"publisher","first-page":"28205","DOI":"10.1109\/JSEN.2023.3318835","volume":"23","author":"W Li","year":"2023","unstructured":"Li, W., Meng, X., Zhao, Z., Liu, Z., Chen, C., Wang, H.: LOT: a transformer-based approach based on channel state information for indoor localization. IEEE Sens. J. 23(22), 28205\u201328219 (2023)","journal-title":"IEEE Sens. J."},{"key":"24_CR15","doi-asserted-by":"crossref","unstructured":"Li, X., Wen, C., Hu, Y., Yuan, Z., Zhu, X.X.: Vision-language models in remote sensing: Current progress and future trends. IEEE Geosci. Remote Sens. Mag. 12, 32\u201366 (2024)","DOI":"10.1109\/MGRS.2024.3383473"},{"key":"24_CR16","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: Enhancing remote sensing visual question answering: a mask-based dual-stream feature mutual attention network. IEEE Geosci. Remote Sens. Lett. 21, 6007805 (2024)","DOI":"10.1109\/LGRS.2024.3389042"},{"key":"24_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2024.104748","volume":"160","author":"G Liu","year":"2024","unstructured":"Liu, G., He, J., Li, P., Zhao, Z., Zhong, S.: Cross-modal self-supervised vision language pre-training with multiple objectives for medical visual question answering. J. Biomed. Inform. 160, 104748 (2024)","journal-title":"J. Biomed. Inform."},{"issue":"19","key":"24_CR18","doi-asserted-by":"publisher","first-page":"4682","DOI":"10.3390\/rs15194682","volume":"15","author":"G Liu","year":"2023","unstructured":"Liu, G., He, J., Li, P., Zhong, S., Li, H., He, G.: Unified transformer with cross-modal mixture experts for remote-sensing visual question answering. Remote Sensing 15(19), 4682 (2023)","journal-title":"Remote Sensing"},{"key":"24_CR19","doi-asserted-by":"crossref","unstructured":"Lobry, S., Demir, B., Tuia, D.: RSVQA meets Bigearthnet: a new, large-scale, visual question answering dataset for remote sensing. In: 2021 IEEE International Geoscience and Remote Sensing Symposium IGARSS, pp. 1218\u20131221. IEEE (2021)","DOI":"10.1109\/IGARSS47720.2021.9553307"},{"key":"24_CR20","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: 7th International Conference on Learning Representations, ICLR 2019 (2019)"},{"key":"24_CR21","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"issue":"3","key":"24_CR22","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1145\/1010614.1010616","volume":"22","author":"X Wu","year":"2004","unstructured":"Wu, X., Zhang, C., Zhang, S.: Efficient mining of both positive and negative association rules. ACM Trans. Inf. Syst. (TOIS) 22(3), 381\u2013405 (2004)","journal-title":"ACM Trans. Inf. Syst. (TOIS)"},{"key":"24_CR23","doi-asserted-by":"crossref","unstructured":"Yi, Y., et al.: Bag-of-visual-words and spatial extensions for land-use classification. In: 18th ACM SIGSPATIAL International Symposium on Advances in Geographic Information Systems, ACM-GIS, pp. 270\u2013279 (2010)","DOI":"10.1145\/1869790.1869829"},{"key":"24_CR24","first-page":"1","volume":"60","author":"Z Yuan","year":"2022","unstructured":"Yuan, Z., Mou, L., Wang, Q., Zhu, X.X.: From easy to hard: learning language-guided curriculum for visual question answering on remote sensing data. IEEE Trans. Geosci. Remote Sens. 60, 1\u201311 (2022)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"24_CR25","doi-asserted-by":"crossref","unstructured":"Ze, L., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: 2021 IEEE\/CVF International Conference on Computer Vision, ICCV 2021, Montreal, QC, Canada, October 10\u201317, 2021, pp. 9992\u201310002. IEEE (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"issue":"8","key":"24_CR26","doi-asserted-by":"publisher","first-page":"5535","DOI":"10.1109\/TGRS.2019.2900302","volume":"57","author":"Y Zhang","year":"2019","unstructured":"Zhang, Y., Yuan, Y., Feng, Y., Lu, X.: Hierarchical and robust convolutional neural network for very high-resolution remote sensing object detection. IEEE Trans. Geosci. Remote Sens. 57(8), 5535\u20135548 (2019)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"24_CR27","first-page":"1","volume":"61","author":"Z Zhang","year":"2023","unstructured":"Zhang, Z., et al.: A spatial hierarchical reasoning network for remote sensing visual question answering. IEEE Trans. Geosci. Remote Sens. 61, 1\u201315 (2023)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"24_CR28","doi-asserted-by":"crossref","unstructured":"Zheng, W., et al.: Feature refinement decomposition and relation preference enhancement for remote sensing change detection. In: ICASSP 2025\u20132025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1\u20135. IEEE (2025)","DOI":"10.1109\/ICASSP49660.2025.10890394"},{"key":"24_CR29","doi-asserted-by":"crossref","unstructured":"Zheng, W., et al.: Cross- temporal knowledge injection with color distribution normalization for remote sensing change detection. IEEE J. Sel. Top. Appl. Earth Observations Remote Sens. 18, 6249\u20136265 (2025)","DOI":"10.1109\/JSTARS.2025.3534583"},{"key":"24_CR30","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TGRS.2020.3040221","volume":"60","author":"X Zheng","year":"2021","unstructured":"Zheng, X., Wang, B., Du, X., Lu, X.: Mutual attention inception network for remote sensing visual question answering. IEEE Trans. Geosci. Remote Sens. 60, 1\u201314 (2021)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"24_CR31","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2024.106781","volume":"181","author":"G Zhang","year":"2025","unstructured":"Zhang, G., Yuan, G., Cheng, D., et al.: Disentangled contrastive learning for fair graph representations. Neural Netw. 181, 106781 (2025)","journal-title":"Neural Netw."},{"key":"24_CR32","doi-asserted-by":"publisher","unstructured":"Zhang, G., Yuan, G., Cheng, D., et al.: Deconfounding representation learning for mitigating latent confounding effects in recommendation. Knowl. Inf. Syst., 1\u201322 (2025). https:\/\/doi.org\/10.1007\/s10115-025-02404-7","DOI":"10.1007\/s10115-025-02404-7"},{"issue":"4","key":"24_CR33","first-page":"1","volume":"18","author":"G Zhang","year":"2024","unstructured":"Zhang, G., Zhang, S., Yuan, G.: Bayesian graph local extrema convolution with long-tail strategy for misinformation detection. ACM Trans. Knowl. Discov. DataDiscov. Data 18(4), 1\u201321 (2024)","journal-title":"ACM Trans. Knowl. Discov. DataDiscov. Data"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-9849-3_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T05:44:51Z","timestamp":1780379091000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-9849-3_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819698486","9789819698493"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-9849-3_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"19 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ningbo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/icg\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}