{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T15:57:54Z","timestamp":1768233474801,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":35,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819557547","type":"print"},{"value":"9789819557554","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5755-4_40","type":"book-chapter","created":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T12:32:56Z","timestamp":1768221176000},"page":"583-597","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Adaptive Local Fine-Grained Feature Learning for\u00a0Multimodal UAV-Based Vehicle Detection"],"prefix":"10.1007","author":[{"given":"Jingjing","family":"Wan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Meng","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,13]]},"reference":[{"key":"40_CR1","doi-asserted-by":"crossref","unstructured":"Li, Z., et al.: Is ego status all you need for open-loop end-to-end autonomous driving? In: CVPR, pp. 14864\u201314873 (2024)","DOI":"10.1109\/CVPR52733.2024.01408"},{"key":"40_CR2","doi-asserted-by":"crossref","unstructured":"Upadhye, S., Neelakandan, S., Thangaraj, K., Babu, D.V., Arulkumar, N., Qureshi, K.: Modeling of real time traffic flow monitoring system using deep learning and unmanned aerial vehicles. J. Mobile Multimedia 477\u2013496 (2023)","DOI":"10.13052\/jmm1550-4646.1926"},{"key":"40_CR3","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: ICCV, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"40_CR4","doi-asserted-by":"crossref","unstructured":"Yuan, M., Wei, X.: C2Former: calibrated and complementary transformer for RGB-infrared object detection. IEEE Trans. Geosci. Remote Sens. (2024)","DOI":"10.1109\/TGRS.2024.3376819"},{"key":"40_CR5","doi-asserted-by":"crossref","unstructured":"Xie, X., Cheng, G., Wang, J., Yao, X., Han, J.: Oriented R-CNN for object detection. In: ICCV, pp. 3520\u20133529 (2021)","DOI":"10.1109\/ICCV48922.2021.00350"},{"key":"40_CR6","first-page":"1","volume":"60","author":"J Han","year":"2021","unstructured":"Han, J., Ding, J., Li, J., Xia, G.-S.: Align deep features for oriented object detection. IEEE Trans. Geosci. Remote Sens. 60, 1\u201311 (2021)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"40_CR7","doi-asserted-by":"crossref","unstructured":"He, X., Tang, C., Zou, X., Zhang, W.: Multispectral object detection via cross-modal conflict-aware learning. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 1465\u20131474 (2023)","DOI":"10.1145\/3581783.3612651"},{"key":"40_CR8","doi-asserted-by":"crossref","unstructured":"Chen, C., et al.: Weakly misalignment-free adaptive feature alignment for UAVs-based multimodal object detection. In: CVPR, pp. 26836\u201326845 (2024)","DOI":"10.1109\/CVPR52733.2024.02534"},{"key":"40_CR9","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: ICML, pp. 1597\u20131607. PMLR (2020)"},{"key":"40_CR10","doi-asserted-by":"publisher","unstructured":"Yuan, M., Wang, Y., Wei, X.: Translation, scale and rotation: cross-modal alignment meets RGB-infrared vehicle detection. In: ECCV, pp. 509\u2013525. Springer, Heidelberg (2022). https:\/\/doi.org\/10.1007\/978-3-031-20077-9_30","DOI":"10.1007\/978-3-031-20077-9_30"},{"key":"40_CR11","doi-asserted-by":"crossref","unstructured":"Sun, Z., Feng, C., Patras, I., Tzimiropoulos, G.: LAFS: landmark-based facial self-supervised learning for face recognition. In: CVPR, pp. 1639\u20131649 (2024)","DOI":"10.1109\/CVPR52733.2024.00162"},{"key":"40_CR12","doi-asserted-by":"crossref","unstructured":"Wang, J., et al.: Improving the robustness of knowledge-grounded dialogue via contrastive learning. In: AAAI, vol. 38, no. 17, pp. 19135\u201319143 (2024)","DOI":"10.1609\/aaai.v38i17.29881"},{"issue":"9","key":"40_CR13","doi-asserted-by":"publisher","first-page":"9984","DOI":"10.1109\/TITS.2023.3266487","volume":"24","author":"Y Zhu","year":"2023","unstructured":"Zhu, Y., Sun, X., Wang, M., Zhang, D., Ren, Y., Li, Y.: Multi-modal feature pyramid transformer for RGB-infrared object detection. IEEE Trans. Intell. Transp. Syst. 24(9), 9984\u20139995 (2023)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"40_CR14","doi-asserted-by":"crossref","unstructured":"Li, J., Wen, Y., He, L.: ScConv: spatial and channel reconstruction convolution for feature redundancy. In: CVPR, pp. 6153\u20136162 (2023)","DOI":"10.1109\/CVPR52729.2023.00596"},{"key":"40_CR15","doi-asserted-by":"crossref","unstructured":"Chen, L., Gu, L., Zheng, D., Fu, Y.: Frequency-adaptive dilated convolution for semantic segmentation. In: CVPR, pp. 3414\u20133425 (2024)","DOI":"10.1109\/CVPR52733.2024.00328"},{"key":"40_CR16","doi-asserted-by":"crossref","unstructured":"Lin, T.: Focal loss for dense object detection. arXiv preprint arXiv:1708.02002 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"40_CR17","doi-asserted-by":"crossref","unstructured":"Han, J., Ding, J., Xue, N., Xia, G.-S.: ReDet: a rotation-equivariant detector for aerial object detection. In: CVPR, pp. 2786\u20132795 (2021)","DOI":"10.1109\/CVPR46437.2021.00281"},{"issue":"10","key":"40_CR18","doi-asserted-by":"publisher","first-page":"6700","DOI":"10.1109\/TCSVT.2022.3168279","volume":"32","author":"Y Sun","year":"2022","unstructured":"Sun, Y., Cao, B., Zhu, P., Hu, Q.: Drone-based RGB-infrared cross-modality vehicle detection via uncertainty-aware learning. IEEE Trans. Circuits Syst. Video Technol. 32(10), 6700\u20136713 (2022)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"40_CR19","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2024.102246","volume":"105","author":"M Yuan","year":"2024","unstructured":"Yuan, M., Shi, X., Wang, N., Wang, Y., Wei, X.: Improving RGB-infrared object detection with cascade alignment-guided transformer. Inf. Fusion 105, 102246 (2024)","journal-title":"Inf. Fusion"},{"key":"40_CR20","doi-asserted-by":"crossref","unstructured":"Cai, Y., Bian, H., Lin, J., Wang, H., Timofte, R., Zhang, Y.: Retinexformer: one-stage retinex-based transformer for low-light image enhancement. In: ICCV, pp. 12504\u201312513 (2023)","DOI":"10.1109\/ICCV51070.2023.01149"},{"issue":"24","key":"40_CR21","doi-asserted-by":"publisher","first-page":"5128","DOI":"10.3390\/rs13245128","volume":"13","author":"X Zhang","year":"2021","unstructured":"Zhang, X., Leng, C., Hong, Y., Pei, Z., Cheng, I., Basu, A.: Multimodal remote sensing image registration methods and advancements: a survey. Remote Sens. 13(24), 5128 (2021)","journal-title":"Remote Sens."},{"key":"40_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1007\/978-3-319-66182-7_35","volume-title":"Medical Image Computing and Computer Assisted Intervention - MICCAI 2017","author":"X Cao","year":"2017","unstructured":"Cao, X., et al.: Deformable image registration based on similarity-steered CNN regression. In: Descoteaux, M., Maier-Hein, L., Franz, A., Jannin, P., Collins, D.L., Duchesne, S. (eds.) MICCAI 2017. LNCS, vol. 10433, pp. 300\u2013308. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-66182-7_35"},{"key":"40_CR23","doi-asserted-by":"publisher","first-page":"166","DOI":"10.1016\/j.isprsjprs.2020.09.012","volume":"169","author":"LH Hughes","year":"2020","unstructured":"Hughes, L.H., Marcos, D., Lobry, S., Tuia, D., Schmitt, M.: A deep learning framework for matching of SAR and optical imagery. ISPRS J. Photogramm. Remote. Sens. 169, 166\u2013179 (2020)","journal-title":"ISPRS J. Photogramm. Remote. Sens."},{"issue":"11","key":"40_CR24","doi-asserted-by":"publisher","first-page":"7380","DOI":"10.1109\/TPAMI.2021.3119563","volume":"44","author":"P Zhu","year":"2021","unstructured":"Zhu, P., et al.: Detection and tracking meet drones challenge. IEEE Trans. Pattern Anal. Mach. Intell. 44(11), 7380\u20137399 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"40_CR25","doi-asserted-by":"crossref","unstructured":"Yang, S., Ding, M., Wu, Y., Li, Z., Zhang, J.: Implicit neural representation for cooperative low-light image enhancement. In: ICCV, pp. 12918\u201312927 (2023)","DOI":"10.1109\/ICCV51070.2023.01187"},{"issue":"1","key":"40_CR26","doi-asserted-by":"publisher","first-page":"315","DOI":"10.1109\/TCSVT.2021.3060162","volume":"32","author":"T Liu","year":"2021","unstructured":"Liu, T., Lam, K.M., Zhao, R., Qiu, G.: Deep cross-modal representation learning and distillation for illumination-invariant pedestrian detection. IEEE Trans. Circuits Syst. Video Technol. 32(1), 315\u2013329 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"40_CR27","doi-asserted-by":"crossref","unstructured":"Zhang, P., Wang, Y., Liu, Y., Tu, Z., Lu, H.: Magic tokens: select diverse tokens for multi-modal object re-identification. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17117\u201317126 (2024)","DOI":"10.1109\/CVPR52733.2024.01620"},{"issue":"10","key":"40_CR28","doi-asserted-by":"publisher","first-page":"4058","DOI":"10.1109\/TCSVT.2020.3045747","volume":"31","author":"P Zhu","year":"2020","unstructured":"Zhu, P., Zheng, J., Du, D., Wen, L., Sun, Y., Hu, Q.: Multi-drone-based single object tracking with agent sharing network. IEEE Trans. Circuits Syst. Video Technol. 31(10), 4058\u20134070 (2020)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"40_CR29","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9729\u20139738 (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"40_CR30","first-page":"1","volume":"62","author":"J Cheng","year":"2024","unstructured":"Cheng, J., et al.: DIMA: digging into multigranular archetype for fine-grained object detection. IEEE Trans. Geosci. Remote Sens. 62, 1\u201314 (2024)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"40_CR31","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"787","DOI":"10.1007\/978-3-030-58523-5_46","volume-title":"Computer Vision \u2013 ECCV 2020","author":"K Zhou","year":"2020","unstructured":"Zhou, K., Chen, L., Cao, X.: Improving multispectral pedestrian detection by addressing modality imbalance problems. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12363, pp. 787\u2013803. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58523-5_46"},{"key":"40_CR32","doi-asserted-by":"crossref","unstructured":"Xie, E., et al.: DetCo: unsupervised contrastive learning for object detection. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8392\u20138401 (2021)","DOI":"10.1109\/ICCV48922.2021.00828"},{"key":"40_CR33","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.110043","volume":"146","author":"H Zhou","year":"2024","unstructured":"Zhou, H., Tian, C., Zhang, Z., Li, C., Xie, Y., Li, Z.: Frequency-aware feature aggregation network with dual-task consistency for RGB-T salient object detection. Pattern Recogn. 146, 110043 (2024)","journal-title":"Pattern Recogn."},{"key":"40_CR34","doi-asserted-by":"crossref","unstructured":"Cong, R., Sun, M., Zhang, S., Zhou, X., Zhang, W., Zhao, Y.: Frequency perception network for camouflaged object detection. In: Proceedings of ACM International Conference on Multimedia, pp. 1179\u20131189 (2023)","DOI":"10.1145\/3581783.3612083"},{"key":"40_CR35","doi-asserted-by":"crossref","unstructured":"Zhang, T., Guo, H., Jiao, Q., Liu, Z., Li, Q.: Efficient RGB-T tracking via cross-modality distillation. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5404\u20135413 (2023)","DOI":"10.1109\/CVPR52729.2023.00523"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5755-4_40","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T12:33:18Z","timestamp":1768221198000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5755-4_40"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819557547","9789819557554"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5755-4_40","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"13 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}