{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T15:40:44Z","timestamp":1770392444762,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":28,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819557578","type":"print"},{"value":"9789819557585","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5758-5_12","type":"book-chapter","created":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T04:59:42Z","timestamp":1770353982000},"page":"145-159","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["CAFNet: A Cross-Modal Alignment and\u00a0Fusion Framework for\u00a0Misaligned RGB-T Video Object Detection"],"prefix":"10.1007","author":[{"given":"Yu","family":"Fan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Keke","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,2,7]]},"reference":[{"key":"12_CR1","unstructured":"Zhao, Z.-Q., Zheng, P., Xu, S.-T., Wu, X.: Object detection with deep learning: a review. arXiv preprint arXiv:1807.05511 (2018)"},{"key":"12_CR2","doi-asserted-by":"publisher","first-page":"126804","DOI":"10.1016\/j.neucom.2023.126804","volume":"560","author":"N Bustos","year":"2023","unstructured":"Bustos, N., Mashhadi, M., Lai-Yuen, S.K., Sarkar, S., Das, T.K.: A systematic literature review on object detection using near infrared and thermal images. Neurocomputing 560, 126804 (2023)","journal-title":"Neurocomputing"},{"key":"12_CR3","unstructured":"Tu, Z., Wang, Q., Wang, H., Wang, K., Li, C.: Erasure-based interaction network for RGB-T video object detection and a unified benchmark. arXiv preprint arXiv:2308.01630 (2023)"},{"key":"12_CR4","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1007\/s00138-013-0570-5","volume":"25","author":"R Gade","year":"2014","unstructured":"Gade, R., Moeslund, T.B.: Thermal cameras and applications: a survey. Mach. Vis. Appl. 25, 245\u2013262 (2014)","journal-title":"Mach. Vis. Appl."},{"key":"12_CR5","unstructured":"Wang, Q., Tu, Z., Wang, K., Gu, L., Guo, C.: Unveiling the limits of alignment: multi-modal dynamic local fusion network and a benchmark for unaligned RGB-T video object detection. arXiv preprint arXiv:2410.12143 (2024)"},{"key":"12_CR6","doi-asserted-by":"crossref","unstructured":"Wang, K., Lin, D., Li, C., Tu, Z., Luo, B.: Alignment-free RGB-T salient object detection: semantics-guided asymmetric correlation network and a unified benchmark. IEEE Trans. Multimedia (2024)","DOI":"10.1109\/TMM.2024.3410542"},{"key":"12_CR7","doi-asserted-by":"crossref","unstructured":"Zhang, L., Zhu, X., Chen, X., Yang, X., Lei, Z., Liu, Z.: Weakly aligned cross-modal learning for multispectral pedestrian detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5127\u20135137 (2019)","DOI":"10.1109\/ICCV.2019.00523"},{"key":"12_CR8","doi-asserted-by":"crossref","unstructured":"Wanchaitanawong, N., Tanaka, M., Shibata, T., Okutomi, M.: Multi-modal pedestrian detection with large misalignment based on modal-wise regression and multi-modal IoU. In: 2021 17th International Conference on Machine Vision and Applications (MVA), pp. 1\u20136. IEEE (2021)","DOI":"10.23919\/MVA51890.2021.9511366"},{"key":"12_CR9","doi-asserted-by":"publisher","first-page":"102246","DOI":"10.1016\/j.inffus.2024.102246","volume":"105","author":"M Yuan","year":"2024","unstructured":"Yuan, M., Shi, X., Wang, N., Wang, Y., Wei, X.: Improving RGB-Infrared object detection with cascade alignment-guided transformer. Inf. Fus. 105, 102246 (2024)","journal-title":"Inf. Fus."},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Chen, C., et al.: Weakly misalignment-free adaptive feature alignment for UAVs-based multi-modal object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 26836\u201326845 (2024)","DOI":"10.1109\/CVPR52733.2024.02534"},{"issue":"10","key":"12_CR11","doi-asserted-by":"publisher","first-page":"6700","DOI":"10.1109\/TCSVT.2022.3168279","volume":"32","author":"Y Sun","year":"2022","unstructured":"Sun, Y., Cao, B., Zhu, P., Hu, Q.: Drone-based RGB-Infrared cross-modality vehicle detection via uncertainty-aware learning. IEEE Trans. Circ. Syst. Video Technol. 32(10), 6700\u20136713 (2022)","journal-title":"IEEE Trans. Circ. Syst. Video Technol."},{"key":"12_CR12","unstructured":"Wang, Q., Tu, Z., Li, C., Jiang, B.: Multimodal spatio-temporal graph learning for alignment-free RGB-T video object detection. arXiv preprint arXiv:2504.11779 (2025)"},{"key":"12_CR13","unstructured":"Veli\u010dkovi\u0107, P., Cucurull, G., Casanova, A., Romero, A., Lio, P., Bengio, Y.: Graph attention networks. arXiv preprint arXiv:1710.10903 (2017)"},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"Nag, S.: Image registration techniques: a survey. arXiv preprint arXiv:1712.07540 (2017)","DOI":"10.31224\/osf.io\/rv65c"},{"key":"12_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1007\/978-3-319-66182-7_35","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2017","author":"X Cao","year":"2017","unstructured":"Cao, X., et al.: Deformable image registration based on similarity-steered CNN regression. In: Descoteaux, M., Maier-Hein, L., Franz, A., Jannin, P., Collins, D.L., Duchesne, S. (eds.) MICCAI 2017. LNCS, vol. 10433, pp. 300\u2013308. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-66182-7_35"},{"key":"12_CR16","unstructured":"Zhang, L., et al.: Weakly aligned feature fusion for multimodal object detection. IEEE Trans. Neural Netw. Learn. Syst. (2021)"},{"key":"12_CR17","doi-asserted-by":"publisher","unstructured":"Yuan, M., Wang, Y., Wei, X.: Translation, scale and rotation: Cross-modal alignment meets RGB-Infrared vehicle detection. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) European Conference on Computer Vision. LNCS, pp. 509\u2013525. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20077-9_30","DOI":"10.1007\/978-3-031-20077-9_30"},{"key":"12_CR18","doi-asserted-by":"crossref","unstructured":"Zhu, X., Xiong, Y., Dai, J., Yuan, L., Wei, Y.: Deep feature flow for video recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2349\u20132358 (2017)","DOI":"10.1109\/CVPR.2017.441"},{"key":"12_CR19","doi-asserted-by":"crossref","unstructured":"Zhu, X., Wang, Y., Dai, J., Yuan, L., Wei, Y.: Flow-guided feature aggregation for video object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 408\u2013417 (2017)","DOI":"10.1109\/ICCV.2017.52"},{"key":"12_CR20","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Zisserman, A.: Detect to track and track to detect. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3038\u20133046 (2017)","DOI":"10.1109\/ICCV.2017.330"},{"key":"12_CR21","first-page":"201","volume":"1","author":"H Mao","year":"2019","unstructured":"Mao, H., Kong, T., et al.: CatDet: cascaded tracked detector for efficient object detection from video. Proc. Mach. Learn. Syst. 1, 201\u2013211 (2019)","journal-title":"Proc. Mach. Learn. Syst."},{"key":"12_CR22","doi-asserted-by":"crossref","unstructured":"Xiao, F., Lee, Y. J.: Video object detection with an aligned spatial-temporal memory. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 485\u2013501 (2018)","DOI":"10.1007\/978-3-030-01237-3_30"},{"key":"12_CR23","doi-asserted-by":"crossref","unstructured":"Guo, C., et al.: Progressive sparse local attention for video object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3909\u20133918 (2019)","DOI":"10.1109\/ICCV.2019.00401"},{"key":"12_CR24","doi-asserted-by":"crossref","unstructured":"Chen, Y., Cao, Y., Hu, H., Wang, L.: Memory enhanced global-local aggregation for video object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10337\u201310346 (2020)","DOI":"10.1109\/CVPR42600.2020.01035"},{"issue":"6","key":"12_CR25","doi-asserted-by":"publisher","first-page":"7853","DOI":"10.1109\/TPAMI.2022.3223955","volume":"45","author":"Q Zhou","year":"2022","unstructured":"Zhou, Q., et al.: TransVOD: end-to-end video object detection with spatial-temporal transformers. IEEE Trans. Pattern Anal. Mach. Intell. 45(6), 7853\u20137869 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"12_CR26","doi-asserted-by":"crossref","unstructured":"Sun, D., Yang, X., Liu, M.-Y., Kautz, J.: PWC-Net: CNNs for optical flow using pyramid, warping, and cost volume. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8934\u20138943 (2018)","DOI":"10.1109\/CVPR.2018.00931"},{"key":"12_CR27","unstructured":"Hendrycks, D., Gimpel, K.: Gaussian Error Linear Units (GELUs). arXiv preprint arXiv:1606.08415 (2016)"},{"key":"12_CR28","doi-asserted-by":"publisher","unstructured":"Bottou, L.: Large-scale machine learning with stochastic gradient descent. In: Lechevallier, Y., Saporta, G. (eds.) Proceedings of COMPSTAT\u20192010: 19th International Conference on Computational Statistics, Paris, France, 22\u201327 August 2010, Keynote, Invited and Contributed Papers, pp. 177\u2013186. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-7908-2604-3_16","DOI":"10.1007\/978-3-7908-2604-3_16"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5758-5_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T04:59:55Z","timestamp":1770353995000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5758-5_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819557578","9789819557585"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5758-5_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"7 February 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}