{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T08:02:15Z","timestamp":1743148935526,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":42,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819786848"},{"type":"electronic","value":"9789819786855"}],"license":[{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8685-5_8","type":"book-chapter","created":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T05:04:56Z","timestamp":1730523896000},"page":"108-121","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["ACENet: Adaptive Context Enhancement Network for\u00a0RGB-T Video Object Detection"],"prefix":"10.1007","author":[{"given":"Zhengzheng","family":"Tu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Le","family":"Gu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Danying","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhicheng","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,3]]},"reference":[{"key":"8_CR1","doi-asserted-by":"crossref","unstructured":"Beery, S., Wu, G., Rathod, V., Votel, R., Huang, J.: Context r-CNN: long term temporal context for per-camera object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13075\u201313085 (2020)","DOI":"10.1109\/CVPR42600.2020.01309"},{"key":"8_CR2","doi-asserted-by":"crossref","unstructured":"Bottou, L.: Stochastic gradient descent tricks. In: Neural Networks: Tricks of the Trade, 2nd ed., pp. 421\u2013436. Springer (2012)","DOI":"10.1007\/978-3-642-35289-8_25"},{"key":"8_CR3","doi-asserted-by":"crossref","unstructured":"Cai, Z., Vasconcelos, N.: Cascade r-CNN: Delving into high quality object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6154\u20136162 (2018)","DOI":"10.1109\/CVPR.2018.00644"},{"issue":"6","key":"8_CR4","doi-asserted-by":"publisher","first-page":"6881","DOI":"10.1109\/TPAMI.2020.3047209","volume":"45","author":"Y Cao","year":"2020","unstructured":"Cao, Y., Xu, J., Lin, S., Wei, F., Hu, H.: Global context networks. IEEE Trans. Pattern Anal. Mach. Intell. 45(6), 6881\u20136895 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"8_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Y., Cao, Y., Hu, H., Wang, L.: Memory enhanced global-local aggregation for video object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10337\u201310346 (2020)","DOI":"10.1109\/CVPR42600.2020.01035"},{"key":"8_CR6","doi-asserted-by":"crossref","unstructured":"Deng, F., Feng, H., Liang, M., Wang, H., Yang, Y., Gao, Y., Chen, J., Hu, J., Guo, X., Lam, T.L.: Feanet: feature-enhanced attention network for RGB-thermal real-time semantic segmentation. In: 2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 4467\u20134473. IEEE (2021)","DOI":"10.1109\/IROS51168.2021.9636084"},{"issue":"4","key":"8_CR7","doi-asserted-by":"publisher","first-page":"2091","DOI":"10.1109\/TCSVT.2021.3082939","volume":"32","author":"W Gao","year":"2021","unstructured":"Gao, W., Liao, G., Ma, S., Li, G., Liang, Y., Lin, W.: Unified information fusion network for multi-modal RGB-d and RGB-t salient object detection. IEEE Trans. Circuits Syst. Video Technol. 32(4), 2091\u20132106 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"8_CR8","unstructured":"Ge, Z., Liu, S., Wang, F., Li, Z., Sun, J.: Yolox: exceeding yolo series in 2021 (2021). arXiv:2107.08430"},{"key":"8_CR9","doi-asserted-by":"crossref","unstructured":"Gong, T., Chen, K., Wang, X., Chu, Q., Zhu, F., Lin, D., Yu, N., Feng, H.: Temporal ROI align for video object recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a035, pp. 1442\u20131450 (2021)","DOI":"10.1609\/aaai.v35i2.16234"},{"key":"8_CR10","doi-asserted-by":"crossref","unstructured":"Han, M., Wang, Y., Chang, X., Qiao, Y.: Mining inter-video proposal relations for video object detection. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, 23\u201328 Aug. 2020, Proceedings, Part XXI 16, pp. 431\u2013446. Springer (2020)","DOI":"10.1007\/978-3-030-58589-1_26"},{"key":"8_CR11","doi-asserted-by":"crossref","unstructured":"He, F., Gao, N., Jia, J., Zhao, X., Huang, K.: Queryprop: object query propagation for high-performance video object detection. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a036, pp. 834\u2013842 (2022)","DOI":"10.1609\/aaai.v36i1.19965"},{"key":"8_CR12","doi-asserted-by":"crossref","unstructured":"He, L., Zhou, Q., Li, X., Niu, L., Cheng, G., Li, X., Liu, W., Tong, Y., Ma, L., Zhang, L.: End-to-end video object detection with spatial-temporal transformers. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 1507\u20131516 (2021)","DOI":"10.1145\/3474085.3475285"},{"key":"8_CR13","doi-asserted-by":"crossref","unstructured":"Hou, Q., Zhou, D., Feng, J.: Coordinate attention for efficient mobile network design. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13713\u201313722 (2021)","DOI":"10.1109\/CVPR46437.2021.01350"},{"issue":"8","key":"8_CR14","doi-asserted-by":"publisher","first-page":"3195","DOI":"10.1109\/TNNLS.2021.3053249","volume":"33","author":"L Jiao","year":"2021","unstructured":"Jiao, L., Zhang, R., Liu, F., Yang, S., Hou, B., Li, L., Tang, X.: New generation deep learning for video object detection: a survey. IEEE Trans. Neural Netw. Learn. Syst. 33(8), 3195\u20133215 (2021)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"8_CR15","unstructured":"Jocher, G.: ultralytics\/yolov5. https:\/\/github.com\/ultralytics\/yolov5"},{"key":"8_CR16","unstructured":"Li, C., Zhou, A., Yao, A.: Omni-dimensional dynamic convolution (2022). arXiv:2209.07947"},{"key":"8_CR17","doi-asserted-by":"crossref","unstructured":"Lin, Z., Lin, J., Zhu, L., Fu, H., Qin, J., Wang, L.: A new dataset and a baseline model for breast lesion detection in ultrasound videos. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 614\u2013623. Springer (2022)","DOI":"10.1007\/978-3-031-16437-8_59"},{"issue":"7","key":"8_CR18","doi-asserted-by":"publisher","first-page":"4486","DOI":"10.1109\/TCSVT.2021.3127149","volume":"32","author":"Z Liu","year":"2021","unstructured":"Liu, Z., Tan, Y., He, Q., Xiao, Y.: Swinnet: Swin transformer drives edge-aware RGB-d and RGB-t salient object detection. IEEE Trans. Circuits Syst. Video Technol. 32(7), 4486\u20134497 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"8_CR19","doi-asserted-by":"crossref","unstructured":"Qin, C., Cao, J., Fu, H., Anwer, R.M., Khan, F.S.: A spatial-temporal deformable attention based framework for breast lesion detection in videos. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 479\u2013488. Springer (2023)","DOI":"10.1007\/978-3-031-43895-0_45"},{"key":"8_CR20","unstructured":"Redmon, J., Farhadi, A.: Yolov3: an incremental improvement (2018). arXiv:1804.02767"},{"issue":"6","key":"8_CR21","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2016","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-CNN: towards real-time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. 39(6), 1137\u20131149 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"8_CR22","doi-asserted-by":"crossref","unstructured":"Shi, Y., Wang, N., Guo, X.: Yolov: making still image object detectors great at video object detection. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 2254\u20132262 (2023)","DOI":"10.1609\/aaai.v37i2.25320"},{"key":"8_CR23","doi-asserted-by":"crossref","unstructured":"Sun, G., Hua, Y., Hu, G., Robertson, N.: Mamba: multi-level aggregation via memory bank for video object detection. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a035, pp. 2620\u20132627 (2021)","DOI":"10.1609\/aaai.v35i3.16365"},{"key":"8_CR24","doi-asserted-by":"crossref","unstructured":"Sun, P., Zhang, R., Jiang, Y., Kong, T., Xu, C., Zhan, W., Tomizuka, M., Li, L., Yuan, Z., Wang, C., et\u00a0al.: Sparse r-CNN: end-to-end object detection with learnable proposals. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14454\u201314463 (2021)","DOI":"10.1109\/CVPR46437.2021.01422"},{"key":"8_CR25","unstructured":"Tu, Z., Wang, Q., Wang, H., Wang, K., Li, C.: Erasure-based interaction network for RGBT video object detection and a unified benchmark (2023). arXiv:2308.01630"},{"key":"8_CR26","doi-asserted-by":"crossref","unstructured":"Wang, C.Y., Bochkovskiy, A., Liao, H.Y.M.: Yolov7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7464\u20137475 (2023)","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"8_CR27","doi-asserted-by":"crossref","unstructured":"Wang, G., Li, C., Ma, Y., Zheng, A., Tang, J., Luo, B.: RGB-T saliency detection benchmark: dataset, baselines, analysis and a novel approach. In: Image and Graphics Technologies and Applications: 13th Conference on Image and Graphics Technologies and Applications, IGTA 2018, Beijing, China, 8\u201310 Apr. 2018, Revised Selected Papers 13, pp. 359\u2013369. Springer (2018)","DOI":"10.1007\/978-981-13-1702-6_36"},{"issue":"5","key":"8_CR28","doi-asserted-by":"publisher","first-page":"2949","DOI":"10.1109\/TCSVT.2021.3099120","volume":"32","author":"J Wang","year":"2021","unstructured":"Wang, J., Song, K., Bao, Y., Huang, L., Yan, Y.: Cgfnet: cross-guided fusion network for RGB-T salient object detection. IEEE Trans. Circuits Syst. Video Technol. 32(5), 2949\u20132961 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"8_CR29","doi-asserted-by":"crossref","unstructured":"Wang, S., Zhou, Y., Yan, J., Deng, Z.: Fully motion-aware network for video object detection. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 542\u2013557 (2018)","DOI":"10.1007\/978-3-030-01261-8_33"},{"key":"8_CR30","doi-asserted-by":"crossref","unstructured":"Wu, H., Chen, Y., Wang, N., Zhang, Z.: Sequence level semantics aggregation for video object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9217\u20139225 (2019)","DOI":"10.1109\/ICCV.2019.00931"},{"key":"8_CR31","unstructured":"Xiao, J., Zhao, T., Yao, Y., Yu, Q., Chen, Y.: Context Augmentation and Feature Refinement Network for Tiny Object Detection (2022). https:\/\/openreview.net\/forum?id=q2ZaVU6bEsT"},{"issue":"14","key":"8_CR32","doi-asserted-by":"publisher","first-page":"16129","DOI":"10.1609\/aaai.v38i14.29546","volume":"38","author":"C Xu","year":"2024","unstructured":"Xu, C., Si, J., Guan, Z., Zhao, W., Wu, Y., Gao, X.: Reliable conflictive multi-view learning. Proc. AAAI Conf. Artif. Intell. 38(14), 16129\u201316137 (2024). https:\/\/doi.org\/10.1609\/aaai.v38i14.29546","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"8_CR33","unstructured":"Yu, F., Koltun, V.: Multi-scale context aggregation by dilated convolutions (2015). arXiv:1511.07122"},{"issue":"8","key":"8_CR34","doi-asserted-by":"publisher","first-page":"2375","DOI":"10.1007\/s11263-021-01465-9","volume":"129","author":"Y Yuan","year":"2021","unstructured":"Yuan, Y., Huang, L., Guo, J., Zhang, C., Chen, X., Wang, J.: Ocnet: object context for semantic segmentation. Int. J. Comput. Vision 129(8), 2375\u20132398 (2021)","journal-title":"Int. J. Comput. Vision"},{"key":"8_CR35","first-page":"1","volume":"61","author":"J Zhang","year":"2023","unstructured":"Zhang, J., Lei, J., Xie, W., Fang, Z., Li, Y., Du, Q.: Superyolo: super resolution assisted object detection in multimodal remote sensing imagery. IEEE Trans. Geosci. Remote Sens. 61, 1\u201315 (2023)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"8_CR36","doi-asserted-by":"crossref","unstructured":"Zhang, X., Zhang, X., Du, X., Zhou, X., Yin, J.: Learning multi-domain convolutional network for RGB-T visual tracking. In: 2018 11th International Congress on Image and Signal Processing, BioMedical Engineering and Informatics (CISP-BMEI), pp.\u00a01\u20136. IEEE (2018)","DOI":"10.1109\/CISP-BMEI.2018.8633180"},{"key":"8_CR37","doi-asserted-by":"publisher","first-page":"5264","DOI":"10.1109\/TIP.2021.3079821","volume":"30","author":"S Zhao","year":"2021","unstructured":"Zhao, S., Gong, M., Fu, H., Tao, D.: Adaptive context-aware multi-modal network for depth completion. IEEE Trans. Image Process. 30, 5264\u20135276 (2021)","journal-title":"IEEE Trans. Image Process."},{"key":"8_CR38","doi-asserted-by":"crossref","unstructured":"Zhou, Q., Li, X., He, L., Yang, Y., Cheng, G., Tong, Y., Ma, L., Tao, D.: Transvod: end-to-end video object detection with spatial-temporal transformers. IEEE Trans. Pattern Anal. Mach. Intell. (2022)","DOI":"10.1109\/TPAMI.2022.3223955"},{"issue":"3","key":"8_CR39","doi-asserted-by":"publisher","first-page":"1224","DOI":"10.1109\/TCSVT.2021.3077058","volume":"32","author":"W Zhou","year":"2021","unstructured":"Zhou, W., Guo, Q., Lei, J., Yu, L., Hwang, J.N.: Ecffnet: effective and consistent feature fusion network for RGB-T salient object detection. IEEE Trans. Circuits Syst. Video Technol. 32(3), 1224\u20131235 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"8_CR40","doi-asserted-by":"crossref","unstructured":"Zhu, H., Wei, H., Li, B., Yuan, X., Kehtarnavaz, N.: A review of video object detection: datasets, metrics and methods. Appl. Sci. 10(21), 7834 (2020)","DOI":"10.3390\/app10217834"},{"key":"8_CR41","doi-asserted-by":"crossref","unstructured":"Zhu, X., Wang, Y., Dai, J., Yuan, L., Wei, Y.: Flow-guided feature aggregation for video object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 408\u2013417 (2017)","DOI":"10.1109\/ICCV.2017.52"},{"key":"8_CR42","doi-asserted-by":"crossref","unstructured":"Zhu, X., Xiong, Y., Dai, J., Yuan, L., Wei, Y.: Deep feature flow for video recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2349\u20132358 (2017)","DOI":"10.1109\/CVPR.2017.441"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8685-5_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T05:17:07Z","timestamp":1730524627000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8685-5_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,3]]},"ISBN":["9789819786848","9789819786855"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8685-5_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,3]]},"assertion":[{"value":"3 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2024.prcv.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}