{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T04:01:27Z","timestamp":1743048087650,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":28,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819985548"},{"type":"electronic","value":"9789819985555"}],"license":[{"start":{"date-parts":[[2023,12,28]],"date-time":"2023-12-28T00:00:00Z","timestamp":1703721600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,28]],"date-time":"2023-12-28T00:00:00Z","timestamp":1703721600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-8555-5_7","type":"book-chapter","created":{"date-parts":[[2023,12,27]],"date-time":"2023-12-27T07:02:36Z","timestamp":1703660556000},"page":"81-93","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Modality Balancing Mechanism for\u00a0RGB-Infrared Object Detection in\u00a0Aerial Image"],"prefix":"10.1007","author":[{"given":"Weibo","family":"Cai","sequence":"first","affiliation":[]},{"given":"Zheng","family":"Li","sequence":"additional","affiliation":[]},{"given":"Junhao","family":"Dong","sequence":"additional","affiliation":[]},{"given":"Jianhuang","family":"Lai","sequence":"additional","affiliation":[]},{"given":"Xiaohua","family":"Xie","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,12,28]]},"reference":[{"key":"7_CR1","doi-asserted-by":"publisher","unstructured":"Bao, W., Huang, M., Hu, J., Xiang, X.: Attention-guided multi-modal and multi-scale fusion for multispectral pedestrian detection. In: Pattern Recognition and Computer Vision: 5th Chinese Conference, PRCV 2022, Shenzhen, China, 4\u20137 November 2022, Proceedings, Part I, pp. 382\u2013393. Springer, Heidelberg (2022). https:\/\/doi.org\/10.1007\/978-3-031-18907-4_30","DOI":"10.1007\/978-3-031-18907-4_30"},{"key":"7_CR2","unstructured":"Chen, K., et al.: Mmdetection: open mmlab detection toolbox and benchmark. arXiv preprint arXiv:1906.07155 (2019)"},{"key":"7_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.aei.2021.101303","volume":"49","author":"Q Chen","year":"2021","unstructured":"Chen, Q., Huang, Y., Sun, H., Huang, W.: Pavement crack detection using hessian structure propagation. Adv. Eng. Inf. 49, 101303 (2021)","journal-title":"Adv. Eng. Inf."},{"key":"7_CR4","doi-asserted-by":"crossref","unstructured":"Cheng, G., Yuan, X., Yao, X., Yan, K., Zeng, Q., Han, J.: Towards large-scale small object detection: survey and benchmarks. arXiv preprint arXiv:2207.14096 (2022)","DOI":"10.1109\/TPAMI.2023.3290594"},{"key":"7_CR5","doi-asserted-by":"crossref","unstructured":"Ding, J., Xue, N., Long, Y., Xia, G.S., Lu, Q.: Learning ROI transformer for oriented object detection in aerial images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2849\u20132858 (2019)","DOI":"10.1109\/CVPR.2019.00296"},{"issue":"11","key":"7_CR6","doi-asserted-by":"publisher","first-page":"7778","DOI":"10.1109\/TPAMI.2021.3117983","volume":"44","author":"J Ding","year":"2021","unstructured":"Ding, J.: Object detection in aerial images: a large-scale benchmark and challenges. IEEE Trans. Pattern Anal. Mach. Intell. 44(11), 7778\u20137796 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"7_CR7","unstructured":"Du, C., et al.: On uni-modal feature learning in supervised multi-modal learning. arXiv preprint arXiv:2305.01233 (2023)"},{"key":"7_CR8","doi-asserted-by":"crossref","unstructured":"Fu, H., et al.: LRAF-Net: long-range attention fusion network for visible-infrared object detection. IEEE Trans. Neural Netw. Learn. Syst. (2023)","DOI":"10.1109\/TNNLS.2023.3266452"},{"key":"7_CR9","first-page":"1","volume":"60","author":"J Han","year":"2021","unstructured":"Han, J., Ding, J., Li, J., Xia, G.S.: Align deep features for oriented object detection. IEEE Trans. Geosci. Remote Sens. 60, 1\u201311 (2021)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"7_CR10","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"7_CR11","unstructured":"Huang, Y., Lin, J., Zhou, C., Yang, H., Huang, L.: Modality competition: what makes joint training of multi-modal network fail in deep learning?(provably). In: International Conference on Machine Learning, pp. 9226\u20139259. PMLR (2022)"},{"key":"7_CR12","doi-asserted-by":"crossref","unstructured":"Jia, X., Zhu, C., Li, M., Tang, W., Zhou, W.: LLVIP: a visible-infrared paired dataset for low-light vision. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3496\u20133504 (2021)","DOI":"10.1109\/ICCVW54120.2021.00389"},{"key":"7_CR13","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2021.103736","volume":"128","author":"K Kim","year":"2021","unstructured":"Kim, K., Kim, S., Shchur, D.: A UAS-based work zone safety monitoring system by integrating internal traffic control plan (ITCP) and automated object detection in game engine environment. Autom. Constr. 128, 103736 (2021)","journal-title":"Autom. Constr."},{"key":"7_CR14","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108614","volume":"127","author":"S Li","year":"2022","unstructured":"Li, S., Liu, Y., Zhao, Q., Feng, Z.: Learning residue-aware correlation filters and refining scale for real-time UAV tracking. Pattern Recogn. 127, 108614 (2022)","journal-title":"Pattern Recogn."},{"key":"7_CR15","doi-asserted-by":"crossref","unstructured":"Liang, P.P., Zadeh, A., Morency, L.P.: Foundations and recent trends in multimodal machine learning: principles, challenges, and open questions. arXiv preprint arXiv:2209.03430 (2022)","DOI":"10.1145\/3610661.3617602"},{"key":"7_CR16","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"7_CR17","unstructured":"Qingyun, F., Dapeng, H., Zhaokui, W.: Cross-modality fusion transformer for multispectral object detection. arXiv preprint arXiv:2111.00273 (2021)"},{"key":"7_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108786","volume":"130","author":"F Qingyun","year":"2022","unstructured":"Qingyun, F., Zhaokui, W.: Cross-modality attentive feature fusion for object detection in multispectral remote sensing imagery. Pattern Recogn. 130, 108786 (2022)","journal-title":"Pattern Recogn."},{"key":"7_CR19","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: towards real-time object detection with region proposal networks. Adv. Neural Inf. Process. Syst. 28 (2015)"},{"issue":"10","key":"7_CR20","doi-asserted-by":"publisher","first-page":"6700","DOI":"10.1109\/TCSVT.2022.3168279","volume":"32","author":"Y Sun","year":"2022","unstructured":"Sun, Y., Cao, B., Zhu, P., Hu, Q.: Drone-based RGB-infrared cross-modality vehicle detection via uncertainty-aware learning. IEEE Trans. Circuits Syst. Video Technol. 32(10), 6700\u20136713 (2022)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"7_CR21","first-page":"36161","volume":"35","author":"J Wu","year":"2022","unstructured":"Wu, J., Liang, Y., Akbari, H., Wang, Z., Yu, C., et al.: Scaling multimodal pre-training via cross-modality gradient harmonization. Adv. Neural. Inf. Process. Syst. 35, 36161\u201336173 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"7_CR22","doi-asserted-by":"crossref","unstructured":"Xie, J., et al.: Learning a dynamic cross-modal network for multispectral pedestrian detection. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 4043\u20134052 (2022)","DOI":"10.1145\/3503161.3547895"},{"key":"7_CR23","doi-asserted-by":"crossref","unstructured":"Xie, X., Cheng, G., Wang, J., Yao, X., Han, J.: Oriented R-CNN for object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3520\u20133529 (2021)","DOI":"10.1109\/ICCV48922.2021.00350"},{"key":"7_CR24","doi-asserted-by":"publisher","unstructured":"Yuan, M., Wang, Y., Wei, X.: Translation, scale and rotation: cross-modal alignment meets RGB-infrared vehicle detection. In: Computer Vision-ECCV 2022: 17th European Conference, Tel Aviv, Israel, 23\u201327 October 2022, Proceedings, Part IX, pp. 509\u2013525. Springer, Heidelberg (2022). https:\/\/doi.org\/10.1007\/978-3-031-20077-9_30","DOI":"10.1007\/978-3-031-20077-9_30"},{"key":"7_CR25","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1016\/j.inffus.2018.09.015","volume":"50","author":"L Zhang","year":"2019","unstructured":"Zhang, L., et al.: Cross-modality interactive attention network for multispectral pedestrian detection. Inf. Fusion 50, 20\u201329 (2019)","journal-title":"Inf. Fusion"},{"key":"7_CR26","doi-asserted-by":"crossref","unstructured":"Zhang, L., Zhu, X., Chen, X., Yang, X., Lei, Z., Liu, Z.: Weakly aligned cross-modal learning for multispectral pedestrian detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5127\u20135137 (2019)","DOI":"10.1109\/ICCV.2019.00523"},{"key":"7_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"787","DOI":"10.1007\/978-3-030-58523-5_46","volume-title":"Computer Vision \u2013 ECCV 2020","author":"K Zhou","year":"2020","unstructured":"Zhou, K., Chen, L., Cao, X.: Improving multispectral pedestrian detection by addressing modality imbalance problems. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12363, pp. 787\u2013803. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58523-5_46"},{"key":"7_CR28","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1007\/s41095-020-0199-z","volume":"7","author":"T Zhou","year":"2021","unstructured":"Zhou, T., Fan, D.P., Cheng, M.M., Shen, J., Shao, L.: RGB-D salient object detection: a survey. Comput. Visual Media 7, 37\u201369 (2021)","journal-title":"Comput. Visual Media"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8555-5_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,27]],"date-time":"2023-12-27T07:04:19Z","timestamp":1703660659000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8555-5_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,28]]},"ISBN":["9789819985548","9789819985555"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8555-5_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023,12,28]]},"assertion":[{"value":"28 December 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Xiamen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/prcv2023.xmu.edu.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1420","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"532","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"37% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,78","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,69","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}