{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T18:04:36Z","timestamp":1774029876821,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":31,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819609598","type":"print"},{"value":"9789819609604","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T00:00:00Z","timestamp":1733616000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T00:00:00Z","timestamp":1733616000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-0960-4_13","type":"book-chapter","created":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T07:35:42Z","timestamp":1733556942000},"page":"206-221","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["ESM-YOLO: Enhanced Small Target Detection Based on\u00a0Visible and\u00a0Infrared Multi-modal Fusion"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-5566-786X","authenticated-orcid":false,"given":"Qianqian","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9083-7266","authenticated-orcid":false,"given":"Linwei","family":"Qiu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3279-8945","authenticated-orcid":false,"given":"Li","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junshe","family":"An","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,8]]},"reference":[{"key":"13_CR1","doi-asserted-by":"publisher","first-page":"5733","DOI":"10.1109\/TIP.2022.3201476","volume":"31","author":"C An","year":"2022","unstructured":"An, C., Wang, Y., Zhang, J., Nguyen, T.Q.: Self-supervised rigid registration for multimodal retinal images. IEEE Trans. Image Process. 31, 5733\u20135747 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"13_CR2","doi-asserted-by":"crossref","unstructured":"Bai, Y., Zhang, Y., Ding, M., Ghanem, B.: Sod-mtgan: Small object detection via multi-task generative adversarial network. In: Proceedings of the European conference on computer vision (ECCV). pp. 206\u2013221 (2018)","DOI":"10.1007\/978-3-030-01261-8_13"},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Bai, Y.: Relu-function and derived function review. In: SHS Web of Conferences. vol.\u00a0144, p. 02006. EDP Sciences (2022)","DOI":"10.1051\/shsconf\/202214402006"},{"key":"13_CR4","unstructured":"Bochkovskiy, A., Wang, C.Y., Liao, H.Y.M.: Yolov4: Optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934 (2020)"},{"key":"13_CR5","doi-asserted-by":"crossref","unstructured":"Bogdoll, D., Nitsche, M., Z\u00f6llner, J.M.: Anomaly detection in autonomous driving: A survey. In: CVPR. pp. 4488\u20134499 (2022)","DOI":"10.1109\/CVPRW56347.2022.00495"},{"key":"13_CR6","doi-asserted-by":"crossref","unstructured":"Bottou, L.: Large-scale machine learning with stochastic gradient descent. In: Proceedings of COMPSTAT\u20192010: 19th International Conference on Computational StatisticsParis France, August 22-27, 2010 Keynote, Invited and Contributed Papers. pp. 177\u2013186. Springer (2010)","DOI":"10.1007\/978-3-7908-2604-3_16"},{"key":"13_CR7","doi-asserted-by":"crossref","unstructured":"Cheng, G., Yuan, X., Yao, X., Yan, K., Zeng, Q., Xie, X., Han, J.: Towards large-scale small object detection: Survey and benchmarks. IEEE Transactions on Pattern Analysis and Machine Intelligence (2023)","DOI":"10.1109\/TPAMI.2023.3290594"},{"key":"13_CR8","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.neunet.2017.12.012","volume":"107","author":"S Elfwing","year":"2018","unstructured":"Elfwing, S., Uchibe, E., Doya, K.: Sigmoid-weighted linear units for neural network function approximation in reinforcement learning. Neural Netw. 107, 3\u201311 (2018)","journal-title":"Neural Netw."},{"key":"13_CR9","unstructured":"Jocher, G., Chaurasia, A., Qiu, J.: Ultralytics yolo: Software for object detection. version 8.0.0. https:\/\/github.com\/ultralytics\/ultralytics (2023), accessed July 3, 2024"},{"key":"13_CR10","first-page":"1","volume":"21","author":"X Kang","year":"2024","unstructured":"Kang, X., Yin, H., Duan, P.: Global-local feature fusion network for visible-infrared vehicle detection. IEEE Geosci. Remote Sens. Lett. 21, 1\u20135 (2024)","journal-title":"IEEE Geosci. Remote Sens. Lett."},{"key":"13_CR11","doi-asserted-by":"crossref","unstructured":"Lee, M.: Gelu activation function in deep learning: a comprehensive mathematical analysis and performance. arXiv preprint arXiv:2305.12073 (2023)","DOI":"10.1155\/2023\/4229924"},{"key":"13_CR12","doi-asserted-by":"crossref","unstructured":"Li, J., Liang, X., Wei, Y., Xu, T., Feng, J., Yan, S.: Perceptual generative adversarial networks for small object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 1222\u20131230 (2017)","DOI":"10.1109\/CVPR.2017.211"},{"key":"13_CR13","doi-asserted-by":"crossref","unstructured":"Liu, H., Ye, Y., Zhang, J., Yang, C., Zhao, Y.: Comparative analysis of pixel level fusion algorithms in high resolution sar and optical image fusion. In: IGARSS. pp. 2829\u20132832. IEEE (2022)","DOI":"10.1109\/IGARSS46834.2022.9883331"},{"key":"13_CR14","unstructured":"Misra, D.: Mish: A self regularized non-monotonic activation function. arXiv preprint arXiv:1908.08681 (2019)"},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"Noh, J., Bae, W., Lee, W., Seo, J., Kim, G.: Better to follow, follow to be better: Towards precise supervision of feature super-resolution for small object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision. pp. 9725\u20139734 (2019)","DOI":"10.1109\/ICCV.2019.00982"},{"key":"13_CR16","doi-asserted-by":"crossref","unstructured":"Rasamoelina, A.D., Adjailia, F., Sin\u010d\u00e1k, P.: A review of activation function for artificial neural network. In: 2020 IEEE 18th World Symposium on Applied Machine Intelligence and Informatics (SAMI). pp. 281\u2013286. IEEE (2020)","DOI":"10.1109\/SAMI48414.2020.9108717"},{"key":"13_CR17","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1016\/j.jvcir.2015.11.002","volume":"34","author":"S Razakarivony","year":"2016","unstructured":"Razakarivony, S., Jurie, F.: Vehicle detection in aerial imagery: A small target detection benchmark. J. Vis. Commun. Image Represent. 34, 187\u2013203 (2016)","journal-title":"J. Vis. Commun. Image Represent."},{"key":"13_CR18","unstructured":"Redmon, J., Farhadi, A.: Yolov3: An incremental improvement. arXiv preprint arXiv:1804.02767 (2018)"},{"key":"13_CR19","unstructured":"Ultralytics: Yolov5 homepage. https:\/\/github.com\/ultralytics\/yolov5 (2021), accessed July 3, 2024"},{"key":"13_CR20","unstructured":"Valmiki, G.C., Tirupathi, A.S.: Performance analysis between combinations of optimization algorithms and activation functions used in multi-layer perceptron neural networks (2020)"},{"key":"13_CR21","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Advances in neural information processing systems 30 (2017)"},{"key":"13_CR22","doi-asserted-by":"crossref","unstructured":"V\u00f6gtli, M., Sierro, L., Kneub\u00fchler, M., Schreiner, S., Gross, W., Queck, F., Kuester, J., Mispelhorn, J., Middelmann, W.: Hyperthun\u201922: A multi-sensor multi-temporal camouflage detection campaign. In: IGARSS. pp. 2153\u20132156. IEEE (2023)","DOI":"10.1109\/IGARSS52108.2023.10282104"},{"key":"13_CR23","doi-asserted-by":"crossref","unstructured":"Wang, C.Y., Bochkovskiy, A., Liao, H.Y.M.: Yolov7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp. 7464\u20137475 (2023)","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"13_CR24","doi-asserted-by":"crossref","unstructured":"Wang, C.Y., Liao, H.Y.M., Wu, Y.H., Chen, P.Y., Hsieh, J.W., Yeh, I.H.: Cspnet: A new backbone that can enhance learning capability of cnn. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops. pp. 390\u2013391 (2020)","DOI":"10.1109\/CVPRW50498.2020.00203"},{"key":"13_CR25","doi-asserted-by":"crossref","unstructured":"Xiao, F., Tong, L., Wen, J., Wang, Y.: Decision-level fusion for road network extraction from sar and optical remote sensing images. In: IGARSS. pp. 7427\u20137430. IEEE (2023)","DOI":"10.1109\/IGARSS52108.2023.10282594"},{"key":"13_CR26","doi-asserted-by":"crossref","unstructured":"Xin, D., Xu, L., Chen, H., Yang, X., Zhang, R.: A vehicle target detection method based on feature level fusion of infrared and visible light image. In: 2022 34th Chinese Control and Decision Conference (CCDC). pp. 469\u2013474. IEEE (2022)","DOI":"10.1109\/CCDC55256.2022.10033899"},{"key":"13_CR27","doi-asserted-by":"crossref","unstructured":"Yu, B., Chen, W., Wang, W.: Research on industrial non-destructive testing technology based on improved yolov5s. In: ICTech. pp. 435\u2013440. IEEE (2023)","DOI":"10.1109\/ICTech58362.2023.00088"},{"key":"13_CR28","first-page":"1","volume":"61","author":"J Zhang","year":"2023","unstructured":"Zhang, J., Lei, J., Xie, W., Fang, Z., Li, Y., Du, Q.: Superyolo: Super resolution assisted object detection in multimodal remote sensing imagery. IEEE Trans. Geosci. Remote Sens. 61, 1\u201315 (2023)","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"issue":"10","key":"13_CR29","doi-asserted-by":"publisher","first-page":"3075","DOI":"10.3390\/s24103075","volume":"24","author":"Q Zhang","year":"2024","unstructured":"Zhang, Q., Zhou, L., An, J.: Real-time recognition algorithm of small target for uav infrared detection. Sensors 24(10), 3075 (2024)","journal-title":"Sensors"},{"key":"13_CR30","doi-asserted-by":"crossref","unstructured":"Zhang, T., Xie, G., Xie, X., Li, L., Zhang, H., Liang, Y.: An efficient bidirectional weighted global feature extraction algorithm for steel surface defects. In: 2023 China Automation Congress (CAC). pp. 6370\u20136375. IEEE (2023)","DOI":"10.1109\/CAC59555.2023.10451161"},{"key":"13_CR31","first-page":"1","volume":"20","author":"J Zhou","year":"2022","unstructured":"Zhou, J., Zhang, R., Zhao, W., Shen, S., Wang, N.: Aps-net: An adaptive point set network for optical remote-sensing object detection. IEEE Geosci. Remote Sens. Lett. 20, 1\u20135 (2022)","journal-title":"IEEE Geosci. Remote Sens. Lett."}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-0960-4_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T08:32:23Z","timestamp":1733560343000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-0960-4_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,8]]},"ISBN":["9789819609598","9789819609604"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-0960-4_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,8]]},"assertion":[{"value":"8 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hanoi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}