{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T10:09:27Z","timestamp":1764583767387,"version":"3.46.0"},"reference-count":90,"publisher":"Springer Science and Business Media LLC","issue":"23-24","license":[{"start":{"date-parts":[[2025,10,23]],"date-time":"2025-10-23T00:00:00Z","timestamp":1761177600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,23]],"date-time":"2025-10-23T00:00:00Z","timestamp":1761177600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Soft Comput"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s00500-025-10893-5","type":"journal-article","created":{"date-parts":[[2025,10,23]],"date-time":"2025-10-23T10:55:57Z","timestamp":1761216957000},"page":"6207-6219","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Hierarchical instance distillation to enhance lightweight object detectors"],"prefix":"10.1007","volume":"29","author":[{"given":"Zhixian","family":"Liu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4771-4280","authenticated-orcid":false,"given":"Yi","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,23]]},"reference":[{"key":"10893_CR1","doi-asserted-by":"crossref","unstructured":"Bai R, Yuan S, Li K, Guo H, Yau W-Y, Xie L (2025) Realm: Real-time line-of-sight maintenance in multi-robot navigation with unknown obstacles. In: Proceedings of the IEEE International Conference on Robotics and Automation (ICRA), Atlanta, USA","DOI":"10.1109\/ICRA55743.2025.11128211"},{"key":"10893_CR2","doi-asserted-by":"crossref","unstructured":"Cai Z, Vasconcelos N (2018) Cascade r-cnn: Delving into high quality object detection. In: CVPR, pp 6154\u20136162","DOI":"10.1109\/CVPR.2018.00644"},{"key":"10893_CR3","doi-asserted-by":"crossref","unstructured":"Cao M, Nguyen TM, Yuan S, Anastasiou A, Zacharia A, Papaioannou S, Kolios P, Panayiotou CG, Polycarpou MM, Xu X et al (2025) Cooperative aerial robot inspection challenge: A benchmark for heterogeneous multi-uav planning and lessons learned. arXiv preprint arXiv:2501.06566","DOI":"10.1109\/MRA.2025.3584341"},{"key":"10893_CR4","doi-asserted-by":"crossref","unstructured":"Cao H, Xu Y, Yang J, Yin P, Ji X, Yuan S, Xie L (2024) Reliable spatial-temporal voxels for multi-modal test-time adaptation. In: Proceedings of the European Conference on Computer Vision (ECCV), Milan, Italy, Springer,\u00a0pp 232\u2013249","DOI":"10.1007\/978-3-031-73390-1_14"},{"key":"10893_CR5","doi-asserted-by":"crossref","unstructured":"Cao H, Xu Y, Yang J, Yin P, Yuan S, Xie L (2023) Multi-modal continual test-time adaptation for 3d semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), Paris, France, pp 18809\u201318819","DOI":"10.1109\/ICCV51070.2023.01724"},{"key":"10893_CR6","doi-asserted-by":"crossref","unstructured":"Cao H, Xu Y, Yang J, Yin P, Yuan S, Xie L (2024) Mopa: Multi-modal prior aided domain adaptation for 3d semantic segmentation. In: Proceedings of the 2024 IEEE International Conference on Robotics and Automation (ICRA), Yokohama, Japan, IEEE,\u00a0pp 9463\u20139470","DOI":"10.1109\/ICRA57147.2024.10610316"},{"key":"10893_CR7","unstructured":"Cao W, Zhang Y, Gao J, Cheng A, Cheng K, Cheng J (2022) Pkd: General distillation framework for object detectors via pearson correlation coefficient. In: NeurIPS"},{"key":"10893_CR8","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: ECCV, Springer,\u00a0pp 213\u2013229","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"10893_CR9","unstructured":"Chen G, Choi W, Yu X, Han T, Chandraker M (2017) Learning efficient object detection models with knowledge distillation. In: Advances in Neural Information Processing Systems 30, pp 742\u2013751"},{"issue":"5","key":"10893_CR10","doi-asserted-by":"publisher","first-page":"4774","DOI":"10.1109\/LRA.2024.3384757","volume":"9","author":"S Chen","year":"2024","unstructured":"Chen S, Liu K, Wang C, Yuan S, Yang J, Xie L (2024) Salient sparse visual odometry with pose-only supervision. IEEE Robot Autom Lett 9(5):4774\u20134781","journal-title":"IEEE Robot Autom Lett"},{"key":"10893_CR11","unstructured":"Chen K, Yang L, Chen Y, Chen K, Xu Y, Li L (2022) Gp-nas-ensemble: A model for the nas performance prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), New Orleans, LA, USA"},{"key":"10893_CR12","doi-asserted-by":"crossref","unstructured":"Cho JH, Hariharan B (2019) On the efficacy of knowledge distillation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 4794\u20134802","DOI":"10.1109\/ICCV.2019.00489"},{"key":"10893_CR13","doi-asserted-by":"crossref","unstructured":"Dai X, Jiang Z, Wu Z, Bao Y, Wang Z, Liu S, Zhou E (2021) General instance distillation for object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 7842\u20137851","DOI":"10.1109\/CVPR46437.2021.00775"},{"key":"10893_CR14","doi-asserted-by":"crossref","unstructured":"Deng T, Wang N, Wang C, Yuan S, Wang J, Wang D, Chen W (2024) Incremental joint learning of depth, pose and implicit scene representation on monocular camera in large-scale scenes. In: arXiv Preprint arXiv:2404.06050","DOI":"10.1109\/TASE.2025.3617654"},{"issue":"1","key":"10893_CR15","doi-asserted-by":"publisher","first-page":"625","DOI":"10.1109\/JIOT.2022.3203559","volume":"10","author":"L Deng","year":"2022","unstructured":"Deng L, Yang J, Yuan S, Zou H, Lu CX, Xie L (2022) Gaitfi: robust device-free human identification via wifi and vision multimodal learning. IEEE Internet Things J 10(1):625\u2013636","journal-title":"IEEE Internet Things J"},{"key":"10893_CR16","doi-asserted-by":"crossref","unstructured":"Dong P, Li L, Wei Z (2023) Diswot: Student architecture search for distillation without training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 11898\u201311908","DOI":"10.1109\/CVPR52729.2023.01145"},{"key":"10893_CR17","doi-asserted-by":"crossref","unstructured":"Dong P, Li L, Wei Z, Niu X, Tian Z, Pan H (2023) Emq: Evolving training-free proxies for automated mixed precision quantization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), Paris, France, pp 17076\u201317086","DOI":"10.1109\/ICCV51070.2023.01566"},{"key":"10893_CR18","unstructured":"Dong P, Li L, Zhong Y, Du D, Fan R, Chen Y, Tang Z, Wang Q, Xue W, Guo Y (2025) Stbllm: Breaking the 1-bit barrier with structured binary llms. In: Proceedings of the International Conference on Learning Representations (ICLR), Vienna, Austria"},{"key":"10893_CR19","doi-asserted-by":"crossref","unstructured":"Duan K, Bai S, Xie L, Qi H, Huang Q, Tian Q (2019) Centernet: Keypoint triplets for object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp 6569\u20136578","DOI":"10.1109\/ICCV.2019.00667"},{"key":"10893_CR20","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2021.107424","volume":"106","author":"MA Esfahani","year":"2021","unstructured":"Esfahani MA, Wang H, Bashari B, Wu K, Yuan S (2021) Learning to extract robust handcrafted features with a single observation via evolutionary neurogenesis. Appl Soft Comput 106:107424","journal-title":"Appl Soft Comput"},{"key":"10893_CR21","doi-asserted-by":"crossref","unstructured":"Esfahani MA, Wang H, Wu K, Yuan S (2020) Unsupervised scene categorization, path segmentation and landmark extraction while traveling path. In: 2020 16th International Conference on Control, Automation, Robotics and Vision (ICARCV), Shenzhen, China, IEEE,\u00a0pp 190\u2013195","DOI":"10.1109\/ICARCV50220.2020.9305437"},{"key":"10893_CR22","unstructured":"Geng L, Haozhi C, Mingyang L, Shenghai Y, Jianfei Y (2025) Gera: Geometric embedding for efficient point registration analysis. In: 2025 IEEE International Conference on Robotics and Automation (ICRA), Atlanta, USA"},{"key":"10893_CR23","unstructured":"Gu H, Li W, Li L, Qiyuan Z, Lee M, Sun S, Xue W, Guo Y (2025) Delta decompression for moe-based llms compression. arXiv preprint arXiv:2502.17298"},{"key":"10893_CR24","doi-asserted-by":"crossref","unstructured":"Guo J, Han K, Wang Y, Wu H, Chen X, Xu C, Xu C (2021) Distilling object detectors via decoupled features. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 2154\u20132164","DOI":"10.1109\/CVPR46437.2021.00219"},{"key":"10893_CR25","unstructured":"Hendrycks D, Gimpel K (2017) Bridging nonlinearities and stochastic regularizers with gaussian error linear units. In: Proceedings of the International Conference on Learning Representations (ICLR)"},{"key":"10893_CR26","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.108025","volume":"118","author":"Y Hu","year":"2021","unstructured":"Hu Y, Wang X, Li L, Gu Q (2021) Improving one-shot NAS with shrinking-and-expanding supernet. Pattern Recogn 118:108025","journal-title":"Pattern Recogn"},{"issue":"12","key":"10893_CR27","doi-asserted-by":"publisher","first-page":"10922","DOI":"10.1109\/LRA.2024.3479699","volume":"9","author":"X Ji","year":"2024","unstructured":"Ji X, Yuan S, Li J, Yin P, Cao H, Xie L (2024) Sgba: semantic gaussian mixture model-based lidar bundle adjustment. IEEE Robot Autom Lett 9(12):10922\u201310929","journal-title":"IEEE Robot Autom Lett"},{"key":"10893_CR28","doi-asserted-by":"crossref","unstructured":"Ji T, Yuan S, Xie L (2012) Robust rgb-d slam in dynamic environments for autonomous vehicles. In: Proceedings of the 17th International Conference on Control, Automation, Robotics and Vision (ICARCV), Singapore, IEEE,\u00a0pp 665\u2013671","DOI":"10.1109\/ICARCV57592.2022.10004324"},{"issue":"1","key":"10893_CR29","doi-asserted-by":"publisher","first-page":"812","DOI":"10.1109\/LRA.2024.3511397","volume":"10","author":"T Jin","year":"2025","unstructured":"Jin T, Xu X, Yang Y, Yuan S, Nguyen T-M, Li J, Xie L (2025) Robust loop closure by textual cues in challenging environments. IEEE Robot Autom Lett 10(1):812\u2013819","journal-title":"IEEE Robot Autom Lett"},{"key":"10893_CR30","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3002345","author":"T Kong","year":"2020","unstructured":"Kong T, Sun F, Liu H, Jiang Y, Li L, Shi J (2020) Foveabox: beyond anchor-based object detection. IEEE Trans Image Process. https:\/\/doi.org\/10.1109\/TIP.2020.3002345","journal-title":"IEEE Trans Image Process"},{"key":"10893_CR31","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2024.126360","volume":"268","author":"Y Lai","year":"2025","unstructured":"Lai Y, Yuan S, Nassar Y, Fan M, Weber T, R\u00e4tsch M (2025) Nvp-hri: zero shot natural voice and posture-based human-robot interaction via large language model. Expert Syst Appl 268:126360","journal-title":"Expert Syst Appl"},{"key":"10893_CR32","doi-asserted-by":"crossref","unstructured":"Law H, Deng J (2018) Cornernet: Detecting objects as paired keypoints. In: Proceedings of the European Conference on Computer Vision (ECCV), pp 734\u2013750","DOI":"10.1007\/978-3-030-01264-9_45"},{"key":"10893_CR33","doi-asserted-by":"crossref","unstructured":"Lee J, Xiao L, Schoenholz S, Bahri Y, Novak R, Sohl-Dickstein J, Pennington J\u00a0(2019) Wide neural networks of any depth evolve as linear models under gradient descent. NeurIPS\u00a0","DOI":"10.1088\/1742-5468\/abc62b"},{"key":"10893_CR34","doi-asserted-by":"crossref","unstructured":"Lei A, Deng T, Wang H, Yang J, Yuan S (2025) Audio array-based 3d uav trajectory estimation with lidar pseudo-labeling. In: Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Hyderabad, India. IEEE","DOI":"10.1109\/ICASSP49660.2025.10887734"},{"key":"10893_CR35","doi-asserted-by":"crossref","unstructured":"Li L (2022) Self-regulated feature learning via teacher-free feature distillation. In: Proceedings of the European Conference on Computer Vision (ECCV), Tel Aviv, Israel, pp 512\u2013528","DOI":"10.1007\/978-3-031-19809-0_20"},{"key":"10893_CR36","unstructured":"Li L, Bao Y, Dong P, Yang C, Li A, Luo W, Liu Q, Xue W, Guo Y (2024) Detkds: Knowledge distillation search for object detectors. In: Proceedings of the 41st International Conference on Machine Learning (ICML), Vienna, Austria, pp 5123\u20135135"},{"key":"10893_CR37","first-page":"13253","volume":"37","author":"L Li","year":"2024","unstructured":"Li L, Dong P, Li A, Wei Z, Yang Y (2024) Kd-zero: evolving knowledge distiller for any teacher-student pairs. Adv Neural Inf Process Syst (NeurIPS) 37:13253\u201313265","journal-title":"Adv Neural Inf Process Syst (NeurIPS)"},{"key":"10893_CR38","doi-asserted-by":"crossref","unstructured":"Li L, Dong P, Wei Z, Yang Y (2023) Automated knowledge distillation via monte carlo tree search. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), Paris, France, pp 14253\u201314265","DOI":"10.1109\/ICCV51070.2023.01597"},{"key":"10893_CR39","doi-asserted-by":"crossref","unstructured":"Li J, Leng Q, Liu J, Xu X, Jin T, Cao M, Nguyen T-M, Yuan S, Cao K, Xie L (2025) Helmetposer: A helmet-mounted imu dataset for data-driven estimation of human head motion in diverse conditions. In: Proceedings of the IEEE International Conference on Robotics and Automation (ICRA), Atlanta, USA","DOI":"10.1109\/ICRA55743.2025.11128564"},{"key":"10893_CR40","unstructured":"Li W, Li L, Lee M, Sun S (2024) Als: Adaptive layer sparsity for large language models via activation correlation assessment. In: Advances in Neural Information Processing Systems (NeurIPS), New Orleans, LA, USA\u00a0"},{"key":"10893_CR41","unstructured":"Li W, Li L, Huang Y-L, Lee MG, Sun S, Xue W, Guo Y (2025) Structured mixture-of-experts LLMs compression via singular value decomposition"},{"key":"10893_CR42","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i2.20018","author":"G Li","year":"2022","unstructured":"Li G, Li X, Wang Y, Zhang S, Wu Y, Liang D (2022) Knowledge distillation for object detection via rank mimicking and prediction-guided feature imitation. Proc AAAI Conf Artif Intell. https:\/\/doi.org\/10.1609\/aaai.v36i2.20018","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"10893_CR43","unstructured":"Li L, Jin Z (2022) Shadow knowledge distillation: Bridging offline and online knowledge transfer. In: Advances in Neural Information Processing Systems (NeurIPS), New Orleans, LA, USA, pp 13253\u201313265"},{"key":"10893_CR44","doi-asserted-by":"crossref","unstructured":"Li Q, Jin S, Yan J (2017) Mimicking very efficient network for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","DOI":"10.1109\/CVPR.2017.776"},{"key":"10893_CR45","unstructured":"Li L, Peijie Tang Z, Liu X, Wang Q, Luo W, Xue W, Liu Q, Chu X, Guo Y (2024) Discovering sparsity allocation for layer-wise pruning of large language models. In: Advances in Neural Information Processing Systems (NeurIPS), New Orleans, LA, USA"},{"key":"10893_CR46","doi-asserted-by":"crossref","unstructured":"Li L, Sun H, Li S, Dong P, Luo W, Xue W, Liu Q, Guo Y (2024) Auto-gas: Automated proxy discovery for training-free generative architecture search. In: Proceedings of the European Conference on Computer Vision (ECCV), Milan, Italy, pp 512\u2013528","DOI":"10.1007\/978-3-031-72652-1_3"},{"key":"10893_CR47","doi-asserted-by":"crossref","unstructured":"Li L, Wei Z, Dong P, Luo W, Xue W, Liu Q, Guo Y (2024) Attnzero: Efficient attention discovery for vision transformers. In: Proceedings of the European Conference on Computer Vision (ECCV), Milan, Italy, pp 512\u2013528","DOI":"10.1007\/978-3-031-72652-1_2"},{"key":"10893_CR48","doi-asserted-by":"crossref","unstructured":"Li Q, Yuan S (2024) Jacquard v2: Refining datasets using the human in the loop data correction method. In: Proceedings of the 2024 IEEE International Conference on Robotics and Automation (ICRA), Yokohama, Japan, pp 7932\u20137938","DOI":"10.1109\/ICRA57147.2024.10611652"},{"key":"10893_CR49","doi-asserted-by":"crossref","unstructured":"Liang H, Yang Y, Hu J, Yang J, Liu F, Yuan S (2025) Unsupervised uav 3d trajectories estimation with sparse point clouds. In: Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Hyderabad, India. IEEE","DOI":"10.1109\/ICASSP49660.2025.10890359"},{"key":"10893_CR50","first-page":"1","volume":"61","author":"Y Liao","year":"2023","unstructured":"Liao Y, Li J, Kang S, Li Q, Zhu G, Yuan S, Dong Z, Yang B (2023) Se-calib: Semantic edge-based lidar-camera boresight online calibration in urban scenes. IEEE Trans Geosci Remote Sens 61:1\u201313","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"10893_CR51","doi-asserted-by":"crossref","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu C-Y, Berg AC (2016) Ssd: Single shot multibox detector. In: ECCV, Springer,\u00a0pp 21\u201337\u00a0","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"10893_CR52","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Goyal P, Girshick R, He K, Dollar P (2017) Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV)","DOI":"10.1109\/ICCV.2017.324"},{"key":"10893_CR53","unstructured":"Liu R, Xu X, Yuan S, Xie L (2025) Handle object navigation as weighted traveling repairman problem. arXiv preprint arXiv:2503.06937"},{"issue":"1","key":"10893_CR54","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1109\/TCST.2024.3469032","volume":"33","author":"F Liu","year":"2025","unstructured":"Liu F, Yuan S, Cao K, Meng W, Xie L (2025) Distance-based multiple noncooperative ground target encirclement for complex environments. IEEE Trans Control Syst Technol 33(1):261\u2013273","journal-title":"IEEE Trans Control Syst Technol"},{"key":"10893_CR55","doi-asserted-by":"crossref","unstructured":"Lou B, Yuan S, Yang J, Su W, Zhang Y, Hu E (2025) Qlio: Quantized lidar-inertial odometry. arXiv preprint arXiv:2503.07949","DOI":"10.1109\/IROS60139.2025.11247236"},{"key":"10893_CR56","unstructured":"Lyu Y, Cao M, Yuan S, Xie L (2021) Vision based autonomous uav plane estimation and following for building inspection. In: arXiv Preprint arXiv:2102.01423"},{"issue":"4","key":"10893_CR57","doi-asserted-by":"publisher","first-page":"3037","DOI":"10.1109\/TAES.2022.3142663","volume":"58","author":"Y Lyu","year":"2022","unstructured":"Lyu Y, Yuan S, Xie L (2022) Structure priors aided visual-inertial navigation in building inspection tasks with auxiliary line features. IEEE Trans Aerosp Electron Syst 58(4):3037\u20133048","journal-title":"IEEE Trans Aerosp Electron Syst"},{"key":"10893_CR58","doi-asserted-by":"crossref","unstructured":"Meng D, Chen X, Fan Z, Zeng G, Li H, Yuan Y, Sun L, Wang J (2021) Conditional detr for fast training convergence. In: ICCV","DOI":"10.1109\/ICCV48922.2021.00363"},{"issue":"04","key":"10893_CR59","first-page":"5191","volume":"34","author":"SI Mirzadeh","year":"2020","unstructured":"Mirzadeh SI, Farajtabar M, Li A, Levine N, Matsukawa A, Ghasemzadeh H (2020) Improved knowledge distillation via teacher assistant. Proc AAAI Conf Artif Intell 34(04):5191\u20135198","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"10893_CR60","doi-asserted-by":"crossref","unstructured":"Nguyen T-M, Yang Y, Nguyen T-D, Yuan S, Xie L (2025) Uloc: Learning to localize in complex large-scale environments with ultra-wideband ranges. In: IEEE International Conference on Robotics and Automation (ICRA), Atlanta, USA","DOI":"10.1109\/ICRA55743.2025.11128859"},{"key":"10893_CR61","doi-asserted-by":"crossref","unstructured":"Nguyen T-M, Yuan S, Nguyen TH, Yin P, Cao H, Xie L, Wozniak M, Jensfelt P, Thiel M, Ziegenbein J (2024) Mcd: Diverse large-scale multi-campus dataset for robot perception. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), Seattle, WA, USA, pp 22304\u201322313","DOI":"10.1109\/CVPR52733.2024.02105"},{"key":"10893_CR62","unstructured":"Qi Z, Yuan S, Liu F, Cao H, Deng T, Yang J, Xie L (2024) Air-embodied: An efficient active 3dgs-based interaction and reconstruction framework with embodied large language model. arXiv preprint arXiv:2409.16019"},{"key":"10893_CR63","doi-asserted-by":"crossref","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2016) You only look once: Unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 779\u2013788","DOI":"10.1109\/CVPR.2016.91"},{"key":"10893_CR64","unstructured":"Ren S, He K, Girshick R, Sun J\u00a0(2015) Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems, vol 28"},{"key":"10893_CR65","doi-asserted-by":"crossref","unstructured":"Shrivastava A, Gupta A, Girshick R (2016) Training region-based object detectors with online hard example mining. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 761\u2013769","DOI":"10.1109\/CVPR.2016.89"},{"key":"10893_CR66","doi-asserted-by":"crossref","unstructured":"Tian Z, Shen C, Chen H, He T (2019) Fcos: Fully convolutional one-stage object detection. In: ICCV","DOI":"10.1109\/ICCV.2019.00972"},{"key":"10893_CR67","doi-asserted-by":"crossref","unstructured":"Wang H, Mou X, Mou W, Yuan S, Ulun S, Yang S, Shin B-S (2015) Vision based long range object detection and tracking for unmanned surface vehicle. In: 2015 IEEE 7th International Conference on Cybernetics and Intelligent Systems (CIS) and IEEE Conference on Robotics, Automation and Mechatronics (RAM), Angkor Wat, Cambodia, IEEE,\u00a0pp 101\u2013105","DOI":"10.1109\/ICCIS.2015.7274604"},{"key":"10893_CR68","doi-asserted-by":"crossref","unstructured":"Wang W, Tu Z (2020) Rethinking the value of transformer components. In: Proceedings of the 28th International Conference on Computational Linguistics","DOI":"10.18653\/v1\/2020.coling-main.529"},{"key":"10893_CR69","doi-asserted-by":"crossref","unstructured":"Wang H, Yuan S, Wu K (2017) Heterogeneous stereo: A human vision inspired method for general robotics sensing. In: TENCON 2017-2017 IEEE Region 10 Conference, Penang, Malaysia, IEEE, pp 793\u2013798","DOI":"10.1109\/TENCON.2017.8227967"},{"key":"10893_CR70","doi-asserted-by":"crossref","unstructured":"Wang T, Yuan L, Zhang X, Feng J (2019) Distilling object detectors with fine-grained feature imitation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","DOI":"10.1109\/CVPR.2019.00507"},{"issue":"4","key":"10893_CR71","first-page":"5123","volume":"38","author":"Z Wei","year":"2024","unstructured":"Wei Z, Li L, Dong P, Li A, Lu M, Pan H, Li D (2024) Auto-prox: training-free vision transformer architecture search via automatic proxy discovery. Proc AAAI Conf Artif Intell (AAAI) 38(4):5123\u20135130","journal-title":"Proc AAAI Conf Artif Intell (AAAI)"},{"key":"10893_CR72","unstructured":"Xiao Z, Yang Y, Xu, G, Zeng X, Yuan S (2024) Av-dtec: Self-supervised audio-visual fusion for drone trajectory estimation and classification. In: arXiv Preprint arXiv:2412.16928"},{"key":"10893_CR73","unstructured":"Xiaolong L, Lujun L, Chao L, Yao A (2022) Norm: Knowledge distillation via n-to-one representation matching"},{"key":"10893_CR74","doi-asserted-by":"crossref","unstructured":"Xu X, Cao M, Yuan S, Nguyen TH, Nguyen T-M, Xie L\u00a0(2024) A cost-effective cooperative exploration and inspection strategy for heterogeneous aerial system. In: Proceedings of the 2024 IEEE International Conference on Control and Automation (ICCA), Reykjavik, Iceland, IEEE,\u00a0pp 673\u2013678","DOI":"10.1109\/ICCA62789.2024.10591842"},{"key":"10893_CR75","unstructured":"Xu J, Huang G, Yu W, Zhang X, Zhao L, Li R, Yuan S, Xie L (2024) Selective kalman filter: When and how to fuse multi-sensor information to overcome degeneracy in slam. In: arXiv Preprint arXiv:2412.17235"},{"key":"10893_CR76","unstructured":"Xu K, Jiang Z, Cao H, Yuan S, Wang C, Xie L (2024) An efficient scene coordinate encoding and relocalization method. In: arXiv Preprint arXiv:2412.06488"},{"key":"10893_CR77","unstructured":"Yang J, Huang H, Zhou Y, Chen X, Xu Y, Yuan S, Zou H, Lu CX, Xie L (2024) Mm-fi: Multi-modal non-intrusive 4d human dataset for versatile wireless sensing. Advances in Neural Information Processing Systems 36"},{"key":"10893_CR78","unstructured":"Yang Z, Li Z, Jiang X, Gong Y, Yuan Z, Zhao D, Yuan C (2021) Focal and global knowledge distillation for detectors. In: CVPR, pp 4643\u20134652"},{"key":"10893_CR79","doi-asserted-by":"crossref","unstructured":"Yang Z, Li Z, Jiang X, Gong Y, Yuan Z, Zhao D, Yuan C (2022) Focal and global knowledge distillation for detectors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 4643\u20134652","DOI":"10.1109\/CVPR52688.2022.00460"},{"key":"10893_CR80","doi-asserted-by":"crossref","unstructured":"Yuan S, Wang H (2014) Autonomous object level segmentation. In: Proceedings of International Conference on Control, Automation, Robotics and Vision (ICARCV 2014), Singapore, pp 33\u201337","DOI":"10.1109\/ICARCV.2014.7064275"},{"issue":"02","key":"10893_CR81","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1142\/S230138502150014X","volume":"9","author":"S Yuan","year":"2021","unstructured":"Yuan S, Wang H, Xie L (2021) Survey on localization systems and algorithms for unmanned systems. Unmanned Syst 9(02):129\u2013163","journal-title":"Unmanned Syst"},{"key":"10893_CR82","doi-asserted-by":"crossref","unstructured":"Yang Y, Yuan S, Cao M, Yang J, Xie L (2023) Av-pedaware: Self-supervised audio-visual fusion for dynamic pedestrian awareness. In: Proceedings of the 2023 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), Detroit, MI, USA, IEEE,\u00a0pp 1871\u20131877","DOI":"10.1109\/IROS55552.2023.10342257"},{"key":"10893_CR83","doi-asserted-by":"crossref","unstructured":"Yang Y, Yuan S, Xie L (2022) Overcoming catastrophic forgetting for semantic segmentation via incremental learning. In: 2022 17th International Conference on Control, Automation, Robotics and Vision (ICARCV), Singapore, IEEE,\u00a0pp 299\u2013304","DOI":"10.1109\/ICARCV57592.2022.10004288"},{"issue":"3","key":"10893_CR84","first-page":"144","volume":"3","author":"Y Yang","year":"2024","unstructured":"Yang Y, Yuan S, Yang J, Nguyen TH, Cao M, Nguyen T-M, Wang H, Xie L (2024) Av-fdti: audio-visual fusion for drone threat identification. J Autom Intell 3(3):144\u2013151","journal-title":"J Autom Intell"},{"key":"10893_CR85","unstructured":"You H, Li C, Xu P, Fu Y, Wang Y, Chen X, Baraniuk RG, Wang Z, Lin Y (2019) Drawing early-bird tickets: Towards more efficient training of deep networks. In: ICLR"},{"key":"10893_CR86","doi-asserted-by":"crossref","unstructured":"Yuan S, Yang Y, Nguyen TH, Nguyen T-M, Yang J, Liu F, Li J, Wang H, Xie L (2024) Mmaud: A comprehensive multi-modal anti-uav dataset for modern miniature drone threats. In: Proceedings of the 2024 IEEE International Conference on Robotics and Automation (ICRA), Yokohama, Japan, pp 2745\u20132751","DOI":"10.1109\/ICRA57147.2024.10610957"},{"key":"10893_CR87","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/JIOT.2024.3459918","volume":"1","author":"C Zhao","year":"2024","unstructured":"Zhao C, Hu K, Xu J, Zhao L, Han B, Wu K, Tian M, Yuan S (2024) Adaptive-lio: enhancing robustness and precision through environmental adaptation in lidar inertial odometry. IEEE Internet Things J 1:1\u201313","journal-title":"IEEE Internet Things J"},{"key":"10893_CR88","doi-asserted-by":"crossref","unstructured":"Zheng Z, Ye R, Wang P, Ren D, Zuo W, Hou Q, Cheng M-M (2022) Localization distillation for dense object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), New Orleans, LA, USA, pp 9407\u20139416","DOI":"10.1109\/CVPR52688.2022.00919"},{"issue":"16","key":"10893_CR89","doi-asserted-by":"publisher","first-page":"14128","DOI":"10.1109\/JIOT.2023.3262940","volume":"10","author":"Y Zhou","year":"2023","unstructured":"Zhou Y, Huang H, Yuan S, Zou H, Xie L, Yang J (2023) Metafi++: wifi-enabled transformer-based human pose estimation for metaverse avatar simulation. IEEE Internet Things J 10(16):14128\u201314136","journal-title":"IEEE Internet Things J"},{"key":"10893_CR90","unstructured":"Zhu X, Su W, Lu L, Li B, Wang X, Dai J (2021) Deformable DETR: Deformable transformers for end-to-end object detection. In: Proceedings of the International Conference on Learning Representations (ICLR), Virtual Conference https:\/\/openreview.net\/forum?id=gZ9hCDWe6ke"}],"container-title":["Soft Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00500-025-10893-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00500-025-10893-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00500-025-10893-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T10:05:16Z","timestamp":1764583516000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00500-025-10893-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,23]]},"references-count":90,"journal-issue":{"issue":"23-24","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["10893"],"URL":"https:\/\/doi.org\/10.1007\/s00500-025-10893-5","relation":{},"ISSN":["1432-7643","1433-7479"],"issn-type":[{"type":"print","value":"1432-7643"},{"type":"electronic","value":"1433-7479"}],"subject":[],"published":{"date-parts":[[2025,10,23]]},"assertion":[{"value":"20 March 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 August 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 October 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that there are no conflicts of interest regarding the publication of this paper. No financial, professional, or personal relationships influenced the research presented in this work.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}},{"value":"Ethics approval was not required for this study.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Human Participants and Animals"}}]}}