{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T16:13:19Z","timestamp":1770394399977,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":35,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819557578","type":"print"},{"value":"9789819557585","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5758-5_30","type":"book-chapter","created":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T04:59:37Z","timestamp":1770353977000},"page":"414-427","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["EVMDet: EfficientViM for\u00a0Small Object Detection"],"prefix":"10.1007","author":[{"given":"Tianxiang","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jichao","family":"Jiao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ning","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuqing","family":"Peng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yingchao","family":"Zeng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiajie","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ziyi","family":"Bao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zimo","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zilong","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,2,7]]},"reference":[{"key":"30_CR1","doi-asserted-by":"crossref","unstructured":"Cai, H., Li, J., Hu, M., Gan, C., Han, S.: EfficientVIT: multi-scale linear attention for high-resolution dense prediction. arXiv preprint arXiv:2205.14756 (2022)","DOI":"10.1109\/ICCV51070.2023.01587"},{"key":"30_CR2","unstructured":"Chen, K., et al.: MMDetection: open MMLAB detection toolbox and benchmark. arXiv preprint arXiv:1906.07155 (2019)"},{"issue":"11","key":"30_CR3","doi-asserted-by":"publisher","first-page":"13467","DOI":"10.1109\/TPAMI.2023.3290594","volume":"45","author":"G Cheng","year":"2023","unstructured":"Cheng, G., et al.: Towards large-scale small object detection: survey and benchmarks. IEEE Trans. Pattern Anal. Mach. Intell. 45(11), 13467\u201313488 (2023). https:\/\/doi.org\/10.1109\/TPAMI.2023.3290594","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"30_CR4","doi-asserted-by":"crossref","unstructured":"Chollet, F.: Xception: deep learning with depthwise separable convolutions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1251\u20131258 (2017)","DOI":"10.1109\/CVPR.2017.195"},{"key":"30_CR5","unstructured":"Dai, D., et al.: DeepSeekMoe: towards ultimate expert specialization in mixture-of-experts language models (2024). https:\/\/arxiv.org\/abs\/2401.06066"},{"key":"30_CR6","unstructured":"Dao, T., Gu, A.: Transformers are SSMS: generalized models and efficient algorithms through structured state space duality. In: Proceedings of the 41st International Conference on Machine Learning. ICML2024, JMLR.org (2024)"},{"key":"30_CR7","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"30_CR8","doi-asserted-by":"crossref","unstructured":"d\u2019Ascoli, S., Touvron, H., Leavitt, M.L., Morcos, A.S., Biroli, G., Sagun, L.: ConVit: improving vision transformers with soft convolutional inductive biases. In: International Conference on Machine Learning, pp. 2286\u20132296. PMLR (2021)","DOI":"10.1088\/1742-5468\/ac9830"},{"key":"30_CR9","unstructured":"Fu, S., et al.: Enhancing DETRS for small object detection via multi-scale refinement and query-aided mining. In: Nguyen, V., Lin, H.T. (eds.) Proceedings of the 16th Asian Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a0260, pp. 936\u2013951. PMLR (2025). https:\/\/proceedings.mlr.press\/v260\/fu25a.html"},{"key":"30_CR10","unstructured":"Gu, A., Dao, T.: MAMBA: linear-time sequence modeling with selective state spaces. arXiv preprint arXiv:2312.00752 (2023)"},{"key":"30_CR11","unstructured":"Gu, A., Goel, K., R\u00e9, C.: Efficiently modeling long sequences with structured state spaces. arXiv preprint arXiv:2111.00396 (2021)"},{"key":"30_CR12","doi-asserted-by":"crossref","unstructured":"Hatamizadeh, A., Kautz, J.: MAMBAVision: a hybrid mamba-transformer vision backbone (2025). https:\/\/arxiv.org\/abs\/2407.08083","DOI":"10.1109\/CVPR52734.2025.02352"},{"key":"30_CR13","unstructured":"Howard, A.G., et al.: MobileNets: efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)"},{"issue":"1","key":"30_CR14","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1162\/neco.1991.3.1.79","volume":"3","author":"RA Jacobs","year":"1991","unstructured":"Jacobs, R.A., Jordan, M.I., Nowlan, S.J., Hinton, G.E.: Adaptive mixtures of local experts. Neural Comput. 3(1), 79\u201387 (1991)","journal-title":"Neural Comput."},{"key":"30_CR15","doi-asserted-by":"crossref","unstructured":"Lee, S., Choi, J., Kim, H.J.: EfficientVim: efficient vision mamba with hidden state mixer based state space duality. In: Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR), pp. 14923\u201314933 (2025)","DOI":"10.1109\/CVPR52734.2025.01390"},{"issue":"2","key":"30_CR16","doi-asserted-by":"publisher","first-page":"318","DOI":"10.1109\/TPAMI.2018.2858826","volume":"42","author":"TY Lin","year":"2020","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. IEEE Trans. Pattern Anal. Mach. Intell. 42(2), 318\u2013327 (2020). https:\/\/doi.org\/10.1109\/TPAMI.2018.2858826","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"30_CR17","doi-asserted-by":"crossref","unstructured":"Liu, L., et al.: DeFMAMBA: deformable visual state space model. In: Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR), pp. 8838\u20138847 (June 2025)","DOI":"10.1109\/CVPR52734.2025.00826"},{"key":"30_CR18","first-page":"103031","volume":"37","author":"Y Liu","year":"2024","unstructured":"Liu, Y., et al.: VMamba: visual state space model. Adv. Neural. Inf. Process. Syst. 37, 103031\u2013103063 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"30_CR19","doi-asserted-by":"crossref","unstructured":"Ma, N., Zhang, X., Zheng, H.T., Sun, J.: ShuffleNet V2: practical guidelines for efficient CNN architecture design. In: Proceedings of the European conference on computer vision (ECCV), pp. 116\u2013131 (2018)","DOI":"10.1007\/978-3-030-01264-9_8"},{"key":"30_CR20","unstructured":"Mehta, S., Rastegari, M.: MobileVIT: light-weight, general-purpose, and mobile-friendly vision transformer. arXiv preprint arXiv:2110.02178 (2021)"},{"key":"30_CR21","doi-asserted-by":"crossref","unstructured":"Pei, X., Huang, T., Xu, C.: EfficientVMAMBA: Atrous selective scan for light weight visual mamba. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 39, no. 6, pp. 6443\u20136451 (Apr2025)","DOI":"10.1609\/aaai.v39i6.32690"},{"key":"30_CR22","doi-asserted-by":"crossref","unstructured":"Qiao, S., Chen, L.C., Yuille, A.: Detectors: detecting objects with recursive feature pyramid and switchable atrous convolution (2020). https:\/\/arxiv.org\/abs\/2006.02334","DOI":"10.1109\/CVPR46437.2021.01008"},{"key":"30_CR23","unstructured":"Qu, H., et al.: A survey of mamba. arXiv preprint arXiv:2408.01129 (2024)"},{"key":"30_CR24","unstructured":"Redmon, J., Farhadi, A.: YOLOv3: an incremental improvement (2018). https:\/\/arxiv.org\/abs\/1804.02767"},{"key":"30_CR25","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Cortes, C., Lawrence, N., Lee, D., Sugiyama, M., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol.\u00a028. Curran Associates, Inc. (2015). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2015\/file\/14bfa6bb14875e45bba028a21ed38046-Paper.pdf"},{"key":"30_CR26","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.C.: MobileNetV2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"30_CR27","doi-asserted-by":"publisher","unstructured":"Shi, S., Fang, Q., Xu, X., Zhao, T.: Similarity distance-based label assignment for tiny object detection. In: 2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 13711\u201313718 (2024). https:\/\/doi.org\/10.1109\/IROS58592.2024.10801448","DOI":"10.1109\/IROS58592.2024.10801448"},{"key":"30_CR28","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2022.104471","volume":"123","author":"K Tong","year":"2022","unstructured":"Tong, K., Wu, Y.: Deep learning-based detection from the perspective of small or tiny objects: a survey. Image Vis. Comput. 123, 104471 (2022)","journal-title":"Image Vis. Comput."},{"key":"30_CR29","doi-asserted-by":"crossref","unstructured":"Xing, Z., Ye, T., Yang, Y., Liu, G., Zhu, L.: SegMamba: long-range sequential modeling mamba for 3D medical image segmentation (2024). https:\/\/arxiv.org\/abs\/2401.13560","DOI":"10.1007\/978-3-031-72111-3_54"},{"key":"30_CR30","doi-asserted-by":"crossref","unstructured":"Yu, W., Wang, X.: MAMBAOUT: do we really need mamba for vision? In: Proceedings of the Computer Vision and Pattern Recognition Conference (CVPR), pp. 4484\u20134496 (2025)","DOI":"10.1109\/CVPR52734.2025.00423"},{"key":"30_CR31","doi-asserted-by":"crossref","unstructured":"Yuan, X., Cheng, G., Yan, K., Zeng, Q., Han, J.: Small object detection via coarse-to-fine proposal generation and imitation learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 6317\u20136327 (2023)","DOI":"10.1109\/ICCV51070.2023.00581"},{"key":"30_CR32","doi-asserted-by":"publisher","unstructured":"Zhang, H., et al.: A survey on visual mamba. Appl. Sci. 14(13) (2024). https:\/\/doi.org\/10.3390\/app14135683, https:\/\/www.mdpi.com\/2076-3417\/14\/13\/5683","DOI":"10.3390\/app14135683"},{"key":"30_CR33","unstructured":"Zhu, L., Liao, B., Zhang, Q., Wang, X., Liu, W., Wang, X.: Vision mamba: efficient visual representation learning with bidirectional state space model. In: Proceedings of the 41st International Conference on Machine Learning. ICML 2024, JMLR.org (2024)"},{"issue":"11","key":"30_CR34","doi-asserted-by":"publisher","first-page":"7380","DOI":"10.1109\/TPAMI.2021.3119563","volume":"44","author":"P Zhu","year":"2021","unstructured":"Zhu, P., et al.: Detection and tracking meet drones challenge. IEEE Trans. Pattern Anal. Mach. Intell. 44(11), 7380\u20137399 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"30_CR35","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable DETR: deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159 (2020)"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5758-5_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T04:59:41Z","timestamp":1770353981000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5758-5_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819557578","9789819557585"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5758-5_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"7 February 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}