{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T19:24:42Z","timestamp":1775503482876,"version":"3.50.1"},"reference-count":38,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U25A20403"],"award-info":[{"award-number":["U25A20403"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2024YFE0111800"],"award-info":[{"award-number":["2024YFE0111800"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neural Networks"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.neunet.2026.108544","type":"journal-article","created":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T00:23:34Z","timestamp":1768004614000},"page":"108544","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["MambaFPN: A SSM-based feature pyramid network for object detection"],"prefix":"10.1016","volume":"198","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-1917-9711","authenticated-orcid":false,"given":"Le","family":"Liang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9245-3944","authenticated-orcid":false,"given":"Cheng","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0542-2280","authenticated-orcid":false,"given":"Lefei","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.neunet.2026.108544_bib0001","unstructured":"Dao, T., & Gu, A. (2024). Transformers are SSMs: Generalized models and efficient algorithms through structured state space duality. arXiv preprint arXiv: 2405.21060."},{"key":"10.1016\/j.neunet.2026.108544_bib0002","series-title":"2024 IEEE International conference on multimedia and expo (ICME)","first-page":"1","article-title":"Discriminative pedestrian features and gated channel attention for clothes-changing person re-identification","author":"Ding","year":"2024"},{"key":"10.1016\/j.neunet.2026.108544_bib0003","series-title":"International conference on learning representations","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2021"},{"key":"10.1016\/j.neunet.2026.108544_bib0004","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"7036","article-title":"NAS-FPN: Learning scalable feature pyramid architecture for object detection","author":"Ghiasi","year":"2019"},{"key":"10.1016\/j.neunet.2026.108544_bib0005","unstructured":"Gu, A., & Dao, T. (2023). Mamba: Linear-time sequence modeling with selective state spaces. arXiv preprint arXiv: 2312.00752."},{"key":"10.1016\/j.neunet.2026.108544_bib0006","unstructured":"Gu, A., Goel, K., & R\u00e9, C. (2021). Efficiently modeling long sequences with structured state spaces. arXiv preprint arXiv: 2111.00396."},{"key":"10.1016\/j.neunet.2026.108544_bib0007","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"12595","article-title":"AugFPN: Improving multi-scale feature learning for object detection","author":"Guo","year":"2020"},{"key":"10.1016\/j.neunet.2026.108544_bib0008","series-title":"2024 IEEE International conference on multimedia and expo (ICME)","first-page":"1","article-title":"DrivingGen: Efficient safety-critical driving video generation with latent diffusion models","author":"Guo","year":"2024"},{"key":"10.1016\/j.neunet.2026.108544_bib0009","series-title":"2019 IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","first-page":"5351","article-title":"LVIS: A dataset for large vocabulary instance segmentation","author":"Gupta","year":"2019"},{"key":"10.1016\/j.neunet.2026.108544_bib0010","series-title":"Proceedings of the IEEE International conference on computer vision","first-page":"2961","article-title":"Mask R-CNN","author":"He","year":"2017"},{"key":"10.1016\/j.neunet.2026.108544_bib0011","series-title":"2016 IEEE conference on computer vision and pattern recognition (CVPR)","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He","year":"2015"},{"key":"10.1016\/j.neunet.2026.108544_bib0012","doi-asserted-by":"crossref","unstructured":"Huang, T., Pei, X., You, S., Wang, F., Qian, C., & Xu, C. (2024). LocalMamba: Visual state space model with windowed selective scan. arXiv preprint arXiv: 2403.09338.","DOI":"10.1007\/978-3-031-91979-4_2"},{"key":"10.1016\/j.neunet.2026.108544_bib0013","series-title":"2024 IEEE International conference on multimedia and expo (ICME)","first-page":"1","article-title":"DSENet: An object-wise density-informed coarse-to-fine object detector for aerial image","author":"Jiang","year":"2024"},{"key":"10.1016\/j.neunet.2026.108544_bib0014","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","first-page":"3041","article-title":"Mask DINO: Towards a unified transformer-based framework for object detection and segmentation","author":"Li","year":"2023"},{"key":"10.1016\/j.neunet.2026.108544_bib0015","unstructured":"Liao, B., Chen, S., Wang, X., Cheng, T., Zhang, Q., Liu, W., & Huang, C. (2022). Maptr: Structured modeling and learning for online vectorized HD map construction. arXiv preprint arXiv: 2208.14437."},{"key":"10.1016\/j.neunet.2026.108544_bib0016","unstructured":"Liao, B., Chen, S., Yin, H., Jiang, B., Wang, C., Yan, S., Zhang, X., Li, X., Zhang, Y., Zhang, Q. et al. (2024a). DiffusionDrive: Truncated diffusion model for end-to-end autonomous driving. arXiv preprint arXiv: 2411.15139."},{"key":"10.1016\/j.neunet.2026.108544_bib0017","unstructured":"Liao, B., Wang, X., Zhu, L., Zhang, Q., & Huang, C. (2024b). VIG: Linear-complexity visual sequence learning with gated linear attention. arXiv preprint arXiv: 2405.18425."},{"key":"10.1016\/j.neunet.2026.108544_bib0018","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"2117","article-title":"Feature pyramid networks for object detection","author":"Lin","year":"2017"},{"key":"10.1016\/j.neunet.2026.108544_bib0019","series-title":"European conference on computer vision","article-title":"Microsoft COCO: Common objects in context","author":"Lin","year":"2014"},{"key":"10.1016\/j.neunet.2026.108544_bib0020","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"8759","article-title":"Path aggregation network for instance segmentation","author":"Liu","year":"2018"},{"key":"10.1016\/j.neunet.2026.108544_bib0021","series-title":"Computer vision\u2013ECCV 2016: 14th European conference, Amsterdam, The Netherlands, October 11\u201314, 2016, proceedings, part i 14","first-page":"21","article-title":"SSD: Single shot multibox detector","author":"Liu","year":"2016"},{"key":"10.1016\/j.neunet.2026.108544_bib0022","unstructured":"Liu, Y., Tian, Y., Zhao, Y., Yu, H., Xie, L., Wang, Y., Ye, Q., & Liu, Y. (2024). Vmamba: Visual state space model. arXiv preprint arXiv: 2401.10166."},{"key":"10.1016\/j.neunet.2026.108544_bib0023","series-title":"Proceedings of the IEEE\/CVF International conference on computer vision","first-page":"10012","article-title":"Swin transformer: Hierarchical vision transformer using shifted windows","author":"Liu","year":"2021"},{"issue":"6","key":"10.1016\/j.neunet.2026.108544_bib0024","doi-asserted-by":"crossref","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","article-title":"Faster r-CNN: Towards real-time object detection with region proposal networks","volume":"39","author":"Ren","year":"2016","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"1","key":"10.1016\/j.neunet.2026.108544_bib0025","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1007\/s44267-024-00064-9","article-title":"Vitgaze: Gaze following with interaction features in vision transformers","volume":"2","author":"Song","year":"2024","journal-title":"Visual Intelligence"},{"key":"10.1016\/j.neunet.2026.108544_bib0026","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"14454","article-title":"Sparse R-CNN: End-to-end object detection with learnable proposals","author":"Sun","year":"2021"},{"key":"10.1016\/j.neunet.2026.108544_bib0027","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"10781","article-title":"EfficientDet: Scalable and efficient object detection","author":"Tan","year":"2020"},{"issue":"4","key":"10.1016\/j.neunet.2026.108544_bib0028","first-page":"1922","article-title":"FCOS: A simple and strong anchor-free object detector","volume":"44","author":"Tian","year":"2020","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.1016\/j.neunet.2026.108544_bib0029","series-title":"Advances in neural information processing systems","article-title":"Attention is all you need","author":"Vaswani","year":"2017"},{"issue":"7","key":"10.1016\/j.neunet.2026.108544_bib0030","doi-asserted-by":"crossref","first-page":"5350","DOI":"10.1109\/TCSVT.2023.3344713","article-title":"Efficient task-specific feature re-fusion for more accurate object detection and instance segmentation","volume":"34","author":"Wang","year":"2024","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2026.108544_bib0031","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110570","article-title":"OpenInst: A simple query-based method for open-world instance segmentation","volume":"153","author":"Wang","year":"2024","journal-title":"Pattern Recognition"},{"key":"10.1016\/j.neunet.2026.108544_bib0032","series-title":"Proceedings of the European conference on computer vision (ECCV)","first-page":"365","article-title":"Mancs: A multi-task attentional network with curriculum sampling for person re-identification","author":"Wang","year":"2018"},{"key":"10.1016\/j.neunet.2026.108544_bib0033","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"13359","article-title":"Scale-equalizing pyramid convolution for object detection","author":"Wang","year":"2020"},{"key":"10.1016\/j.neunet.2026.108544_bib0034","unstructured":"Wu, Y., Kirillov, A., Massa, F., Lo, W.-Y., & Girshick, R. (2019). Detectron2. https:\/\/github.com\/facebookresearch\/detectron2."},{"key":"10.1016\/j.neunet.2026.108544_bib0035","series-title":"2024 IEEE International conference on multimedia and expo (ICME)","first-page":"1","article-title":"HCF-Net: Hierarchical context fusion network for infrared small object detection","author":"Xu","year":"2024"},{"key":"10.1016\/j.neunet.2026.108544_bib0036","unstructured":"Yang, S., Wang, B., Shen, Y., Panda, R., & Kim, Y. (2023). Gated linear attention transformers with hardware-efficient training. arXiv preprint arXiv: 2312.06635."},{"key":"10.1016\/j.neunet.2026.108544_bib0037","series-title":"Computer vision\u2013ECCV 2020: 16th European conference, Glasgow, UK, August 23\u201328, 2020, proceedings, part XXVIII 16","first-page":"323","article-title":"Feature pyramid transformer","author":"Zhang","year":"2020"},{"key":"10.1016\/j.neunet.2026.108544_bib0038","unstructured":"Zhu, L., Liao, B., Zhang, Q., Wang, X., Liu, W., & Wang, X. (2024). Vision mamba: Efficient visual representation learning with bidirectional state space model. arXiv preprint arXiv: 2401.09417."}],"container-title":["Neural Networks"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026000079?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026000079?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T18:39:13Z","timestamp":1775500753000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0893608026000079"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":38,"alternative-id":["S0893608026000079"],"URL":"https:\/\/doi.org\/10.1016\/j.neunet.2026.108544","relation":{},"ISSN":["0893-6080"],"issn-type":[{"value":"0893-6080","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"MambaFPN: A SSM-based feature pyramid network for object detection","name":"articletitle","label":"Article Title"},{"value":"Neural Networks","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neunet.2026.108544","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"108544"}}