{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T17:18:14Z","timestamp":1776100694303,"version":"3.50.1"},"reference-count":50,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100017549","name":"Science and Technology Innovation 2025 Major Project of Ningbo","doi-asserted-by":"publisher","award":["2023Z044"],"award-info":[{"award-number":["2023Z044"]}],"id":[{"id":"10.13039\/501100017549","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Engineering Applications of Artificial Intelligence"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.engappai.2026.114448","type":"journal-article","created":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T08:04:56Z","timestamp":1773648296000},"page":"114448","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["A dual-stream foreground-aware enhancement network with spiralscan-Mamba for vision-based occupancy prediction in autonomous driving"],"prefix":"10.1016","volume":"173","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2765-9078","authenticated-orcid":false,"given":"Rui","family":"Zheng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nannan","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanyin","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chuiyi","family":"Deng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhuoyi","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiheng","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6957-3059","authenticated-orcid":false,"given":"Junwei","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.engappai.2026.114448_b1","series-title":"nuScenes: A multimodal dataset for autonomous driving","author":"Caesar","year":"2019"},{"key":"10.1016\/j.engappai.2026.114448_b2","doi-asserted-by":"crossref","unstructured":"Cao, A.-Q., De Charette, R., 2022. Monoscene: Monocular 3d semantic scene completion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 3991\u20134001.","DOI":"10.1109\/CVPR52688.2022.00396"},{"key":"10.1016\/j.engappai.2026.114448_b3","doi-asserted-by":"crossref","unstructured":"Chollet, F., 2017. Xception: Deep learning with depthwise separable convolutions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 1251\u20131258.","DOI":"10.1109\/CVPR.2017.195"},{"key":"10.1016\/j.engappai.2026.114448_b4","first-page":"2609","article-title":"LOMA: Language-assisted semantic occupancy network via triplane mamba","volume":"vol. 39","author":"Cui","year":"2025"},{"key":"10.1016\/j.engappai.2026.114448_b5","series-title":"2009 IEEE Conference on Computer Vision and Pattern Recognition","first-page":"248","article-title":"Imagenet: A large-scale hierarchical image database","author":"Deng","year":"2009"},{"key":"10.1016\/j.engappai.2026.114448_b6","series-title":"Mamba: Linear-time sequence modeling with selective state spaces","author":"Gu","year":"2023"},{"key":"10.1016\/j.engappai.2026.114448_b7","series-title":"Efficiently modeling long sequences with structured state spaces","author":"Gu","year":"2021"},{"key":"10.1016\/j.engappai.2026.114448_b8","first-page":"572","article-title":"Combining recurrent, convolutional, and continuous-time models with linear state space layers","volume":"34","author":"Gu","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.engappai.2026.114448_b9","doi-asserted-by":"crossref","unstructured":"Gu, J., Li, K., Wang, F., Wei, Y., Wu, Z., Fan, H., Wang, M., 2025. Motion matters: Motion-guided modulation network for skeleton-based micro-action recognition. In: Proceedings of the 33rd ACM International Conference on Multimedia. pp. 5461\u20135470.","DOI":"10.1145\/3746027.3754722"},{"key":"10.1016\/j.engappai.2026.114448_b10","first-page":"22982","article-title":"Diagonal state spaces are as effective as structured state spaces","volume":"35","author":"Gupta","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.engappai.2026.114448_b11","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J., 2016. Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"10.1016\/j.engappai.2026.114448_b12","series-title":"2024 IEEE International Conference on Robotics and Automation","first-page":"16425","article-title":"Fastocc: Accelerating 3d occupancy prediction by fusing the 2d bird\u2019s-eye view and perspective view","author":"Hou","year":"2024"},{"key":"10.1016\/j.engappai.2026.114448_b13","series-title":"Bevdet: High-performance multi-camera 3d object detection in bird-eye-view","author":"Huang","year":"2021"},{"key":"10.1016\/j.engappai.2026.114448_b14","doi-asserted-by":"crossref","unstructured":"Huang, Y., Thammatadatrakoon, A., Zheng, W., Zhang, Y., Du, D., Lu, J., 2025. Gaussianformer-2: Probabilistic gaussian superposition for efficient 3d occupancy prediction. In: Proceedings of the Computer Vision and Pattern Recognition Conference. pp. 27477\u201327486.","DOI":"10.1109\/CVPR52734.2025.02559"},{"key":"10.1016\/j.engappai.2026.114448_b15","doi-asserted-by":"crossref","unstructured":"Huang, Y., Zheng, W., Zhang, Y., Zhou, J., Lu, J., 2023. Tri-perspective view for vision-based 3d semantic occupancy prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 9223\u20139232.","DOI":"10.1109\/CVPR52729.2023.00890"},{"key":"10.1016\/j.engappai.2026.114448_b16","series-title":"European Conference on Computer Vision","first-page":"376","article-title":"Gaussianformer: Scene as gaussians for vision-based 3d semantic occupancy prediction","author":"Huang","year":"2024"},{"key":"10.1016\/j.engappai.2026.114448_b17","doi-asserted-by":"crossref","unstructured":"Huang, Y., Zheng, W., Zhang, B., Zhou, J., Lu, J., 2024b. Selfocc: Self-supervised vision-based 3d occupancy prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 19946\u201319956.","DOI":"10.1109\/CVPR52733.2024.01885"},{"key":"10.1016\/j.engappai.2026.114448_b18","first-page":"1486","article-title":"Bevstereo: Enhancing depth estimation in multi-view 3d object detection with temporal stereo","volume":"vol. 37","author":"Li","year":"2023"},{"key":"10.1016\/j.engappai.2026.114448_b19","series-title":"What makes convolutional models great on long sequence modeling?","author":"Li","year":"2022"},{"key":"10.1016\/j.engappai.2026.114448_b20","first-page":"1477","article-title":"Bevdepth: Acquisition of reliable depth for multi-view 3d object detection","volume":"vol. 37","author":"Li","year":"2023"},{"key":"10.1016\/j.engappai.2026.114448_b21","series-title":"European Conference on Computer Vision","first-page":"90","article-title":"Viewformer: Exploring spatiotemporal modeling for multi-view 3d occupancy perception via view-guided transformers","author":"Li","year":"2024"},{"key":"10.1016\/j.engappai.2026.114448_b22","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2025.103370","article-title":"Alternating interaction fusion of image-point cloud for multi-modal 3D object detection","volume":"65","author":"Li","year":"2025","journal-title":"Adv. Eng. Inform."},{"key":"10.1016\/j.engappai.2026.114448_b23","article-title":"Repetitive action counting with hybrid temporal relation modeling","author":"Li","year":"2025","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.engappai.2026.114448_b24","article-title":"Bevformer: learning bird\u2019s-eye-view representation from lidar-camera via spatiotemporal transformers","author":"Li","year":"2024","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"2","key":"10.1016\/j.engappai.2026.114448_b25","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1007\/s11063-024-11585-1","article-title":"Stabilization of semi-Markovian jumping uncertain complex-valued networks with time-varying delay: A sliding-mode control approach","volume":"56","author":"Li","year":"2024","journal-title":"Neural Process. Lett."},{"key":"10.1016\/j.engappai.2026.114448_b26","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2024.102955","article-title":"SparseDet: Towards efficient multi-view 3D object detection via sparse scene representation","volume":"62","author":"Li","year":"2024","journal-title":"Adv. Eng. Informatics"},{"key":"10.1016\/j.engappai.2026.114448_b27","series-title":"Fb-occ: 3d occupancy prediction based on forward-backward view transformation","author":"Li","year":"2023"},{"key":"10.1016\/j.engappai.2026.114448_b28","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P., 2017. Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision. pp. 2980\u20132988.","DOI":"10.1109\/ICCV.2017.324"},{"key":"10.1016\/j.engappai.2026.114448_b29","series-title":"Bevfusion: Multi-task multi-sensor fusion with unified bird\u2019s-eye view representation","author":"Liu","year":"2022"},{"key":"10.1016\/j.engappai.2026.114448_b30","first-page":"103031","article-title":"Vmamba: Visual state space model","volume":"37","author":"Liu","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.engappai.2026.114448_b31","series-title":"Micro-gesture online recognition using learnable query points","author":"Liu","year":"2024"},{"key":"10.1016\/j.engappai.2026.114448_b32","series-title":"U-mamba: Enhancing long-range dependency for biomedical image segmentation","author":"Ma","year":"2024"},{"key":"10.1016\/j.engappai.2026.114448_b33","doi-asserted-by":"crossref","unstructured":"Ma, Q., Tan, X., Qu, Y., Ma, L., Zhang, Z., Xie, Y., 2024. Cotr: Compact occupancy transformer for vision-based 3d occupancy prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 19936\u201319945.","DOI":"10.1109\/CVPR52733.2024.01884"},{"issue":"1","key":"10.1016\/j.engappai.2026.114448_b34","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1145\/3503250","article-title":"Nerf: Representing scenes as neural radiance fields for view synthesis","volume":"65","author":"Mildenhall","year":"2021","journal-title":"Commun. ACM"},{"key":"10.1016\/j.engappai.2026.114448_b35","series-title":"European Conference on Computer Vision","first-page":"414","article-title":"Atlas: End-to-end 3d scene reconstruction from posed images","author":"Murez","year":"2020"},{"key":"10.1016\/j.engappai.2026.114448_b36","series-title":"2024 IEEE International Conference on Robotics and Automation","first-page":"12404","article-title":"Renderocc: Vision-centric 3d occupancy prediction with 2d rendering supervision","author":"Pan","year":"2024"},{"key":"10.1016\/j.engappai.2026.114448_b37","series-title":"Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XIV 16","first-page":"194","article-title":"Lift, splat, shoot: Encoding images from arbitrary camera rigs by implicitly unprojecting to 3d","author":"Philion","year":"2020"},{"key":"10.1016\/j.engappai.2026.114448_b38","doi-asserted-by":"crossref","unstructured":"Qian, W., Li, K., Guo, D., Hu, B., Wang, M., 2024. Cluster-phys: Facial clues clustering towards efficient remote physiological measurement. In: Proceedings of the 32nd ACM International Conference on Multimedia. pp. 330\u2013339.","DOI":"10.1145\/3664647.3680670"},{"key":"10.1016\/j.engappai.2026.114448_b39","series-title":"European Conference on Computer Vision","first-page":"72","article-title":"Occupancy as set of points","author":"Shi","year":"2024"},{"key":"10.1016\/j.engappai.2026.114448_b40","first-page":"64318","article-title":"Occ3d: A large-scale 3d occupancy prediction benchmark for autonomous driving","volume":"36","author":"Tian","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.engappai.2026.114448_b41","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.engappai.2026.114448_b42","doi-asserted-by":"crossref","DOI":"10.1016\/j.cviu.2024.104222","article-title":"Leveraging vision-language prompts for real-world image restoration and enhancement","volume":"250","author":"Wei","year":"2025","journal-title":"Comput. Vis. Image Underst."},{"key":"10.1016\/j.engappai.2026.114448_b43","doi-asserted-by":"crossref","unstructured":"Wei, Y., Zhao, L., Zheng, W., Zhu, Z., Zhou, J., Lu, J., 2023. Surroundocc: Multi-camera 3d occupancy prediction for autonomous driving. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 21729\u201321740.","DOI":"10.1109\/ICCV51070.2023.01986"},{"key":"10.1016\/j.engappai.2026.114448_b44","series-title":"Deep height decoupling for precise vision-based 3d occupancy prediction","author":"Wu","year":"2024"},{"key":"10.1016\/j.engappai.2026.114448_b45","doi-asserted-by":"crossref","unstructured":"Xia, Z., Pan, X., Song, S., Li, L.E., Huang, G., 2022. Vision transformer with deformable attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 4794\u20134803.","DOI":"10.1109\/CVPR52688.2022.00475"},{"key":"10.1016\/j.engappai.2026.114448_b46","series-title":"Flashocc: Fast and memory-efficient occupancy prediction via channel-to-height plugin","author":"Yu","year":"2023"},{"key":"10.1016\/j.engappai.2026.114448_b47","series-title":"Occnerf: Self-supervised multi-camera occupancy prediction with neural radiance fields","author":"Zhang","year":"2023"},{"key":"10.1016\/j.engappai.2026.114448_b48","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Zhu, Z., Du, D., 2023. Occformer: Dual-path transformer for vision-based 3d semantic occupancy prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 9433\u20139443.","DOI":"10.1109\/ICCV51070.2023.00865"},{"key":"10.1016\/j.engappai.2026.114448_b49","series-title":"2015 International Joint Conference on Neural Networks","first-page":"1","article-title":"Improving deep neural networks using softplus units","author":"Zheng","year":"2015"},{"key":"10.1016\/j.engappai.2026.114448_b50","series-title":"Vision mamba: Efficient visual representation learning with bidirectional state space model","author":"Zhu","year":"2024"}],"container-title":["Engineering Applications of Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626007293?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626007293?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T16:22:13Z","timestamp":1776097333000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0952197626007293"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":50,"alternative-id":["S0952197626007293"],"URL":"https:\/\/doi.org\/10.1016\/j.engappai.2026.114448","relation":{},"ISSN":["0952-1976"],"issn-type":[{"value":"0952-1976","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"A dual-stream foreground-aware enhancement network with spiralscan-Mamba for vision-based occupancy prediction in autonomous driving","name":"articletitle","label":"Article Title"},{"value":"Engineering Applications of Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.engappai.2026.114448","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"114448"}}