{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,12]],"date-time":"2026-06-12T00:58:25Z","timestamp":1781225905029,"version":"3.54.1"},"reference-count":53,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Knowledge-Based Systems"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.knosys.2026.115998","type":"journal-article","created":{"date-parts":[[2026,4,19]],"date-time":"2026-04-19T01:32:23Z","timestamp":1776562343000},"page":"115998","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Large-kernel spatially parallel feature fusion for monocular 3D perception in autonomous driving"],"prefix":"10.1016","volume":"343","author":[{"given":"Ruanzhi","family":"Jiao","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3457-1982","authenticated-orcid":false,"given":"Jinlai","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chang","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lin","family":"Hu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.knosys.2026.115998_b1","series-title":"European Conference on Computer Vision","first-page":"664","article-title":"Deviant: Depth equivariant network for monocular 3d object detection","author":"Kumar","year":"2022"},{"key":"10.1016\/j.knosys.2026.115998_b2","doi-asserted-by":"crossref","unstructured":"H. Chen, Y. Huang, W. Tian, Z. Gao, L. Xiong, Monorun: Monocular 3d object detection by reconstruction and uncertainty propagation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 10379\u201310388.","DOI":"10.1109\/CVPR46437.2021.01024"},{"key":"10.1016\/j.knosys.2026.115998_b3","series-title":"Stereo R-CNN based 3D object detection for autonomous driving","author":"Li","year":"2019"},{"key":"10.1016\/j.knosys.2026.115998_b4","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"9155","article-title":"Monodetr: Depth-guided transformer for monocular 3d object detection","author":"Zhang","year":"2023"},{"key":"10.1016\/j.knosys.2026.115998_b5","series-title":"Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XIII 16","first-page":"311","article-title":"Rethinking pseudo-lidar representation","author":"Ma","year":"2020"},{"key":"10.1016\/j.knosys.2026.115998_b6","doi-asserted-by":"crossref","unstructured":"K.-C. Huang, T.-H. Wu, H.-T. Su, W.H. Hsu, Monodtr: Monocular 3d object detection with depth-aware transformer, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 4012\u20134021.","DOI":"10.1109\/CVPR52688.2022.00398"},{"key":"10.1016\/j.knosys.2026.115998_b7","doi-asserted-by":"crossref","unstructured":"A. Kundu, Y. Li, J.M. Rehg, 3d-rcnn: Instance-level 3d object reconstruction via render-and-compare, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2018, pp. 3559\u20133568.","DOI":"10.1109\/CVPR.2018.00375"},{"key":"10.1016\/j.knosys.2026.115998_b8","series-title":"Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXIII 16","first-page":"135","article-title":"Kinematic 3d object detection in monocular video","author":"Brazil","year":"2020"},{"key":"10.1016\/j.knosys.2026.115998_b9","doi-asserted-by":"crossref","unstructured":"S. Shi, C. Guo, L. Jiang, Z. Wang, J. Shi, X. Wang, H. Li, Pv-rcnn: Point-voxel feature set abstraction for 3d object detection, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020, pp. 10529\u201310538.","DOI":"10.1109\/CVPR42600.2020.01054"},{"key":"10.1016\/j.knosys.2026.115998_b10","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2026.115998_b11","series-title":"2024 International Conference on 3D Vision (3DV)","first-page":"1125","article-title":"Monolss: Learnable sample selection for monocular 3D detection","author":"Li","year":"2024"},{"key":"10.1016\/j.knosys.2026.115998_b12","series-title":"2012 IEEE Conference on Computer Vision and Pattern Recognition","first-page":"3354","article-title":"Are we ready for autonomous driving? the kitti vision benchmark suite","author":"Geiger","year":"2012"},{"key":"10.1016\/j.knosys.2026.115998_b13","doi-asserted-by":"crossref","unstructured":"Q. Lian, P. Li, X. Chen, Monojsg: Joint semantic and geometric cost volume for monocular 3d object detection, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 1070\u20131079.","DOI":"10.1109\/CVPR52688.2022.00114"},{"key":"10.1016\/j.knosys.2026.115998_b14","doi-asserted-by":"crossref","unstructured":"Y. Zhou, H. Zhu, Q. Liu, S. Chang, M. Guo, Monoatt: Online monocular 3d object detection with adaptive token transformer, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 17493\u201317503.","DOI":"10.1109\/CVPR52729.2023.01678"},{"key":"10.1016\/j.knosys.2026.115998_b15","series-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020"},{"issue":"4","key":"10.1016\/j.knosys.2026.115998_b16","doi-asserted-by":"crossref","first-page":"2396","DOI":"10.1109\/TPAMI.2023.3330944","article-title":"Monocular depth estimation: A thorough review","volume":"46","author":"Arampatzakis","year":"2023","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"10","key":"10.1016\/j.knosys.2026.115998_b17","doi-asserted-by":"crossref","first-page":"2024","DOI":"10.1109\/TPAMI.2015.2505283","article-title":"Learning depth from single monocular images using deep convolutional neural fields","volume":"38","author":"Liu","year":"2015","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.115998_b18","doi-asserted-by":"crossref","unstructured":"Y. Li, Y. Chen, N. Wang, Z. Zhang, Scale-aware trident networks for object detection, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2019, pp. 6054\u20136063.","DOI":"10.1109\/ICCV.2019.00615"},{"key":"10.1016\/j.knosys.2026.115998_b19","series-title":"European Conference on Computer Vision","first-page":"644","article-title":"Rtm3d: Real-time monocular 3d detection from object keypoints for autonomous driving","author":"Li","year":"2020"},{"key":"10.1016\/j.knosys.2026.115998_b20","first-page":"6789","article-title":"Non-deep networks","volume":"35","author":"Goyal","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2026.115998_b21","doi-asserted-by":"crossref","unstructured":"H. Zhao, J. Shi, X. Qi, X. Wang, J. Jia, Pyramid scene parsing network, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2017, pp. 2881\u20132890.","DOI":"10.1109\/CVPR.2017.660"},{"key":"10.1016\/j.knosys.2026.115998_b22","doi-asserted-by":"crossref","unstructured":"J. Dai, H. Qi, Y. Xiong, Y. Li, G. Zhang, H. Hu, Y. Wei, Deformable convolutional networks, in: Proceedings of the IEEE International Conference on Computer Vision, 2017, pp. 764\u2013773.","DOI":"10.1109\/ICCV.2017.89"},{"key":"10.1016\/j.knosys.2026.115998_b23","series-title":"International Conference on Machine Learning","first-page":"22691","article-title":"Dynamixer: a vision mlp architecture with dynamic mixing","author":"Wang","year":"2022"},{"key":"10.1016\/j.knosys.2026.115998_b24","series-title":"Separable self-attention for mobile vision transformers","author":"Mehta","year":"2022"},{"key":"10.1016\/j.knosys.2026.115998_b25","series-title":"European Conference on Computer Vision","first-page":"3","article-title":"Edgenext: efficiently amalgamated cnn-transformer architecture for mobile vision applications","author":"Maaz","year":"2022"},{"key":"10.1016\/j.knosys.2026.115998_b26","doi-asserted-by":"crossref","unstructured":"J. Hu, L. Shen, G. Sun, Squeeze-and-excitation networks, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2018, pp. 7132\u20137141.","DOI":"10.1109\/CVPR.2018.00745"},{"key":"10.1016\/j.knosys.2026.115998_b27","doi-asserted-by":"crossref","unstructured":"S. Woo, J. Park, J.-Y. Lee, I.S. Kweon, Cbam: Convolutional block attention module, in: Proceedings of the European Conference on Computer Vision, ECCV, 2018, pp. 3\u201319.","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"10.1016\/j.knosys.2026.115998_b28","series-title":"ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"2235","article-title":"Sa-net: Shuffle attention for deep convolutional neural networks","author":"Zhang","year":"2021"},{"issue":"8","key":"10.1016\/j.knosys.2026.115998_b29","doi-asserted-by":"crossref","first-page":"690","DOI":"10.1109\/34.784284","article-title":"Shape-from-shading: a survey","volume":"21","author":"Zhang","year":"1999","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.115998_b30","series-title":"Deep layer aggregation","author":"Yu","year":"2019"},{"key":"10.1016\/j.knosys.2026.115998_b31","first-page":"4794","article-title":"Vision transformer with deformable attention","author":"Xia","year":"2022","journal-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR)"},{"key":"10.1016\/j.knosys.2026.115998_b32","series-title":"ShuffleNet: An extremely efficient convolutional neural network for mobile devices","author":"Zhang","year":"2017"},{"key":"10.1016\/j.knosys.2026.115998_b33","doi-asserted-by":"crossref","unstructured":"C. Reading, A. Harakeh, J. Chae, S.L. Waslander, Categorical depth distribution network for monocular 3d object detection, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 8555\u20138564.","DOI":"10.1109\/CVPR46437.2021.00845"},{"key":"10.1016\/j.knosys.2026.115998_b34","doi-asserted-by":"crossref","unstructured":"Z. Zou, X. Ye, L. Du, X. Cheng, X. Tan, L. Zhang, J. Feng, X. Xue, E. Ding, The devil is in the task: Exploiting reciprocal appearance-localization features for monocular 3d object detection, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 2713\u20132722.","DOI":"10.1109\/ICCV48922.2021.00271"},{"key":"10.1016\/j.knosys.2026.115998_b35","doi-asserted-by":"crossref","unstructured":"D. Park, R. Ambrus, V. Guizilini, J. Li, A. Gaidon, Is pseudo-lidar needed for monocular 3d object detection?, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 3142\u20133152.","DOI":"10.1109\/ICCV48922.2021.00313"},{"key":"10.1016\/j.knosys.2026.115998_b36","series-title":"Monodistill: Learning spatial features for monocular 3d object detection","author":"Chong","year":"2022"},{"key":"10.1016\/j.knosys.2026.115998_b37","series-title":"European Conference on Computer Vision","first-page":"71","article-title":"Did-m3d: Decoupling instance depth for monocular 3d object detection","author":"Peng","year":"2022"},{"key":"10.1016\/j.knosys.2026.115998_b38","series-title":"ECCV","article-title":"Cross-modality knowledge distillation network for monocular 3D object detection","author":"Hong","year":"2022"},{"key":"10.1016\/j.knosys.2026.115998_b39","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2025.114043","article-title":"Monoafkd: Align and frequency cross knowledge distillation for monocular 3D object detection","volume":"326","author":"Peng","year":"2025","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.knosys.2026.115998_b40","doi-asserted-by":"crossref","unstructured":"Z. Liu, Z. Wu, R. T\u00f3th, Smoke: Single-stage monocular 3d object detection via keypoint estimation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, 2020, pp. 996\u2013997.","DOI":"10.1109\/CVPRW50498.2020.00506"},{"key":"10.1016\/j.knosys.2026.115998_b41","doi-asserted-by":"crossref","unstructured":"Y. Chen, L. Tai, K. Sun, M. Li, Monopair: Monocular 3d object detection using pairwise spatial relationships, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020, pp. 12093\u201312102.","DOI":"10.1109\/CVPR42600.2020.01211"},{"key":"10.1016\/j.knosys.2026.115998_b42","doi-asserted-by":"crossref","unstructured":"Y. Lu, X. Ma, L. Yang, T. Zhang, Y. Liu, Q. Chu, J. Yan, W. Ouyang, Geometry uncertainty projection network for monocular 3d object detection, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 3111\u20133121.","DOI":"10.1109\/ICCV48922.2021.00310"},{"key":"10.1016\/j.knosys.2026.115998_b43","doi-asserted-by":"crossref","unstructured":"X. Ma, Y. Zhang, D. Xu, D. Zhou, S. Yi, H. Li, W. Ouyang, Delving into localization errors for monocular 3d object detection, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 4721\u20134730.","DOI":"10.1109\/CVPR46437.2021.00469"},{"key":"10.1016\/j.knosys.2026.115998_b44","doi-asserted-by":"crossref","unstructured":"Y. Zhang, J. Lu, J. Zhou, Objects are different: Flexible monocular 3d object detection, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 3289\u20133298.","DOI":"10.1109\/CVPR46437.2021.00330"},{"key":"10.1016\/j.knosys.2026.115998_b45","first-page":"1810","article-title":"Learning auxiliary monocular contexts helps monocular 3d object detection","volume":"vol. 36","author":"Liu","year":"2022"},{"key":"10.1016\/j.knosys.2026.115998_b46","doi-asserted-by":"crossref","unstructured":"Z. Li, Z. Qu, Y. Zhou, J. Liu, H. Wang, L. Jiang, Diversity matters: Fully exploiting depth clues for reliable monocular 3d object detection, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 2791\u20132800.","DOI":"10.1109\/CVPR52688.2022.00281"},{"issue":"2","key":"10.1016\/j.knosys.2026.115998_b47","doi-asserted-by":"crossref","first-page":"240","DOI":"10.1109\/TIV.2022.3143954","article-title":"Monocular 3D object detection with sequential feature association and depth hint augmentation","volume":"7","author":"Gao","year":"2022","journal-title":"IEEE Trans. Intell. Veh."},{"key":"10.1016\/j.knosys.2026.115998_b48","doi-asserted-by":"crossref","unstructured":"L. Yan, P. Yan, S. Xiong, X. Xiang, Y. Tan, Monocd: Monocular 3d object detection with complementary depths, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 10248\u201310257.","DOI":"10.1109\/CVPR52733.2024.00976"},{"issue":"3","key":"10.1016\/j.knosys.2026.115998_b49","doi-asserted-by":"crossref","first-page":"3574","DOI":"10.1109\/TITS.2025.3525772","article-title":"MonoAMNet: Three-stage real-time monocular 3D object detection with adaptive methods","volume":"26","author":"Pan","year":"2025","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"10.1016\/j.knosys.2026.115998_b50","series-title":"Nuscenes: A multimodal dataset for autonomous driving","author":"Caesar","year":"2019"},{"key":"10.1016\/j.knosys.2026.115998_b51","doi-asserted-by":"crossref","unstructured":"G. Brazil, X. Liu, M3d-rpn: Monocular 3d region proposal network for object detection, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2019, pp. 9287\u20139296.","DOI":"10.1109\/ICCV.2019.00938"},{"key":"10.1016\/j.knosys.2026.115998_b52","series-title":"2023 IEEE\/CVF Winter Conference on Applications of Computer Vision","first-page":"4270","article-title":"Multivariate probabilistic monocular 3D object detection","author":"Shi","year":"2023"},{"key":"10.1016\/j.knosys.2026.115998_b53","doi-asserted-by":"crossref","unstructured":"P. Sun, H. Kretzschmar, X. Dotiwalla, A. Chouard, V. Patnaik, P. Tsui, J. Guo, Y. Zhou, Y. Chai, B. Caine, et al., Scalability in perception for autonomous driving: Waymo open dataset, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, 2020.","DOI":"10.1109\/CVPR42600.2020.00252"}],"container-title":["Knowledge-Based Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126007240?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126007240?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,12]],"date-time":"2026-06-12T00:12:42Z","timestamp":1781223162000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0950705126007240"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":53,"alternative-id":["S0950705126007240"],"URL":"https:\/\/doi.org\/10.1016\/j.knosys.2026.115998","relation":{},"ISSN":["0950-7051"],"issn-type":[{"value":"0950-7051","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Large-kernel spatially parallel feature fusion for monocular 3D perception in autonomous driving","name":"articletitle","label":"Article Title"},{"value":"Knowledge-Based Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.knosys.2026.115998","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"115998"}}