{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T07:42:17Z","timestamp":1777880537177,"version":"3.51.4"},"reference-count":57,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62176046"],"award-info":[{"award-number":["62176046"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Knowledge-Based Systems"],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1016\/j.knosys.2026.115564","type":"journal-article","created":{"date-parts":[[2026,2,22]],"date-time":"2026-02-22T06:44:24Z","timestamp":1771742664000},"page":"115564","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["STPE-map: Multimodal alignment and spatio-temporal priors for online HD map construction"],"prefix":"10.1016","volume":"339","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2281-5641","authenticated-orcid":false,"given":"Keke","family":"Tian","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3417-2007","authenticated-orcid":false,"given":"Muquan","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0007-7917-7253","authenticated-orcid":false,"given":"Jing","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6174-3877","authenticated-orcid":false,"given":"Ke","family":"Qin","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.knosys.2026.115564_bib0001","doi-asserted-by":"crossref","first-page":"652","DOI":"10.1016\/j.neunet.2023.09.035","article-title":"LaenNet: learning robust GCNs by propagating labels","volume":"168","author":"Zhang","year":"2023","journal-title":"Neural Netw."},{"key":"10.1016\/j.knosys.2026.115564_bib0002","series-title":"15th International IEEE Conference on Intelligent Transportation Systems, ITSC 2012, Anchorage, AK, USA, September 16\u201319, 2012","first-page":"270","article-title":"A random finite set approach to multiple lane detection","author":"Deusch","year":"2012"},{"key":"10.1016\/j.knosys.2026.115564_bib0003","series-title":"2019 IEEE\/CVF International Conference on Computer Vision, ICCV 2019, Seoul, Korea (South), October 27-November 2, 2019","first-page":"2921","article-title":"3D-LaneNet: end-to-end 3D multiple lane detection","author":"Garnett","year":"2019"},{"key":"10.1016\/j.knosys.2026.115564_bib0004","series-title":"2017 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017, Honolulu, HI, USA, July 21\u201326, 2017","first-page":"6565","article-title":"CNN-SLAM: real-time dense monocular SLAM with learned depth prediction","author":"Tateno","year":"2017"},{"key":"10.1016\/j.knosys.2026.115564_bib0005","series-title":"Computer Vision - ECCV 2014 - 13th European Conference, Zurich, Switzerland, September 6\u201312, 2014, Proceedings, Part II","first-page":"834","article-title":"LSD-SLAM: large-scale direct monocular SLAM","volume":"8690","author":"Engel","year":"2014"},{"issue":"6","key":"10.1016\/j.knosys.2026.115564_bib0006","doi-asserted-by":"crossref","first-page":"1052","DOI":"10.1109\/TPAMI.2007.1049","article-title":"MonoSLAM: real-time single camera SLAM","volume":"29","author":"Davison","year":"2007","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"4","key":"10.1016\/j.knosys.2026.115564_bib0007","doi-asserted-by":"crossref","first-page":"2239","DOI":"10.1109\/TPAMI.2023.3335410","article-title":"DN-DETR: accelerate DETR training by introducing query denoising","volume":"46","author":"Li","year":"2024","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.115564_bib0008","series-title":"IEEE\/CVF Winter Conference on Applications of Computer Vision, WACV 2025, Tucson, AZ, USA, February 26\u2013March 6, 2025","first-page":"1628","article-title":"RT-DETRv3: real-time end-to-end object detection with hierarchical dense positive supervision","author":"Wang","year":"2025"},{"issue":"3","key":"10.1016\/j.knosys.2026.115564_bib0009","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1007\/s11760-024-03757-2","article-title":"MS-DETR: a real-time multi-scale detection transformer for PCB defect detection","volume":"19","author":"Ji","year":"2025","journal-title":"Signal Image Video Process."},{"key":"10.1016\/j.knosys.2026.115564_bib0010","unstructured":"Z. Yao, J. Ai, B. Li, C. Zhang, Efficient DETR: improving end-to-end object detector with dense prior, CoRR abs\/2104.01318(2021). arXiv: 2104.01318."},{"key":"10.1016\/j.knosys.2026.115564_bib0011","series-title":"IEEE\/CVF International Conference on Computer Vision, ICCV 2023, Paris, France, October 1\u20136, 2023","first-page":"6610","article-title":"Group DETR: fast DETR training with group-wise one-to-many assignment","author":"Chen","year":"2023"},{"key":"10.1016\/j.knosys.2026.115564_bib0012","series-title":"The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1\u20135, 2023","article-title":"MapTR: structured modeling and learning for online vectorized HD map construction","author":"Liao","year":"2023"},{"issue":"3","key":"10.1016\/j.knosys.2026.115564_bib0013","doi-asserted-by":"crossref","first-page":"1352","DOI":"10.1007\/s11263-024-02235-z","article-title":"MapTRv2: an end-to-end framework for online vectorized HD map construction","volume":"133","author":"Liao","year":"2025","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.knosys.2026.115564_bib0014","series-title":"Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9, 2022","article-title":"BEVFusion: A simple and robust LiDAR-camera fusion framework","author":"Liang","year":"2022"},{"key":"10.1016\/j.knosys.2026.115564_bib0015","series-title":"2023 IEEE International Conference on Robotics and Automation (ICRA)","first-page":"2774","article-title":"BEVFusion: multi-task multi-sensor fusion with unified bird\u2019s-eye view representation","author":"Liu","year":"2023"},{"key":"10.1016\/j.knosys.2026.115564_bib0016","series-title":"IEEE\/CVF Winter Conference on Applications of Computer Vision, WACV","first-page":"7341","article-title":"StreamMapNet: streaming mapping network for vectorized online HD map construction","author":"Yuan","year":"2024"},{"key":"10.1016\/j.knosys.2026.115564_bib0017","series-title":"IEEE\/CVF Winter Conference on Applications of Computer Vision, WACV","first-page":"8134","article-title":"PrevPredMap: exploring temporal modeling with previous predictions for online vectorized HD map construction","author":"Peng","year":"2025"},{"key":"10.1016\/j.knosys.2026.115564_bib0018","series-title":"IEEE\/CVF Winter Conference on Applications of Computer Vision, WACV","first-page":"9248","article-title":"MemFusionMap: working memory fusion for online vectorized HD map construction","author":"Song","year":"2025"},{"key":"10.1016\/j.knosys.2026.115564_bib0019","series-title":"Computer Vision - ECCV 2024 - 18th European Conference","first-page":"203","article-title":"Stream query denoising for vectorized HD-map construction","volume":"15077","author":"Wang","year":"2024"},{"key":"10.1016\/j.knosys.2026.115564_bib0020","unstructured":"R. Wang, X. Lu, X. Liu, X. Zou, T. Cao, Y. Li, PriorMapNet: enhancing online vectorized HD map construction with priors, CoRR abs\/2408.08802(2024b). 10.48550\/ARXIV.2408.08802."},{"key":"10.1016\/j.knosys.2026.115564_bib0021","series-title":"Computer Vision - ECCV 2022 - 17th European Conference","first-page":"1","article-title":"BEVFormer: learning bird\u2019s-eye-view representation from multi-camera images via spatiotemporal transformers","volume":"13669","author":"Li","year":"2022"},{"issue":"2","key":"10.1016\/j.knosys.2026.115564_bib0022","doi-asserted-by":"crossref","first-page":"1889","DOI":"10.1109\/TITS.2024.3518537","article-title":"InstaGraM: instance-level graph modeling for vectorized HD map learning","volume":"26","author":"Shin","year":"2025","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"10.1016\/j.knosys.2026.115564_bib0023","series-title":"23rd IEEE International Conference on Intelligent Transportation Systems, ITSC 2020, Rhodes, Greece, September 20\u201323, 2020","first-page":"1","article-title":"Detecting lane and road markings at a distance with perspective transformer layers","author":"Yu","year":"2020"},{"key":"10.1016\/j.knosys.2026.115564_bib0024","series-title":"IEEE\/CVF Winter Conference on Applications of Computer Vision, WACV 2023, Waikoloa, HI, USA, January 2\u20137, 2023","first-page":"5924","article-title":"BEVSegFormer: bird\u2019s eye view semantic segmentation from arbitrary camera rigs","author":"Peng","year":"2023"},{"issue":"3","key":"10.1016\/j.knosys.2026.115564_bib0025","doi-asserted-by":"crossref","first-page":"4867","DOI":"10.1109\/LRA.2020.3004325","article-title":"Cross-view semantic segmentation for sensing surroundings","volume":"5","author":"Pan","year":"2020","journal-title":"IEEE Rob. Autom. Lett."},{"key":"10.1016\/j.knosys.2026.115564_bib0026","article-title":"BEVDet4D: exploit temporal cues in multi-camera 3D object detection","volume":"abs\/2203.17054","author":"Huang","year":"2022","journal-title":"CoRR"},{"key":"10.1016\/j.knosys.2026.115564_bib0027","series-title":"IEEE Intelligent Vehicles Symposium, IV","first-page":"2776","article-title":"UniBEV: multi-modal 3D object detection with uniform BEV encoders for robustness against missing sensor modalities","author":"Wang","year":"2024"},{"issue":"7","key":"10.1016\/j.knosys.2026.115564_bib0028","doi-asserted-by":"crossref","first-page":"6544","DOI":"10.1109\/LRA.2024.3401172","article-title":"Exploring recurrent long-term temporal fusion for multi-view 3D perception","volume":"9","author":"Han","year":"2024","journal-title":"IEEE Rob. Autom. Lett."},{"key":"10.1016\/j.knosys.2026.115564_bib0029","article-title":"Sparse4D v2: recurrent temporal fusion with sparse model","volume":"abs\/2305.14018","author":"Lin","year":"2023","journal-title":"CoRR"},{"key":"10.1016\/j.knosys.2026.115564_bib0030","first-page":"7276","article-title":"Spatial as deep: spatial CNN for traffic scene understanding","author":"Pan","year":"2018"},{"key":"10.1016\/j.knosys.2026.115564_bib0031","first-page":"964","article-title":"Semantic MapNet: building allocentric semantic maps and representations from egocentric views","author":"Cartillier","year":"2021"},{"key":"10.1016\/j.knosys.2026.115564_bib0032","series-title":"IEEE International Symposium on Mixed and Augmented Reality, ISMAR 2018, Munich, Germany, October 16\u201320, 2018","first-page":"10","article-title":"MaskFusion: real-time recognition, tracking and reconstruction of multiple moving objects","author":"R\u00fcnz","year":"2018"},{"key":"10.1016\/j.knosys.2026.115564_bib0033","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR","first-page":"14901","article-title":"SkyEye: self-supervised bird\u2019s-eye-view semantic mapping using monocular frontal view images","author":"Gosala","year":"2023"},{"key":"10.1016\/j.knosys.2026.115564_bib0034","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, New Orleans, LA, USA, June 18\u201324, 2022","first-page":"13750","article-title":"Cross-view transformers for real-time map-view semantic segmentation","author":"Zhou","year":"2022"},{"issue":"12","key":"10.1016\/j.knosys.2026.115564_bib0035","doi-asserted-by":"crossref","first-page":"11289","DOI":"10.1109\/LRA.2024.3490384","article-title":"TICMapNet: a tightly coupled temporal fusion pipeline for vectorized HD map learning","volume":"9","author":"Qiu","year":"2024","journal-title":"IEEE Rob. Autom. Lett."},{"key":"10.1016\/j.knosys.2026.115564_bib0036","series-title":"IEEE\/CVF International Conference on Computer Vision, ICCV","first-page":"3649","article-title":"PivotNet: vectorized pivot learning for end-to-end HD map construction","author":"Ding","year":"2023"},{"key":"10.1016\/j.knosys.2026.115564_bib0037","unstructured":"S. Chen, T. Cheng, X. Wang, W. Meng, Q. Zhang, W. Liu, Efficient and robust 2D-to-BEV representation learning via geometry-guided Kernel transformer, CoRR abs\/2206.04584(2022). 10.48550\/ARXIV.2206.04584."},{"key":"10.1016\/j.knosys.2026.115564_bib0038","series-title":"IEEE International Conference on Robotics and Automation, ICRA","first-page":"9056","article-title":"SuperFusion: multilevel LiDAR-camera fusion for long-range HD map generation","author":"Dong","year":"2024"},{"key":"10.1016\/j.knosys.2026.115564_bib0039","series-title":"Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems","first-page":"5998","article-title":"Attention is all you need","author":"Vaswani","year":"2017"},{"key":"10.1016\/j.knosys.2026.115564_bib0040","series-title":"The Tenth International Conference on Learning Representations, ICLR","article-title":"DAB-DETR: dynamic anchor boxes are better queries for DETR","author":"Liu","year":"2022"},{"key":"10.1016\/j.knosys.2026.115564_bib0041","series-title":"2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR","first-page":"11618","article-title":"nuScenes: a multimodal dataset for autonomous driving","author":"Caesar","year":"2020"},{"key":"10.1016\/j.knosys.2026.115564_bib0042","article-title":"Argoverse 2: next generation datasets for self-driving perception and forecasting","volume":"abs\/2301.00493","author":"Wilson","year":"2023","journal-title":"CoRR"},{"key":"10.1016\/j.knosys.2026.115564_bib0043","series-title":"Computer Vision - ECCV 2020 - 16th European Conference","first-page":"194","article-title":"Lift, splat, shoot: encoding images from arbitrary camera rigs by implicitly unprojecting to 3D","volume":"12359","author":"Philion","year":"2020"},{"issue":"6","key":"10.1016\/j.knosys.2026.115564_bib0044","doi-asserted-by":"crossref","first-page":"1239","DOI":"10.1007\/s11760-021-01854-0","article-title":"Semantic frustum-based sparsely embedded convolutional detection","volume":"15","author":"Feng","year":"2021","journal-title":"Signal Image Video Process."},{"key":"10.1016\/j.knosys.2026.115564_bib0045","series-title":"2022 International Conference on Robotics and Automation, ICRA","first-page":"4628","article-title":"HDMapNet: an online HD map construction and evaluation framework","author":"Li","year":"2022"},{"key":"10.1016\/j.knosys.2026.115564_bib0046","series-title":"International Conference on Machine Learning, ICML","first-page":"22352","article-title":"VectorMapNet: end-to-end vectorized HD map learning","volume":"202","author":"Liu","year":"2023"},{"key":"10.1016\/j.knosys.2026.115564_bib0047","series-title":"Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems, NeurIPS","article-title":"Online map vectorization for autonomous driving: a rasterization perspective","author":"Zhang","year":"2023"},{"key":"10.1016\/j.knosys.2026.115564_bib0048","article-title":"HybriMap: hybrid clues utilization for effective vectorized HD map construction","volume":"abs\/2404.11155","author":"Zhang","year":"2024","journal-title":"CoRR"},{"key":"10.1016\/j.knosys.2026.115564_bib0049","series-title":"Computer Vision - ECCV 2024 - 18th European Conference","first-page":"311","article-title":"ADMap: anti-disturbance framework for vectorized HD map construction","volume":"15067","author":"Hu","year":"2024"},{"key":"10.1016\/j.knosys.2026.115564_bib0050","article-title":"FastMap: fast queries initialization based vectorized HD map reconstruction framework","volume":"abs\/2503.05492","author":"Hu","year":"2025","journal-title":"CoRR"},{"key":"10.1016\/j.knosys.2026.115564_bib0051","article-title":"SparseMeXT unlocking the potential of sparse representations for HD map construction","volume":"abs\/2505.08808","author":"Jiang","year":"2025","journal-title":"CoRR"},{"key":"10.1016\/j.knosys.2026.115564_bib0052","series-title":"Computer Vision - ECCV 2024 - 18th European Conference","first-page":"461","article-title":"Leveraging enhanced queries of point sets for vectorized map construction","volume":"15115","author":"Liu","year":"2024"},{"key":"10.1016\/j.knosys.2026.115564_bib0053","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR","first-page":"14812","article-title":"MGMap: mask-guided learning for online vectorized HD map construction","author":"Liu","year":"2024"},{"key":"10.1016\/j.knosys.2026.115564_bib0054","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR","first-page":"15396","article-title":"HIMap: hybrid representation learning for end-to-end vectorized HD map construction","author":"Zhou","year":"2024"},{"key":"10.1016\/j.knosys.2026.115564_bib0055","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR","first-page":"13218","article-title":"End-to-end vectorized HD-map construction with piecewise b\u00e9zier curve","author":"Qiao","year":"2023"},{"key":"10.1016\/j.knosys.2026.115564_bib0056","series-title":"2016 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2016, Las Vegas, NV, USA, June 27\u201330, 2016","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He","year":"2016"},{"key":"10.1016\/j.knosys.2026.115564_bib0057","series-title":"Proceedings of the 36th International Conference on Machine Learning, ICML","first-page":"6105","article-title":"EfficientNet: rethinking model scaling for convolutional neural networks","volume":"97","author":"Tan","year":"2019"}],"container-title":["Knowledge-Based Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126003060?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126003060?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:14:21Z","timestamp":1777594461000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0950705126003060"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4]]},"references-count":57,"alternative-id":["S0950705126003060"],"URL":"https:\/\/doi.org\/10.1016\/j.knosys.2026.115564","relation":{},"ISSN":["0950-7051"],"issn-type":[{"value":"0950-7051","type":"print"}],"subject":[],"published":{"date-parts":[[2026,4]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"STPE-map: Multimodal alignment and spatio-temporal priors for online HD map construction","name":"articletitle","label":"Article Title"},{"value":"Knowledge-Based Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.knosys.2026.115564","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"115564"}}