{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T13:25:08Z","timestamp":1762953908981,"version":"3.28.0"},"reference-count":51,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1109\/icra48891.2023.10161214","type":"proceedings-article","created":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T17:20:56Z","timestamp":1688491256000},"page":"7046-7053","source":"Crossref","is-referenced-by-count":14,"title":["HFT: Lifting Perspective Representations via Hybrid Feature Transformation for BEV Perception"],"prefix":"10.1109","author":[{"given":"Jiayu","family":"Zou","sequence":"first","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences;, School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China"}]},{"given":"Zheng","family":"Zhu","sequence":"additional","affiliation":[{"name":"PhiGent Robotics,Beijing,China"}]},{"given":"Junjie","family":"Huang","sequence":"additional","affiliation":[{"name":"PhiGent Robotics,Beijing,China"}]},{"given":"Tian","family":"Yang","sequence":"additional","affiliation":[{"name":"PhiGent Robotics,Beijing,China"}]},{"given":"Guan","family":"Huang","sequence":"additional","affiliation":[{"name":"PhiGent Robotics,Beijing,China"}]},{"given":"Xingang","family":"Wang","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences;, School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01185"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560849"},{"key":"ref15","article-title":"Learning to look around objects for top-view representations of outdoor scenes","author":"schulter","year":"0","journal-title":"Proceedings of the European Conference on Computer Vision"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561344"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9562015"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561275"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01057"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6385958"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2016.XII.042"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2018.8569311"},{"key":"ref51","article-title":"Bevformer: Learning bird's-eye-view representation from multi-camera images via spatiotemporal transformers","author":"li","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref50","first-page":"1475","article-title":"Probabilistic and geometric depth: Detecting objects in perspective","author":"wang","year":"0","journal-title":"Conference on Robot Learning"},{"key":"ref46","article-title":"Orthographic feature trans-form for monocular 3d object detection","author":"roddick","year":"0","journal-title":"British Machine Vision Conference"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.236"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00107"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00667"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00939"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"journal-title":"Fixing weight decay regularization in adam","year":"2018","author":"loshchilov","key":"ref43"},{"key":"ref49","first-page":"180","article-title":"Detr3d: 3d object detection from multi-view images via 3d-to-2d queries","author":"wang","year":"0","journal-title":"Conference on Robot Learning"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.185"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636577"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01236-7"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3094564"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636384"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636241"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636559"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00895"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref34","article-title":"Petr: Position embedding transformation for multi-view 3d object detection","author":"liu","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00475"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00454"},{"key":"ref31","article-title":"Bevdet: High-performance multi-camera 3d object detection in bird-eye-view","author":"huang","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01339"},{"key":"ref33","article-title":"Beverse: Unified perception and prediction in birds-eye-view for vision-centric autonomous driving","author":"zhang","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref32","article-title":"Bevdet4d: Exploit temporal cues in multi-camera 3d object detection","author":"huang","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9635858"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636707"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2891028"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01528"},{"key":"ref26","article-title":"Distilling the knowledge in a neural network","author":"hinton","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.3004325"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC45102.2020.9294462"},{"key":"ref22","first-page":"194","article-title":"Lift, splat, shoot:Encoding images from arbitrary camera rigs by implicitly unprojecting to 3d","author":"philion","year":"0","journal-title":"Proceedings of the European Conference on Computer Vision"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01115"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01499"},{"key":"ref27","article-title":"Dual learning for machine translation","volume":"29","author":"he","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref29","first-page":"arxiv?2107","author":"li","year":"2021","journal-title":"Hdmapnet An online hd map construction and evaluation framework"}],"event":{"name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2023,5,29]]},"location":"London, United Kingdom","end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10161214.pdf?arnumber=10161214","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,24]],"date-time":"2023-07-24T17:31:33Z","timestamp":1690219893000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10161214\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":51,"URL":"https:\/\/doi.org\/10.1109\/icra48891.2023.10161214","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]}}}