{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T14:19:29Z","timestamp":1766067569191},"reference-count":55,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,5,23]]},"DOI":"10.1109\/icra46639.2022.9811749","type":"proceedings-article","created":{"date-parts":[[2022,7,12]],"date-time":"2022-07-12T19:36:40Z","timestamp":1657654600000},"source":"Crossref","is-referenced-by-count":18,"title":["Depth Estimation Matters Most: Improving Per-Object Depth Estimation for Monocular 3D Detection and Tracking"],"prefix":"10.1109","author":[{"given":"Longlong","family":"Jing","sequence":"first","affiliation":[{"name":"Waymo LLC"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruichi","family":"Yu","sequence":"additional","affiliation":[{"name":"Waymo LLC"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Henrik","family":"Kretzschmar","sequence":"additional","affiliation":[{"name":"Waymo LLC"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kang","family":"Li","sequence":"additional","affiliation":[{"name":"Waymo LLC"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Charles R.","family":"Qi","sequence":"additional","affiliation":[{"name":"Waymo LLC"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hang","family":"Zhao","sequence":"additional","affiliation":[{"name":"Waymo LLC"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alper","family":"Ayvaci","sequence":"additional","affiliation":[{"name":"Waymo LLC"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xu","family":"Chen","sequence":"additional","affiliation":[{"name":"Waymo LLC"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dillon","family":"Cower","sequence":"additional","affiliation":[{"name":"Waymo LLC"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yingwei","family":"Li","sequence":"additional","affiliation":[{"name":"Johns Hopkins University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yurong","family":"You","sequence":"additional","affiliation":[{"name":"Cornell University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Han","family":"Deng","sequence":"additional","affiliation":[{"name":"Waymo LLC"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Congcong","family":"Li","sequence":"additional","affiliation":[{"name":"Waymo LLC"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dragomir","family":"Anguelov","sequence":"additional","affiliation":[{"name":"Waymo LLC"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00329"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6908"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00695"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01169"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00114"},{"key":"ref30","article-title":"Monocular 3d object detection and box fitting trained end-to-end using intersection-over-union loss","author":"jorgensen","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref37","article-title":"Semantically-guided representation learning for self-supervised monocular depth","author":"guizilini","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref36","first-page":"2366","article-title":"Depth map prediction from a single image using a multi-scale deep network","author":"eigen","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00653"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2016.7533003"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00938"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00394"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00208"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00062"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01298"},{"key":"ref20","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01309"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00102"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.52"},{"key":"ref23","article-title":"Looking fast and slow: Memory-guided mobile video object detection","author":"liu","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913491297"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00252"},{"key":"ref50","article-title":"Fairmot: On the fairness of detection and re-identification in multiple object tracking","author":"zhang","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref51","article-title":"Soda: Multi-object tracking with soft data association","author":"hung","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1049\/cp.2010.0495"},{"key":"ref54","article-title":"Disnet: A novel method for distance estimation from monocular camera","author":"haseeb","year":"2018","journal-title":"10th Planning Perception and Navigation for Intelligent Vehicles (PPNIV18) IROS"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.3390\/s150923805"},{"key":"ref52","article-title":"Towards real-time multi-object tracking","author":"wang","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341164"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00549"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00214"},{"key":"ref12","article-title":"Centernet3d: An anchor free object detector for autonomous driving","author":"wang","year":"2020","journal-title":"ar Xiv preprint"},{"key":"ref13","article-title":"Rtm3d: Real-time monocular 3d detection from object keypoints for autonomous driving","author":"li","year":"2020","journal-title":"ECCV"},{"key":"ref14","article-title":"Tracking objects as points","author":"zhou","year":"2020","journal-title":"ECCV"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00506"},{"key":"ref17","article-title":"Refinedmpl: Refined monocular pseudolidar for 3d object detection in autonomous driving","author":"vianney","year":"2019","journal-title":"ar Xiv preprint"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.699"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00393"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.29007\/qwpk"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00939"},{"key":"ref6","article-title":"High quality monocular depth estimation via transfer learning","author":"alhashim","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01211"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58601-0_19"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00864"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref9","article-title":"Objects as points","author":"zhou","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref46","first-page":"213","article-title":"Fusenet: Incorporating depth into semantic segmentation via fusion-based cnn architecture","author":"hazirbas","year":"0","journal-title":"Asian Conference on Computer Vision"},{"key":"ref45","article-title":"Audiovisual slowfast networks for video recognition","author":"xiao","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.316"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.213"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00752"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00466"},{"key":"ref44","article-title":"Multi-view fusion of sensor data for improved perception and prediction in autonomous driving","author":"fadadu","year":"2020","journal-title":"ar Xiv preprint"},{"key":"ref43","first-page":"641","article-title":"Deep continuous fusion for multi-sensor 3d object detection","author":"liang","year":"0","journal-title":"Proceedings of the European Conference on Computer Vision (ECCV)"}],"event":{"name":"2022 IEEE International Conference on Robotics and Automation (ICRA)","location":"Philadelphia, PA, USA","start":{"date-parts":[[2022,5,23]]},"end":{"date-parts":[[2022,5,27]]}},"container-title":["2022 International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9811522\/9811357\/09811749.pdf?arnumber=9811749","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,3]],"date-time":"2022-11-03T23:04:40Z","timestamp":1667516680000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9811749\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,23]]},"references-count":55,"URL":"https:\/\/doi.org\/10.1109\/icra46639.2022.9811749","relation":{},"subject":[],"published":{"date-parts":[[2022,5,23]]}}}