{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:19:27Z","timestamp":1778080767495,"version":"3.51.4"},"publisher-location":"Cham","reference-count":53,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031851865","type":"print"},{"value":"9783031851872","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-85187-2_9","type":"book-chapter","created":{"date-parts":[[2025,4,23]],"date-time":"2025-04-23T05:14:13Z","timestamp":1745385253000},"page":"137-152","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["CARLA Drone: Monocular 3D Object Detection from\u00a0a\u00a0Different Perspective"],"prefix":"10.1007","author":[{"given":"Johannes","family":"Meier","sequence":"first","affiliation":[]},{"given":"Luca","family":"Scalerandi","sequence":"additional","affiliation":[]},{"given":"Oussema","family":"Dhaouadi","sequence":"additional","affiliation":[]},{"given":"Jacques","family":"Kaiser","sequence":"additional","affiliation":[]},{"given":"Nikita","family":"Araslanov","sequence":"additional","affiliation":[]},{"given":"Daniel","family":"Cremers","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,4,24]]},"reference":[{"key":"9_CR1","doi-asserted-by":"crossref","unstructured":"Araslanov, N., Roth, S.: Self-supervised augmentation consistency for adapting semantic segmentation. In: CVPR, pp. 15384\u201315394 (2021)","DOI":"10.1109\/CVPR46437.2021.01513"},{"key":"9_CR2","doi-asserted-by":"crossref","unstructured":"Brazil, G., Kumar, A., Straub, J., Ravi, N., Johnson, J., Gkioxari, G.: Omni3D: a large benchmark and model for 3D object detection in the wild. In: CVPR, pp. 13154\u201313164 (2023)","DOI":"10.1109\/CVPR52729.2023.01264"},{"key":"9_CR3","doi-asserted-by":"crossref","unstructured":"Brazil, G., Liu, X.: M3D-RPN: monocular 3D region proposal network for object detection. In: ICCV, pp. 9286\u20139295 (2019)","DOI":"10.1109\/ICCV.2019.00938"},{"key":"9_CR4","doi-asserted-by":"crossref","unstructured":"Brazil, G., Pons-Moll, G., Liu, X., Schiele, B.: Kinematic 3D object detection in monocular video. In: ECCV, pp. 135\u2013152 (2020)","DOI":"10.1007\/978-3-030-58592-1_9"},{"key":"9_CR5","doi-asserted-by":"crossref","unstructured":"Caesar, H., et al.: NuScenes: a multimodal dataset for autonomous driving. In: CVPR, pp. 11618\u201311628 (2020)","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"9_CR6","doi-asserted-by":"crossref","unstructured":"Chen, L., Liu, F., Zhao, Y., Wang, W., Yuan, X., Zhu, J.: VALID: a comprehensive virtual aerial image dataset. In: ICRA, pp. 2009\u20132016 (2020)","DOI":"10.1109\/ICRA40945.2020.9197186"},{"key":"9_CR7","unstructured":"Chen, X., Chen, M., Tang, S., Niu, Y., Zhu, J.: MOSE: boosting vision-based roadside 3D object detection with scene cues. arXiv:2404.05280 (2024)"},{"key":"9_CR8","doi-asserted-by":"crossref","unstructured":"Chen, X., Kundu, K., Zhang, Z., Ma, H., Fidler, S., Urtasun, R.: Monocular 3D object detection for autonomous driving. In: CVPR, pp. 2147\u20132156 (2016)","DOI":"10.1109\/CVPR.2016.236"},{"key":"9_CR9","first-page":"4416","volume":"45","author":"Y Chen","year":"2022","unstructured":"Chen, Y., Huang, S., Liu, S., Yu, B., Jia, J.: DSGN++: exploiting visual-spatial relation for stereo-based 3D detectors. IEEE TPAMI 45, 4416\u20134429 (2022)","journal-title":"IEEE TPAMI"},{"key":"9_CR10","unstructured":"Choi, W., Shin, M., Im, S.: Depth-discriminative metric learning for monocular 3D object detection. In: NeurIPS (2023)"},{"key":"9_CR11","doi-asserted-by":"crossref","unstructured":"Cre\u00df, C., et al.: A9-Dataset: multi-sensor infrastructure-based dataset for mobility research. IEEE IV, pp. 965\u2013970 (2022)","DOI":"10.1109\/IV51971.2022.9827401"},{"key":"9_CR12","unstructured":"Dosovitskiy, A., Ros, G., Codevilla, F., L\u00f3pez, A.M., Koltun, V.: CARLA: an open urban driving simulator. In: CoRL (2017)"},{"key":"9_CR13","doi-asserted-by":"crossref","unstructured":"Du, D., et al.: The unmanned aerial vehicle benchmark: object detection and tracking. In: ECCV, pp. 375\u2013391 (2018)","DOI":"10.1007\/978-3-030-01249-6_23"},{"key":"9_CR14","doi-asserted-by":"crossref","unstructured":"Fonder, M., Droogenbroeck, M.V.: Mid-air: a multi-modal dataset for extremely low altitude drone flights. In: CVPRW (2019)","DOI":"10.1109\/CVPRW.2019.00081"},{"key":"9_CR15","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? The KITTI vision benchmark suite. In: CVPR, pp. 3354\u20133361 (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"9_CR16","doi-asserted-by":"crossref","unstructured":"Hsieh, M., Lin, Y., Hsu, W.H.: Drone-based object counting by spatially regularized regional proposal network. In: ICCV, pp. 4165\u20134173 (2017)","DOI":"10.1109\/ICCV.2017.446"},{"issue":"4","key":"9_CR17","doi-asserted-by":"publisher","first-page":"1959","DOI":"10.1109\/LRA.2023.3245421","volume":"8","author":"Y Hu","year":"2023","unstructured":"Hu, Y., Fang, S., Xie, W., Chen, S.: Aerial monocular 3D object detection. IEEE Rob. Autom. Lett. 8(4), 1959\u20131966 (2023)","journal-title":"IEEE Rob. Autom. Lett."},{"key":"9_CR18","unstructured":"Jia, J., Li, Z., Shi, Y.: MonoUNI: a unified vehicle and infrastructure-side monocular 3D object detection network with sufficient depth clues. In: NeurIPS (2023)"},{"key":"9_CR19","doi-asserted-by":"crossref","unstructured":"Kumar, A., Brazil, G., Corona, E., Parchami, A., Liu, X.: Deviant: depth equivariant network for monocular 3D object detection. In: ECCV, pp. 664\u2013683 (2022)","DOI":"10.1007\/978-3-031-20077-9_39"},{"key":"9_CR20","doi-asserted-by":"crossref","unstructured":"Kundu, A., Li, Y., Rehg, J.M.: 3D-RCNN: instance-level 3D object reconstruction via render-and-compare. In: CVPR, pp. 3559\u20133568 (2018)","DOI":"10.1109\/CVPR.2018.00375"},{"key":"9_CR21","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: BEVDepth: acquisition of reliable depth for multi-view 3D object detection. In: AAAI, pp. 1477\u20131485 (2023)","DOI":"10.1609\/aaai.v37i2.25233"},{"key":"9_CR22","doi-asserted-by":"crossref","unstructured":"Li, Z., Jia, J., Shi, Y.: MonoLSS: learnable sample selection for monocular 3D detection. In: 3DV, pp. 1125\u20131135 (2024)","DOI":"10.1109\/3DV62453.2024.00088"},{"key":"9_CR23","doi-asserted-by":"crossref","unstructured":"Li, Z., et al.: BEVFormer: learning bird\u2019s-eye-view representation from multi-camera images via spatiotemporal transformers. In: ECCV (2022)","DOI":"10.1007\/978-3-031-20077-9_1"},{"key":"9_CR24","doi-asserted-by":"crossref","unstructured":"Li, Z., Qu, Z., Zhou, Y., Liu, J., Wang, H., Jiang, L.: Diversity matters: fully exploiting depth clues for reliable monocular 3D object detection. In: CVPR, pp. 2781\u20132790 (2022)","DOI":"10.1109\/CVPR52688.2022.00281"},{"key":"9_CR25","doi-asserted-by":"crossref","unstructured":"Li, Z., Xu, X., Lim, S., Zhao, H.: UniMODE: unified monocular 3D object detection. arXiv:2402.18573 (2024)","DOI":"10.1109\/CVPR52733.2024.01567"},{"key":"9_CR26","doi-asserted-by":"crossref","unstructured":"Lian, Q., Li, P., Chen, X.: MonoJSG: joint semantic and geometric cost volume for monocular 3D object detection. In: CVPR, pp. 1060\u20131069 (2022)","DOI":"10.1109\/CVPR52688.2022.00114"},{"key":"9_CR27","doi-asserted-by":"crossref","unstructured":"Lian, Q., Ye, B., Xu, R., Yao, W., Zhang, T.: Exploring geometric consistency for monocular 3D object detection. In: CVPR, pp. 1675\u20131684 (2022)","DOI":"10.1109\/CVPR52688.2022.00173"},{"key":"9_CR28","doi-asserted-by":"crossref","unstructured":"Liu, X., Xue, N., Wu, T.: Learning auxiliary monocular contexts helps monocular 3D object detection. In: AAAI, pp. 1810\u20131818 (2022)","DOI":"10.1609\/aaai.v36i2.20074"},{"key":"9_CR29","doi-asserted-by":"crossref","unstructured":"Liu, X., Zheng, C., Cheng, K., Xue, N., Qi, G., Wu, T.: Monocular 3D object detection with bounding box denoising in 3D by perceiver. In: ICCV, pp. 6413\u20136423 (2023)","DOI":"10.1109\/ICCV51070.2023.00592"},{"key":"9_CR30","doi-asserted-by":"crossref","unstructured":"Lu, Y., et al.: Geometry uncertainty projection network for monocular 3D object detection. In: ICCV, pp. 3091\u20133101 (2021)","DOI":"10.1109\/ICCV48922.2021.00310"},{"key":"9_CR31","doi-asserted-by":"crossref","unstructured":"Ma, X., et al.: Delving into localization errors for monocular 3D object detection. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00469"},{"key":"9_CR32","doi-asserted-by":"publisher","first-page":"7326","DOI":"10.1109\/LRA.2024.3414272","volume":"9","author":"J Qiao","year":"2024","unstructured":"Qiao, J., et al.: MonoSample: synthetic 3D data augmentation method in monocular 3D object detection. IEEE Rob. Autom. Lett. 9, 7326\u20137332 (2024)","journal-title":"IEEE Rob. Autom. Lett."},{"key":"9_CR33","doi-asserted-by":"crossref","unstructured":"Reading, C., Harakeh, A., Chae, J., Waslander, S.L.: Categorical depth distribution network for monocular 3D object detection. In: CVPR, pp. 8555\u20138564 (2021)","DOI":"10.1109\/CVPR46437.2021.00845"},{"key":"9_CR34","doi-asserted-by":"crossref","unstructured":"Shi, H., et al.: CoBEV: elevating roadside 3D object detection with depth and height complementarity. arXiv preprint arXiv:2310.02815 (2023)","DOI":"10.1109\/TIP.2024.3463409"},{"key":"9_CR35","doi-asserted-by":"crossref","unstructured":"Shi, X., Chen, Z., Kim, T.K.: Multivariate probabilistic monocular 3D object detection. In: WACV, pp. 4270\u20134279 (2023)","DOI":"10.1109\/WACV56688.2023.00426"},{"key":"9_CR36","doi-asserted-by":"crossref","unstructured":"Shi, X., Ye, Q., Chen, X., Chen, C., Chen, Z., Kim, T.: Geometry-based distance decomposition for monocular 3D object detection. In: ICCV, pp. 15152\u201315161 (2021)","DOI":"10.1109\/ICCV48922.2021.01489"},{"key":"9_CR37","doi-asserted-by":"crossref","unstructured":"Simonelli, A., Bul\u00f2, S.R., Porzi, L., Lopez-Antequera, M., Kontschieder, P.: Disentangling monocular 3D object detection. In: ICCV, pp. 1991\u20131999 (2019)","DOI":"10.1109\/ICCV.2019.00208"},{"key":"9_CR38","doi-asserted-by":"crossref","unstructured":"Sun, P., et al.: Scalability in perception for autonomous driving: waymo open dataset. In: CVPR, pp. 2443\u20132451 (2020)","DOI":"10.1109\/CVPR42600.2020.00252"},{"key":"9_CR39","doi-asserted-by":"crossref","unstructured":"Sun, Y., Cao, B., Zhu, P., Hu, Q.: Drone-based RGB-infrared cross-modality vehicle detection via uncertainty-aware learning. In: IEEE TCSVT, pp. 6700\u20136713 (2020)","DOI":"10.1109\/TCSVT.2022.3168279"},{"key":"9_CR40","unstructured":"Tong, W., et al.: 3D data augmentation for driving scenes on camera. arXiv preprint arXiv:2303.10340 (2023)"},{"key":"9_CR41","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: TartanAir: a dataset to push the limits of visual SLAM. In: IROS, pp. 4909\u20134916 (2020)","DOI":"10.1109\/IROS45743.2020.9341801"},{"key":"9_CR42","doi-asserted-by":"crossref","unstructured":"Yang, L., et al.: MonoGAE: roadside monocular 3D object detection with ground-aware embeddings. arXiv preprint arXiv:2310.00400 (2023)","DOI":"10.1109\/TITS.2024.3412759"},{"key":"9_CR43","doi-asserted-by":"crossref","unstructured":"Yang, L., et al.: BEVHeight: a robust framework for vision-based roadside 3D object detection. In: CVPR, pp. 21611\u201321620 (2023)","DOI":"10.1109\/CVPR52729.2023.02070"},{"issue":"11","key":"9_CR44","first-page":"6832","volume":"33","author":"L Yang","year":"2023","unstructured":"Yang, L., et al.: Mix-Teaching: a simple, unified and effective semi-supervised learning framework for monocular 3D object detection. IEEE TCSVT 33(11), 6832\u20136844 (2023)","journal-title":"IEEE TCSVT"},{"key":"9_CR45","doi-asserted-by":"crossref","unstructured":"Ye, X., et al.: Rope3D: the roadside perception dataset for autonomous driving and monocular 3D object detection task. In: CVPR, pp. 21309\u201321318 (2022)","DOI":"10.1109\/CVPR52688.2022.02065"},{"key":"9_CR46","doi-asserted-by":"crossref","unstructured":"Yu, F., Wang, D., Shelhamer, E., Darrell, T.: Deep layer aggregation. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00255"},{"key":"9_CR47","doi-asserted-by":"crossref","unstructured":"Yu, H., et al.: DAIR-V2X: a large-scale dataset for vehicle-infrastructure cooperative 3D object detection. In: CVPR, pp. 21329\u201321338 (2022)","DOI":"10.1109\/CVPR52688.2022.02067"},{"key":"9_CR48","unstructured":"Zhang, H., Ciss\u00e9, M., Dauphin, Y.N., Lopez-Paz, D.: mixup: beyond empirical risk minimization. In: ICLR (2018)"},{"key":"9_CR49","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Lu, J., Zhou, J.: Objects are different: flexible monocular 3D object detection. In: CVPR, pp. 3289\u20133298 (2021)","DOI":"10.1109\/CVPR46437.2021.00330"},{"key":"9_CR50","doi-asserted-by":"crossref","unstructured":"Zhou, X., Koltun, V., Kr\u00e4henb\u00fchl, P.: Tracking objects as points. In: ECCV, pp. 474\u2013490 (2020)","DOI":"10.1007\/978-3-030-58548-8_28"},{"key":"9_CR51","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Barnes, C., Lu, J., Yang, J., Li, H.: On the continuity of rotation representations in neural networks. In: CVPR, pp. 5745\u20135753 (2019)","DOI":"10.1109\/CVPR.2019.00589"},{"issue":"11","key":"9_CR52","doi-asserted-by":"publisher","first-page":"7380","DOI":"10.1109\/TPAMI.2021.3119563","volume":"44","author":"P Zhu","year":"2022","unstructured":"Zhu, P., et al.: Detection and tracking meet drones challenge. IEEE TPAMI 44(11), 7380\u20137399 (2022)","journal-title":"IEEE TPAMI"},{"key":"9_CR53","doi-asserted-by":"crossref","unstructured":"Zimmer, W., Cre\u00df, C., Nguyen, H.T., Knoll, A.: TUMTraf intersection dataset: all you need for urban 3D camera-lidar roadside perception. In: ITSC, pp. 1030\u20131037 (2023)","DOI":"10.1109\/ITSC57777.2023.10422289"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-85187-2_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T12:03:37Z","timestamp":1760702617000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-85187-2_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031851865","9783031851872"],"references-count":53,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-85187-2_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"24 April 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}