{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T06:19:12Z","timestamp":1778048352606,"version":"3.51.4"},"reference-count":73,"publisher":"IEEE","license":[{"start":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T00:00:00Z","timestamp":1772755200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T00:00:00Z","timestamp":1772755200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,3,6]]},"DOI":"10.1109\/wacv61042.2026.00026","type":"proceedings-article","created":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T19:59:32Z","timestamp":1778011172000},"page":"181-191","source":"Crossref","is-referenced-by-count":0,"title":["IDEAL-M3D: Instance Diversity-Enriched Active Learning for Monocular 3D Detection"],"prefix":"10.1109","author":[{"given":"Johannes","family":"Meier","sequence":"first","affiliation":[{"name":"DeepScenario"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Florian","family":"G\u00fcnther","sequence":"additional","affiliation":[{"name":"TU Munich"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Riccardo","family":"Marin","sequence":"additional","affiliation":[{"name":"TU Munich"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Oussema","family":"Dhaouadi","sequence":"additional","affiliation":[{"name":"DeepScenario"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jacques","family":"Kaiser","sequence":"additional","affiliation":[{"name":"DeepScenario"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniel","family":"Cremers","sequence":"additional","affiliation":[{"name":"TU Munich"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58517-4_9"},{"key":"ref2","first-page":"1027","article-title":"k-means++: the advantages of careful seeding","volume-title":"Proceedings of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms, SODA 2007, New Orleans, Louisiana, USA, January 7-9, 2007","author":"Arthur"},{"key":"ref3","article-title":"Deep batch active learning by diverse, uncertain gradient lower bounds","volume-title":"ICLR","author":"Ash"},{"key":"ref4","article-title":"Distribution discrepancy and feature heterogeneity for active 3d object detection","author":"Chen","year":"2024","journal-title":"CoRL"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3435937"},{"key":"ref6","article-title":"3d object proposals for accurate object class detection","volume":"28","author":"Chen","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3318070"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/IV64158.2025.11097484"},{"key":"ref10","article-title":"Bayesian active learning for semantic segmentation","author":"Didari","year":"2024","journal-title":"CoRR"},{"key":"ref11","first-page":"1183","article-title":"Deep bayesian active learning with image data","volume-title":"ICML","volume":"70","author":"Gal"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"ref13","article-title":"Discriminative active learning","author":"Gissin","year":"2019","journal-title":"CoRR"},{"key":"ref14","article-title":"Hybrid active learning with uncertainty-weighted embeddings","author":"He","year":"2024","journal-title":"Trans. Mach. Learn. Res., 2024"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00234"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/IV51971.2022.9827454"},{"key":"ref17","article-title":"Gate3d: Generalized attention-based task-synergized estimation in 3d*","volume-title":"CVPRW","author":"Im"},{"key":"ref18","volume-title":"Monouni: A unified vehicle and infrastructure-side monocular 3d object detection network with sufficient depth clues","author":"Jia","year":"2023"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.52202\/079017-0362"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1406.3269"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20077-9_39"},{"key":"ref22","article-title":"A survey on deep active learning: Recent advances and new frontiers","author":"Li","year":"2024","journal-title":"CoRR"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/3DV62453.2024.00088"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00281"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02074"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i2.20074"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"ref29","article-title":"Decoupled weight decay regularization","volume-title":"7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6-9, 2019","author":"Loshchilov"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00310"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01676"},{"key":"ref32","article-title":"Exploring active 3d object detection from a generalization perspective","volume-title":"ICLR","author":"Luo"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02276"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00469"},{"key":"ref35","article-title":"STONE: A submodular optimization framework for active 3d object detection","volume-title":"NeurIPS","author":"MAO"},{"key":"ref36","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-031-85187-2_9","article-title":"CARLA Drone: monocular 3d object detection from a different perspective","volume-title":"German Conference on Pattern Recognition, GCPR","author":"Meier"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.597"},{"key":"ref38","article-title":"Dinov2: Learning robust visual features without supervision","volume-title":"TMLR","author":"Oquab"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00979"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00611"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73661-2_5"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00377"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01014"},{"key":"ref44","article-title":"SAM 2: Segment anything in images and videos","author":"Ravi","year":"2024","journal-title":"CoRR"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00845"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref47","article-title":"Active learning for convolutional neural networks: A core-set approach","volume-title":"ICLR","author":"Sener"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/584091.584093"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00945"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00208"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00252"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3111621"},{"issue":"86","key":"ref53","first-page":"2579","article-title":"Visualizing data using t-sne","volume":"9","author":"van der Maaten","year":"2008","journal-title":"Journal of Machine Learning Research"},{"key":"ref54","article-title":"Object-aware DINO (oh-a-dino): Enhancing self-supervised representations for multi-object instance retrieval","author":"Wagner","year":"2025","journal-title":"CoRR"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01506"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00627"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00976"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01684"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3270728"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02065"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00255"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02067"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00531"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00529"},{"key":"ref65","article-title":"Multiple-criteria based active learning with fixed-size determinantal point processes","author":"Zhan","year":"2021","journal-title":"CoRR"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/634"},{"key":"ref67","article-title":"A comparative survey of deep active learning","author":"Zhan","year":"2022","journal-title":"CoRR"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01601"},{"key":"ref69","article-title":"Revisiting monocular 3d object detection from scene-level depth retargeting to instance-level spatial refinement","author":"Zhang","year":"2024"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00840"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00330"},{"key":"ref72","article-title":"Objects as points","author":"Zhou","year":"2019","journal-title":"CoRR"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72940-9_19"}],"event":{"name":"2026 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV)","location":"Tucson, AZ, USA","start":{"date-parts":[[2026,3,6]]},"end":{"date-parts":[[2026,3,10]]}},"container-title":["2026 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11491838\/11491925\/11492628.pdf?arnumber=11492628","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T05:58:52Z","timestamp":1778047132000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11492628\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,6]]},"references-count":73,"URL":"https:\/\/doi.org\/10.1109\/wacv61042.2026.00026","relation":{},"subject":[],"published":{"date-parts":[[2026,3,6]]}}}