{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T18:50:34Z","timestamp":1771613434088,"version":"3.50.1"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T00:00:00Z","timestamp":1752969600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T00:00:00Z","timestamp":1752969600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Life Robotics"],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1007\/s10015-025-01048-8","type":"journal-article","created":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T04:04:32Z","timestamp":1752984272000},"page":"555-566","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["MonoDGAE: depth-guided attention and bilateral filtering for robust monocular 3D object detection"],"prefix":"10.1007","volume":"30","author":[{"given":"George Albert","family":"Bitwire","sequence":"first","affiliation":[]},{"given":"Samuel","family":"Kakuba","sequence":"additional","affiliation":[]},{"given":"Dae Woong","family":"Cha","sequence":"additional","affiliation":[]},{"given":"Dong Seog","family":"Han","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,20]]},"reference":[{"key":"1048_CR1","unstructured":"Zhou X, Wang D, Kr\u00e4henb\u00fchl P (2019) Objects as points. In: Advances in neural information processing systems (NeurIPS), vol\u00a032. Curran Associates, Inc, pp 7070\u20137081"},{"key":"1048_CR2","doi-asserted-by":"crossref","unstructured":"Brazil G, Liu X (2019) M3D-RPN: monocular 3D region proposal network for object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 9287\u20139296","DOI":"10.1109\/ICCV.2019.00938"},{"key":"1048_CR3","doi-asserted-by":"crossref","unstructured":"Ku J, Pon AD, Waslander SL (2019) Monocular 3D object detection leveraging accurate proposals and shape reconstruction. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 11867\u201311876","DOI":"10.1109\/CVPR.2019.01214"},{"key":"1048_CR4","doi-asserted-by":"crossref","unstructured":"Huang, K-C, Wu T-H, Su H-T, Hsu WH (2022) MonoDTR: monocular 3D object detection with depth-aware transformer. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 4012\u20134021","DOI":"10.1109\/CVPR52688.2022.00398"},{"key":"1048_CR5","unstructured":"Liao P, Yang F, Wu D, Zhao W, Yu J (2024) MonoDETRNext: next-generation accurate and efficient monocular 3D object detector. arXiv preprint arXiv:2405.15176"},{"key":"1048_CR6","doi-asserted-by":"crossref","unstructured":"Ranasinghe Y, Hegde D, Patel VM (2024) MonoDiff: monocular 3D object detection and pose estimation with diffusion models. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10659\u201310670","DOI":"10.1109\/CVPR52733.2024.01014"},{"key":"1048_CR7","doi-asserted-by":"crossref","unstructured":"Ding M, Huo Y, Yi H, Wang Z, Shi J, Lu Z, Luo P (2020) Learning depth-guided convolutions for monocular 3D object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops, pp 1000\u20131001","DOI":"10.1109\/CVPRW50498.2020.00508"},{"key":"1048_CR8","doi-asserted-by":"crossref","unstructured":"Yan L, Yan P, Xiong S, Xiang X, Tan Y (2024) MonoCD: monocular 3D object detection with complementary depths. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10248\u201310257","DOI":"10.1109\/CVPR52733.2024.00976"},{"key":"1048_CR9","doi-asserted-by":"crossref","unstructured":"Brazil G, Pons-Moll G, Liu X, Schiele B (2020) Kinematic 3D object detection in monocular video. In: Computer vision\u2014ECCV 2020: 16th European conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXIII 16. Springer, Berlin, pp 135\u2013152","DOI":"10.1007\/978-3-030-58592-1_9"},{"key":"1048_CR10","doi-asserted-by":"crossref","unstructured":"Chen Y, Tai L, Sun K, Li M (2020) MonoPair: monocular 3D object detection using pairwise spatial relationships. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12093\u201312102","DOI":"10.1109\/CVPR42600.2020.01211"},{"key":"1048_CR11","doi-asserted-by":"crossref","unstructured":"Jiang X, Jin S, Zhang X, Shao L, Lu S (2024) MonoMAE: enhancing monocular 3D detection through depth-aware masked autoencoders. arXiv preprint arXiv:2405.07696","DOI":"10.52202\/079017-0362"},{"issue":"3","key":"1048_CR12","doi-asserted-by":"publisher","first-page":"1623","DOI":"10.1109\/TPAMI.2020.3019967","volume":"44","author":"R Ranftl","year":"2020","unstructured":"Ranftl R, Lasinger K, Hafner D, Schindler K, Koltun V (2020) Towards robust monocular depth estimation: mixing datasets for zero-shot cross-dataset transfer. IEEE Trans Pattern Anal Mach Intell 44(3):1623\u20131637","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1048_CR13","doi-asserted-by":"crossref","unstructured":"Ranftl R, Bochkovskiy A, Koltun V (2021) Vision transformers for dense prediction. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 12179\u201312188","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"1048_CR14","unstructured":"Chen L-C, Papandreou G, Schroff F, Adam H (2017) Rethinking atrous convolution for semantic image segmentation. arXiv preprint arXiv:1706.05587"},{"key":"1048_CR15","doi-asserted-by":"crossref","unstructured":"Woo S, Park J, Lee J-Y, Kweon IS (2018) CBAM: convolutional block attention module. In: Proceedings of the European conference on computer vision (ECCV), pp 3\u201319","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"1048_CR16","doi-asserted-by":"crossref","unstructured":"Geiger A, Lenz P, Urtasun R (2012) Are we ready for autonomous driving? The KITTI vision benchmark suite. In: Conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"1048_CR17","doi-asserted-by":"crossref","unstructured":"Vu KD, Tran TT, Nguyen DD (2024) MonoDSSMs: efficient monocular 3D object detection with depth-aware state space models. In: Proceedings of the Asian conference on computer vision, pp 3883\u20133900","DOI":"10.1007\/978-981-96-0972-7_6"},{"key":"1048_CR18","unstructured":"Bitwire GA, Han DS (2024) YOLOv11: revolutionizing object detection with focus on tiny objects in complex settings. In: Proceedings of the symposium of the Korean Institute of Communications and Information Sciences. KICS, Gyeongbuk"},{"key":"1048_CR19","doi-asserted-by":"crossref","unstructured":"Mousavian A, Anguelov D, Flynn J, Kosecka J (2017) 3D bounding box estimation using deep learning and geometry. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7074\u20137082","DOI":"10.1109\/CVPR.2017.597"},{"key":"1048_CR20","doi-asserted-by":"publisher","DOI":"10.1016\/j.icte.2025.04.012","author":"S-G Cheon","year":"2025","unstructured":"Cheon S-G, Shin H-J, Bae S-H (2025) Region-aware knowledge distillation between monocular camera-based 3D object detectors. ICT Express. https:\/\/doi.org\/10.1016\/j.icte.2025.04.012","journal-title":"ICT Express"},{"key":"1048_CR21","doi-asserted-by":"publisher","first-page":"569","DOI":"10.1016\/j.icte.2025.02.006","volume":"11","author":"S Siddiqui","year":"2025","unstructured":"Siddiqui S, Kim J, Lee J (2025) A comparative study of phantom sponge for monocular 3D object detection on edge devices. ICT Express 11:569\u2013575","journal-title":"ICT Express"},{"issue":"1","key":"1048_CR22","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1016\/j.icte.2023.07.009","volume":"10","author":"D Lim","year":"2024","unstructured":"Lim D, Kim J, Kim H (2024) Efficient robot tracking system using single-image-based object detection and position estimation. ICT Express 10(1):125\u2013131","journal-title":"ICT Express"},{"issue":"1","key":"1048_CR23","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1016\/j.icte.2024.11.003","volume":"11","author":"S Kakuba","year":"2025","unstructured":"Kakuba S, Han DS (2025) Addressing data scarcity in speech emotion recognition: a comprehensive review. ICT Express 11(1):110\u2013123","journal-title":"ICT Express"},{"key":"1048_CR24","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN., Kaiser \u0141, Polosukhin I (2017) Attention is all you need. In: Proceedings of the 31st international conference on neural information processing systems (NeurIPS). Curran Associates, Inc, pp 5998\u20136008"},{"key":"1048_CR25","unstructured":"Ito T, Cocchi L, Klinger T, Ram P, Campbell M, Hearne L (2025) Learning positional encodings in transformers depends on initialization"},{"key":"1048_CR26","doi-asserted-by":"crossref","unstructured":"Luo S, Dai H, Shao L, Ding Y (2021) M3DSSD: monocular 3D single stage object detector. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 6145\u20136154","DOI":"10.1109\/CVPR46437.2021.00608"},{"key":"1048_CR27","doi-asserted-by":"crossref","unstructured":"Fu J, Liu J, Tian H, Li Y, Bao Y, Fang Z, Lu H (2019) Dual attention network for scene segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3146\u20133154","DOI":"10.1109\/CVPR.2019.00326"},{"key":"1048_CR28","doi-asserted-by":"crossref","unstructured":"Wang X, Girshick R, Gupta A, He K (2018) Non-local neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7794\u20137803","DOI":"10.1109\/CVPR.2018.00813"},{"key":"1048_CR29","doi-asserted-by":"crossref","unstructured":"Yu F, Wang D, Shelhamer E, Darrell T (2018) Deep layer aggregation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2403\u20132412","DOI":"10.1109\/CVPR.2018.00255"},{"issue":"1","key":"1048_CR30","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1016\/j.icte.2021.02.001","volume":"7","author":"Y-S Yoon","year":"2021","unstructured":"Yoon Y-S, Hwang S, Lee D, Lee S, Suh J-W, Jung S-U (2021) 3D mesh transformation preprocessing system in the real space for augmented reality services. ICT Express 7(1):71\u201375","journal-title":"ICT Express"},{"key":"1048_CR31","doi-asserted-by":"crossref","unstructured":"Wang F, Jiang M, Qian C, Yang S, Li C, Zhang H, Wang X, Tang X (2017) Residual attention network for image classification. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3156\u20133164","DOI":"10.1109\/CVPR.2017.683"},{"key":"1048_CR32","unstructured":"Cordonnier J-B, Loukas A, Jaggi M (2019) On the relationship between self-attention and convolutional layers"},{"key":"1048_CR33","doi-asserted-by":"crossref","unstructured":"Ma X, Wang Z, Li H, Zhang P, Ouyang W, Fan X (2019) Accurate monocular 3D object detection via color-embedded 3D reconstruction for autonomous driving. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 6851\u20136860","DOI":"10.1109\/ICCV.2019.00695"},{"key":"1048_CR34","doi-asserted-by":"crossref","unstructured":"Simonelli A, Bulo SR, Porzi L, Ricci E, Kontschieder P (2020) Towards generalization across depth for monocular 3d object detection. In: Computer vision\u2013ECCV 2020: 16th European conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXII 16. Springer, Berlin, pp 767\u2013782","DOI":"10.1007\/978-3-030-58542-6_46"},{"key":"1048_CR35","doi-asserted-by":"crossref","unstructured":"Ma X, Liu S, Xia Z, Zhang H, Zeng X, Ouyang W (2020) Rethinking pseudo-lidar representation. In: Computer vision\u2013ECCV 2020: 16th European conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XIII 16. Springer, Berlin, pp 311\u2013327","DOI":"10.1007\/978-3-030-58601-0_19"},{"key":"1048_CR36","doi-asserted-by":"crossref","unstructured":"Zhou Y, He Y, Zhu H, Wang C, Li H, Jiang Q (2021) Monocular 3D object detection: an extrinsic parameter free approach. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 7556\u20137566","DOI":"10.1109\/CVPR46437.2021.00747"},{"key":"1048_CR37","doi-asserted-by":"crossref","unstructured":"Zhang Y, Lu J, Zhou J (2021) Objects are different: flexible monocular 3D object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3289\u20133298","DOI":"10.1109\/CVPR46437.2021.00330"},{"key":"1048_CR38","doi-asserted-by":"crossref","unstructured":"Kumar A, Brazil G, Liu X (2021) GrooMeD-NMS: grouped mathematically differentiable NMS for monocular 3D object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 8973\u20138983","DOI":"10.1109\/CVPR46437.2021.00886"},{"key":"1048_CR39","doi-asserted-by":"crossref","unstructured":"Shi X, Ye Q, Chen X, Chen C, Chen Z, Kim T-K (2021) Geometry-based distance decomposition for monocular 3D object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 15172\u201315181","DOI":"10.1109\/ICCV48922.2021.01489"},{"key":"1048_CR40","doi-asserted-by":"crossref","unstructured":"Reading C, Harakeh A, Chae J, Waslander SL (2021) Categorical depth distribution network for monocular 3D object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 8555\u20138564","DOI":"10.1109\/CVPR46437.2021.00845"},{"key":"1048_CR41","doi-asserted-by":"crossref","unstructured":"Zou Z, Ye X, Du L, Cheng X, Tan X, Zhang L, Feng J, Xue X, Ding E (2021) The devil is in the task: exploiting reciprocal appearance-localization features for monocular 3D object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2713\u20132722","DOI":"10.1109\/ICCV48922.2021.00271"},{"key":"1048_CR42","doi-asserted-by":"crossref","unstructured":"Lu Y, Ma X, Yang L, Zhang T, Liu Y, Chu Q, Yan J, Ouyang W (2021) Geometry uncertainty projection network for monocular 3D object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 3111\u20133121","DOI":"10.1109\/ICCV48922.2021.00310"},{"key":"1048_CR43","doi-asserted-by":"crossref","unstructured":"Chen H, Huang Y, Tian W, Gao Z, Xiong L (2021) MonoRUn: monocular 3D object detection by reconstruction and uncertainty propagation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10379\u201310388","DOI":"10.1109\/CVPR46437.2021.01024"},{"key":"1048_CR44","doi-asserted-by":"crossref","unstructured":"Wang L, Du L, Ye X, Fu Y, Guo G, Xue X, Feng J, Zhang L (2021) Depth-conditioned dynamic message propagation for monocular 3D object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 454\u2013463","DOI":"10.1109\/CVPR46437.2021.00052"},{"key":"1048_CR45","doi-asserted-by":"crossref","unstructured":"Zhang R, Qiu H, Wang T, Guo Z, Cui Z, Qiao Y, Li H, Gao P (2023) MonoDETR: depth-guided transformer for monocular 3D object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 9155\u20139166","DOI":"10.1109\/ICCV51070.2023.00840"},{"key":"1048_CR46","doi-asserted-by":"crossref","unstructured":"Chen X, Kundu K, Zhang Z, Ma H, Fidler S, Urtasun R (2016) Monocular 3D object detection for autonomous driving. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2147\u20132156","DOI":"10.1109\/CVPR.2016.236"},{"key":"1048_CR47","unstructured":"Loshchilov I, Hutter F (2017) Decoupled weight decay regularization"}],"container-title":["Artificial Life and Robotics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10015-025-01048-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10015-025-01048-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10015-025-01048-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,16]],"date-time":"2025-11-16T20:48:08Z","timestamp":1763326088000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10015-025-01048-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,20]]},"references-count":47,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,11]]}},"alternative-id":["1048"],"URL":"https:\/\/doi.org\/10.1007\/s10015-025-01048-8","relation":{},"ISSN":["1433-5298","1614-7456"],"issn-type":[{"value":"1433-5298","type":"print"},{"value":"1614-7456","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7,20]]},"assertion":[{"value":"30 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 June 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 July 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}