{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:23:02Z","timestamp":1778080982069,"version":"3.51.4"},"reference-count":87,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,5,12]],"date-time":"2025-05-12T00:00:00Z","timestamp":1747008000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,12]],"date-time":"2025-05-12T00:00:00Z","timestamp":1747008000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62271143"],"award-info":[{"award-number":["62271143"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Frontier Technologies R\\&D Program of Jiangsu","award":["BF2024060"],"award-info":[{"award-number":["BF2024060"]}]},{"name":"Big Data Computing Center of Southeast University"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s00530-025-01819-0","type":"journal-article","created":{"date-parts":[[2025,5,12]],"date-time":"2025-05-12T09:33:10Z","timestamp":1747042390000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Depth-free view synthesis from diffusion models for monocular 3D detector in autonomous driving"],"prefix":"10.1007","volume":"31","author":[{"given":"Yuguang","family":"Shi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sifan","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaobo","family":"Lu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"1819_CR1","doi-asserted-by":"publisher","first-page":"120253","DOI":"10.1016\/j.eswa.2023.120253","volume":"227","author":"M Rezaei","year":"2023","unstructured":"Rezaei, M., Azarmi, M., Mir, F.M.P.: 3d-net: monocular 3d object recognition for traffic monitoring. Expert Syst. Appl. 227, 120253 (2023)","journal-title":"Expert Syst. Appl."},{"key":"1819_CR2","doi-asserted-by":"publisher","first-page":"124945","DOI":"10.1016\/j.eswa.2024.124945","volume":"256","author":"Y Tang","year":"2024","unstructured":"Tang, Y., He, H., Wang, Y., Wu, J.: Towards efficient multi-modal 3d object detection: homogeneous sparse fuse network. Expert Syst. Appl. 256, 124945 (2024)","journal-title":"Expert Syst. Appl."},{"key":"1819_CR3","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1016\/j.neucom.2022.04.075","volume":"494","author":"W Chen","year":"2022","unstructured":"Chen, W., Li, P., Zhao, H.: Msl3d: 3d object detection from monocular, stereo and point cloud for autonomous driving. Neurocomputing 494, 23\u201332 (2022)","journal-title":"Neurocomputing"},{"key":"1819_CR4","unstructured":"You, Y., Wang, Y., Chao, W.-L., Garg, D., Pleiss, G., Hariharan, B., Campbell, M., Weinberger, K.Q.: Pseudo-lidar++: Accurate Depth for 3d Object Detection in Autonomous Driving. arXiv preprint arXiv:1906.06310 (2019)"},{"key":"1819_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Y.-N., Dai, H., Ding, Y.: Pseudo-stereo for monocular 3d object detection in autonomous driving. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 887\u2013897 (2022)","DOI":"10.1109\/CVPR52688.2022.00096"},{"key":"1819_CR6","doi-asserted-by":"crossref","unstructured":"Guo, X., Shi, S., Wang, X., Li, H.: Liga-stereo: learning lidar geometry aware representations for stereo-based 3d detector. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3153\u20133163 (2021)","DOI":"10.1109\/ICCV48922.2021.00314"},{"key":"1819_CR7","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? The KITTI vision benchmark suite. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 3354\u20133361. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"1819_CR8","doi-asserted-by":"publisher","first-page":"128081","DOI":"10.1016\/j.neucom.2024.128081","volume":"599","author":"L Jiang","year":"2024","unstructured":"Jiang, L., Schaefer, G., Meng, Q.: Multi-scale feature fusion for single image novel view synthesis. Neurocomputing 599, 128081 (2024)","journal-title":"Neurocomputing"},{"key":"1819_CR9","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1819_CR10","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"1819_CR11","doi-asserted-by":"crossref","unstructured":"Li, P., Chen, X., Shen, S.: Stereo r-cnn based 3d object detection for autonomous driving. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7644\u20137652 (2019)","DOI":"10.1109\/CVPR.2019.00783"},{"key":"1819_CR12","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1016\/j.neucom.2021.11.048","volume":"471","author":"Y Shi","year":"2022","unstructured":"Shi, Y., Guo, Y., Mi, Z., Li, X.: Stereo CenterNet-based 3d object detection for autonomous driving. Neurocomputing 471, 219\u2013229 (2022)","journal-title":"Neurocomputing"},{"key":"1819_CR13","doi-asserted-by":"crossref","unstructured":"Mousavian, A., Anguelov, D., Flynn, J., Kosecka, J.: 3d bounding box estimation using deep learning and geometry. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7074\u20137082 (2017)","DOI":"10.1109\/CVPR.2017.597"},{"key":"1819_CR14","unstructured":"Zhang, Y., Ma, X., Yi, S., Hou, J., Wang, Z., Ouyang, W., Xu, D.: Learning Geometry-Guided Depth Via Projective Modeling for Monocular 3d Object Detection. arXiv preprint arXiv:2107.13931 (2021)"},{"key":"1819_CR15","doi-asserted-by":"crossref","unstructured":"Simonelli, A., Bulo, S.R., Porzi, L., Ricci, E., Kontschieder, P.: Towards generalization across depth for monocular 3d object detection. In: European Conference on Computer Vision, pp. 767\u2013782. Springer (2020)","DOI":"10.1007\/978-3-030-58542-6_46"},{"key":"1819_CR16","doi-asserted-by":"crossref","unstructured":"Lu, Y., Ma, X., Yang, L., Zhang, T., Liu, Y., Chu, Q., Yan, J., Ouyang, W.: Geometry uncertainty projection network for monocular 3d object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3111\u20133121 (2021)","DOI":"10.1109\/ICCV48922.2021.00310"},{"key":"1819_CR17","doi-asserted-by":"crossref","unstructured":"Brazil, G., Liu, X.: M3d-rpn: monocular 3d region proposal network for object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9287\u20139296 (2019)","DOI":"10.1109\/ICCV.2019.00938"},{"key":"1819_CR18","doi-asserted-by":"crossref","unstructured":"Brazil, G., Pons-Moll, G., Liu, X., Schiele, B.: Kinematic 3d object detection in monocular video. In: European Conference on Computer Vision, pp. 135\u2013152. Springer (2020)","DOI":"10.1007\/978-3-030-58592-1_9"},{"key":"1819_CR19","doi-asserted-by":"crossref","unstructured":"Kumar, A., Brazil, G., Liu, X.: Groomed-nms: grouped mathematically differentiable nms for monocular 3d object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8973\u20138983 (2021)","DOI":"10.1109\/CVPR46437.2021.00886"},{"issue":"2","key":"1819_CR20","doi-asserted-by":"publisher","first-page":"919","DOI":"10.1109\/LRA.2021.3052442","volume":"6","author":"Y Liu","year":"2021","unstructured":"Liu, Y., Yixuan, Y., Liu, M.: Ground-aware monocular 3d object detection for autonomous driving. IEEE Robot Autom. Lett. 6(2), 919\u2013926 (2021)","journal-title":"IEEE Robot Autom. Lett."},{"key":"1819_CR21","unstructured":"Zhou, X., Wang, D., Kr\u00e4henb\u00fchl, P.: Objects as Points. arXiv preprint arXiv:1904.07850 (2019)"},{"key":"1819_CR22","doi-asserted-by":"crossref","unstructured":"Li, P., Zhao, H., Liu, P., Cao, F.: Rtm3d: Real-Time Monocular 3d Detection From Object Keypoints for Autonomous Driving. arXiv preprint arXiv:2001.03343 (2020)","DOI":"10.1007\/978-3-030-58580-8_38"},{"key":"1819_CR23","doi-asserted-by":"crossref","unstructured":"Liu, Z., Wu, Z., T\u00f3th, R.: Smoke: single-stage monocular 3d object detection via keypoint estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, pp. 996\u2013997 (2020)","DOI":"10.1109\/CVPRW50498.2020.00506"},{"key":"1819_CR24","doi-asserted-by":"crossref","unstructured":"Ma, X., Zhang, Y., Xu, D., Zhou, D., Yi, S., Li, H., Ouyang, W.: Delving into localization errors for monocular 3d object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4721\u20134730 (2021)","DOI":"10.1109\/CVPR46437.2021.00469"},{"key":"1819_CR25","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1016\/j.neucom.2021.01.110","volume":"441","author":"P Li","year":"2021","unstructured":"Li, P., Zhao, H.: Monocular 3d object detection using dual quadric for autonomous driving. Neurocomputing 441, 151\u2013160 (2021)","journal-title":"Neurocomputing"},{"key":"1819_CR26","doi-asserted-by":"crossref","unstructured":"Chen, Y., Tai, L., Sun, K., Li, M.: Monopair: monocular 3d object detection using pairwise spatial relationships. in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12093\u201312102 (2020)","DOI":"10.1109\/CVPR42600.2020.01211"},{"key":"1819_CR27","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Lu, J., Zhou, J.: Objects are different: flexible monocular 3d object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3289\u20133298 (2021)","DOI":"10.1109\/CVPR46437.2021.00330"},{"key":"1819_CR28","unstructured":"Yang, L., Zhang, X., Wang, L., Zhu, M., Li, J.: Lite-fpn for keypoint-based monocular 3d object detection. arXiv preprint arXiv:2105.00268 (2021)"},{"key":"1819_CR29","doi-asserted-by":"crossref","unstructured":"Li, Z., Qu, Z., Zhou, Y., Liu, J., Wang, H., Jiang, L.: Diversity matters: fully exploiting depth clues for reliable monocular 3d object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2791\u20132800 (2022)","DOI":"10.1109\/CVPR52688.2022.00281"},{"key":"1819_CR30","doi-asserted-by":"crossref","unstructured":"Chen, X., Kundu, K., Zhang, Z., Ma, H., Fidler, S., Urtasun, R.: Monocular 3d object detection for autonomous driving. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2147\u20132156 (2016)","DOI":"10.1109\/CVPR.2016.236"},{"key":"1819_CR31","doi-asserted-by":"crossref","unstructured":"Qin, Z., Wang, J., Lu, Y.: Monogrnet: a geometric reasoning network for monocular 3d object localization. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 8851\u20138858 (2019)","DOI":"10.1609\/aaai.v33i01.33018851"},{"key":"1819_CR32","doi-asserted-by":"crossref","unstructured":"Ding, M., Huo, Y., Yi, H., Wang, Z., Shi, J., Lu, Z., Luo, P.: Learning depth-guided convolutions for monocular 3d object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, pp. 1000\u20131001 (2020)","DOI":"10.1109\/CVPRW50498.2020.00508"},{"key":"1819_CR33","doi-asserted-by":"crossref","unstructured":"Wang, L., Du, L., Ye, X., Fu, Y., Guo, G., Xue, X., Feng, J., Zhang, L.: Depth-conditioned dynamic message propagation for monocular 3d object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 454\u2013463 (2021)","DOI":"10.1109\/CVPR46437.2021.00052"},{"key":"1819_CR34","doi-asserted-by":"crossref","unstructured":"Zhang, R., Qiu, H., Wang, T., Xu, X., Guo, Z., Qiao, Y., Gao, P., Li, H.: Monodetr: Depth-Aware Transformer for Monocular 3d Object Detection. arXiv preprint arXiv:2203.13310 (2022)","DOI":"10.1109\/ICCV51070.2023.00840"},{"key":"1819_CR35","doi-asserted-by":"crossref","unstructured":"Huang, K.-C., Wu, T.-H., Su, H.-T., Hsu, W.H.: Monodtr: monocular 3d object detection with depth-aware transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4012\u20134021 (2022)","DOI":"10.1109\/CVPR52688.2022.00398"},{"key":"1819_CR36","doi-asserted-by":"crossref","unstructured":"Wang, Y., Chao, W.-L., Garg, D., Hariharan, B., Campbell, M., Weinberger, K.Q.: Pseudo-lidar from visual depth estimation: bridging the gap in 3d object detection for autonomous driving. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8445\u20138453 (2019)","DOI":"10.1109\/CVPR.2019.00864"},{"key":"1819_CR37","doi-asserted-by":"crossref","unstructured":"Ma, X., Liu, S., Xia, Z., Zhang, H., Zeng, X., Ouyang, W.: Rethinking pseudo-lidar representation, in: Computer Vision\u2014ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XIII 16, pp. 311\u2013327. Springer (2020)","DOI":"10.1007\/978-3-030-58601-0_19"},{"key":"1819_CR38","doi-asserted-by":"crossref","unstructured":"Reading, C., Harakeh, A., Chae, J., Waslander, S.L.: Categorical depth distribution network for monocular 3d object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8555\u20138564 (2021)","DOI":"10.1109\/CVPR46437.2021.00845"},{"key":"1819_CR39","doi-asserted-by":"crossref","unstructured":"Simonelli, A., Bulo, S.R., Porzi, L., Kontschieder, P., Ricci, E.: Are we missing confidence in pseudo-lidar methods for monocular 3d object detection? In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3225\u20133233 (2021)","DOI":"10.1109\/ICCV48922.2021.00321"},{"key":"1819_CR40","doi-asserted-by":"crossref","unstructured":"Ma, X., Wang, Z., Li, H., Zhang, P., Ouyang, W., Fan, X.: Accurate monocular 3d object detection via color-embedded 3d reconstruction for autonomous driving. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6851\u20136860 (2019)","DOI":"10.1109\/ICCV.2019.00695"},{"key":"1819_CR41","doi-asserted-by":"publisher","first-page":"2753","DOI":"10.1109\/TIP.2019.2952201","volume":"29","author":"W Bao","year":"2019","unstructured":"Bao, W., Xu, B., Chen, Z.: Monofenet: monocular 3d object detection with feature enhancement networks. IEEE Trans. Image Process. 29, 2753\u20132765 (2019)","journal-title":"IEEE Trans. Image Process."},{"key":"1819_CR42","doi-asserted-by":"crossref","unstructured":"Park, D., Ambrus, R., Guizilini, V., Li, J., Gaidon, A.: Is pseudo-lidar needed for monocular 3d object detection? In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3142\u20133152 (2021)","DOI":"10.1109\/ICCV48922.2021.00313"},{"key":"1819_CR43","unstructured":"Vianney, J.M.U., Aich, S., Liu, B.: Refinedmpl: Refined Monocular Pseudolidar for 3d Object Detection in Autonomous Driving. arXiv preprint arXiv:1911.09712 (2019)"},{"key":"1819_CR44","doi-asserted-by":"crossref","unstructured":"Shi, S., Wang, X., Li, H.: Pointrcnn: 3d object proposal generation and detection from point cloud. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 770\u2013779 (2019)","DOI":"10.1109\/CVPR.2019.00086"},{"key":"1819_CR45","unstructured":"Qi, C.R., Su, H., Mo, K., Guibas, L.J.: Pointnet: deep learning on point sets for 3d classification and segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 652\u2013660 (2017)"},{"key":"1819_CR46","doi-asserted-by":"crossref","unstructured":"Cai, Y., Li, B., Jiao, Z., Li, H., Zeng, X., Wang, X.: Monocular 3d object detection with decoupled structured polygon estimation and height-guided depth estimation. In:Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 10478\u201310485 (2020)","DOI":"10.1609\/aaai.v34i07.6618"},{"key":"1819_CR47","doi-asserted-by":"crossref","unstructured":"Xie, J., Girshick, R., Farhadi, A.: Deep3d: Fully automatic 2d-to-3d video conversion with deep convolutional neural networks. In: Computer Vision\u2014ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, Proceedings, Part IV 14, pp. 842\u2013857. Springer (2016)","DOI":"10.1007\/978-3-319-46493-0_51"},{"key":"1819_CR48","doi-asserted-by":"crossref","unstructured":"Garg, R., Bg, V.K., Carneiro, G., Reid, I.: Unsupervised cnn for single view depth estimation: Geometry to the rescue. In: Computer Vision\u2014ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part VIII 14, pp. 740\u2013756. Springer (2016)","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"1819_CR49","doi-asserted-by":"crossref","unstructured":"Godard, C., Mac\u00a0Aodha, O., Brostow, G.J.: Unsupervised monocular depth estimation with left-right consistency. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 270\u2013279 (2017)","DOI":"10.1109\/CVPR.2017.699"},{"key":"1819_CR50","doi-asserted-by":"crossref","unstructured":"Mahjourian, R., Wicke, M., Angelova, A.: Unsupervised learning of depth and ego-motion from monocular video using 3d geometric constraints. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5667\u20135675 (2018)","DOI":"10.1109\/CVPR.2018.00594"},{"key":"1819_CR51","doi-asserted-by":"crossref","unstructured":"Godard, C., Mac\u00a0Aodha, O., Firman, M., Brostow, G.J.: Digging into self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3828\u20133838 (2019)","DOI":"10.1109\/ICCV.2019.00393"},{"key":"1819_CR52","doi-asserted-by":"crossref","unstructured":"Shu, C., Yu, K., Duan, Z., Yang, K.: Feature-metric loss for self-supervised learning of depth and egomotion. In: Computer Vision\u2014ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XIX, pp. 572\u2013588. Springer, Berlin (2020)","DOI":"10.1007\/978-3-030-58529-7_34"},{"key":"1819_CR53","doi-asserted-by":"crossref","unstructured":"Peng, R., Wang, R., Lai, Y., Tang, L., Cai, Y.: Excavating the potential capacity of self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15560\u201315569 (2021)","DOI":"10.1109\/ICCV48922.2021.01527"},{"key":"1819_CR54","doi-asserted-by":"crossref","unstructured":"He, M., Hui, L., Bian, Y., Ren, J., Xie, J., Yang, J.: Ra-depth: resolution adaptive self-supervised monocular depth estimation. In: Computer Vision\u2014ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XXVII, pp. 565\u2013581. Springer (2022)","DOI":"10.1007\/978-3-031-19812-0_33"},{"key":"1819_CR55","doi-asserted-by":"crossref","unstructured":"Tulsiani, S., Tucker, R., Snavely, N.: Layer-structured 3d scene inference via view synthesis. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 302\u2013317 (2018)","DOI":"10.1007\/978-3-030-01234-2_19"},{"key":"1819_CR56","doi-asserted-by":"crossref","unstructured":"Zhou, T., Tucker, R., Flynn, J., Fyffe, G., Snavely, N.: Stereo Magnification: Learning View Synthesis Using Multiplane Images. arXiv preprint arXiv:1805.09817 (2018)","DOI":"10.1145\/3197517.3201323"},{"issue":"1","key":"1819_CR57","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: Nerf: representing scenes as neural radiance fields for view synthesis. Commu. ACM 65(1), 99\u2013106 (2021)","journal-title":"Commu. ACM"},{"key":"1819_CR58","doi-asserted-by":"crossref","unstructured":"Tucker, R., Snavely, N.: Single-view view synthesis with multiplane images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 551\u2013560. (2020)","DOI":"10.1109\/CVPR42600.2020.00063"},{"key":"1819_CR59","doi-asserted-by":"crossref","unstructured":"Li, J., Feng, Z., She, Q., Ding, H., Wang, C., Lee, G.H.: Mine: towards continuous depth mpi with nerf for novel view synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12578\u201312588 (2021)","DOI":"10.1109\/ICCV48922.2021.01235"},{"key":"1819_CR60","doi-asserted-by":"crossref","unstructured":"Zhang, C., Lin, C., Liao, K., Nie, L., Zhao, Y.: Sivsformer: Parallax-aware transformers for single-image-based view synthesis. In: 2022 IEEE Conference on Virtual Reality and 3D User Interfaces (VR), pp. 47\u201356. IEEE (2022)","DOI":"10.1109\/VR51125.2022.00022"},{"key":"1819_CR61","doi-asserted-by":"crossref","unstructured":"Saharia, C., Ho, J., Chan, W., Salimans, T., Fleet, D. J., & Norouzi, M.: Image super-resolution via iterative refinement. IEEE transactions on pattern analysis and machine intelligence, 45(4), 4713\u20134726 (2022)","DOI":"10.1109\/TPAMI.2022.3204461"},{"key":"1819_CR62","unstructured":"Sahak, H., Watson, D., Saharia, C., Fleet, D.: Denoising Diffusion Probabilistic Models for Robust Image Super-Resolution in the Wild. arXiv preprint arXiv:2302.07864 (2023)"},{"key":"1819_CR63","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat gans on image synthesis. Adv. Neural Inf. Process. Syst. 34, 8780\u20138794 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1819_CR64","unstructured":"Meng, C., He, Y., Song, Y., Song, J., Wu, J., Zhu, J.-Y., Ermon, S.: Sdedit: Guided Image Synthesis and Editing With Stochastic Differential equations. arXiv preprint arXiv:2108.01073 (2021)"},{"key":"1819_CR65","unstructured":"Nichol, A.Q., Dhariwal, P.: Improved denoising diffusion probabilistic models. In: International Conference on Machine Learning, PMLR, pp. 8162\u20138171 (2021)"},{"key":"1819_CR66","doi-asserted-by":"crossref","unstructured":"Liu, R., Wu, R., Van\u00a0Hoorick, B., Tokmakov, P., Zakharov, S., Vondrick, C.: Zero-1-to-3: zero-shot one image to 3d object. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9298\u20139309 (2023)","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"1819_CR67","doi-asserted-by":"crossref","unstructured":"Li, B., Xue, K., Liu, B., Lai, Y.-K.: Bbdm: image-to-image translation with brownian bridge diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1952\u20131961 (2023)","DOI":"10.1109\/CVPR52729.2023.00194"},{"key":"1819_CR68","doi-asserted-by":"crossref","unstructured":"Kumari, N., Su, G., Zhang, R., Park, T., Shechtman, E., Zhu, J.-Y.: Customizing Text-to-Image Diffusion With Camera Viewpoint Control, arXiv preprint arXiv:2404.12333 (2024)","DOI":"10.1145\/3680528.3687564"},{"key":"1819_CR69","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"1819_CR70","doi-asserted-by":"crossref","unstructured":"Woo, S., Debnath, S., Hu, R., Chen, X., Liu, Z., Kweon, I.S., Xie, S.: Convnext v2: Co-Designing and Scaling Convnets With Masked Autoencoders. arXiv preprint arXiv:2301.00808 (2023)","DOI":"10.1109\/CVPR52729.2023.01548"},{"key":"1819_CR71","doi-asserted-by":"crossref","unstructured":"Yu, W., Zhou, P., Yan, S., Wang, X.: Inceptionnext: when inception meets convnext. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5672\u20135683 (2024)","DOI":"10.1109\/CVPR52733.2024.00542"},{"key":"1819_CR72","doi-asserted-by":"crossref","unstructured":"Hang, T., Gu, S., Li, C., Bao, J., Chen, D., Hu, H., Geng, X., Guo, B.: Efficient Diffusion Training Via min-snr Weighting Strategy, arXiv preprint arXiv:2303.09556 (2023)","DOI":"10.1109\/ICCV51070.2023.00684"},{"issue":"4","key":"1819_CR73","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004)","journal-title":"IEEE Trans. Image Process."},{"key":"1819_CR74","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising Diffusion Implicit Models. arXiv preprint arXiv:2010.02502 (2020)"},{"key":"1819_CR75","unstructured":"Lu, C., Zhou, Y., Bao, F., Chen, J., Li, C., Zhu, J.: Dpm-solver: A Fast ODE Solver for Diffusion Probabilistic Model Sampling in Around 10 Steps. arXiv preprint arXiv:2206.00927 (2022)"},{"key":"1819_CR76","unstructured":"Liu, X., Gong, C., Liu, Q.: Flow Straight and Fast: Learning to Generate and Transfer Data With Rectified Flow. arXiv preprint arXiv:2209.03003 (2022)"},{"key":"1819_CR77","unstructured":"Song, Y., Dhariwal, P., Chen, M., Sutskever, I.: Consistency models (2023)"},{"key":"1819_CR78","unstructured":"Chen, T.: On the Importance of Noise Scheduling for Diffusion Models. arXiv preprint arXiv:2301.10972 (2023)"},{"key":"1819_CR79","doi-asserted-by":"crossref","unstructured":"Qian, R., Garg, D., Wang, Y., You, Y., Belongie, S., Hariharan, B., Campbell, M., Weinberger, K.Q., Chao, W.-L.: End-to-end pseudo-lidar for image-based 3d object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5881\u20135890 (2020)","DOI":"10.1109\/CVPR42600.2020.00592"},{"key":"1819_CR80","doi-asserted-by":"crossref","unstructured":"Liu, Y., Wang, L., Liu, M.: Yolostereo3d: a step back to 2d for efficient stereo 3d detection. In: 2021 IEEE International Conference on Robotics and Automation (ICRA), pp. 13018\u201313024. IEEE (2021)","DOI":"10.1109\/ICRA48506.2021.9561423"},{"key":"1819_CR81","doi-asserted-by":"crossref","unstructured":"Chen, H., Huang, Y., Tian, W., Gao, Z., Xiong, L.: Monorun: Monocular 3d object detection by reconstruction and uncertainty propagation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10379\u201310388 (2021)","DOI":"10.1109\/CVPR46437.2021.01024"},{"key":"1819_CR82","doi-asserted-by":"crossref","unstructured":"Wu, Z., Wu, Y., Pu, J., Li, X., Wang, X.: Attention-based depth distillation with 3d-aware positional encoding for monocular 3d object detection. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 37, pp. 2892\u20132900 (2023)","DOI":"10.1609\/aaai.v37i3.25391"},{"key":"1819_CR83","doi-asserted-by":"crossref","unstructured":"Yan, L., Yan, P., Xiong, S., Xiang, X., Tan, Y.: Monocd: Monocular 3d object detection with complementary depths. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10248\u201310257 (2024)","DOI":"10.1109\/CVPR52733.2024.00976"},{"issue":"5","key":"1819_CR84","doi-asserted-by":"publisher","first-page":"1259","DOI":"10.1109\/TPAMI.2017.2706685","volume":"40","author":"X Chen","year":"2017","unstructured":"Chen, X., Kundu, K., Zhu, Y., Ma, H., Fidler, S., Urtasun, R.: 3d object proposals using stereo imagery for accurate object class detection. IEEE Trans. Pattern Anal. Mach. Intell. 40(5), 1259\u20131272 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1819_CR85","doi-asserted-by":"crossref","unstructured":"Barabanau, I., Artemov, A., Burnaev, E., Murashkin, V.: Monocular 3d Object Detection Via Geometric Reasoning on Keypoints. arXiv preprint arXiv:1905.05618 (2019)","DOI":"10.5220\/0009102506520659"},{"key":"1819_CR86","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595 (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"1819_CR87","unstructured":"Simonyan, K., Zisserman, A.: Very Deep Convolutional Networks for Large-Scale Image Recognition. arXiv preprint arXiv:1409.1556 (2014)"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01819-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-01819-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01819-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T09:04:19Z","timestamp":1757927059000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-01819-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,12]]},"references-count":87,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["1819"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-01819-0","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,12]]},"assertion":[{"value":"8 November 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 April 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"246"}}