{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T15:49:41Z","timestamp":1779292181527,"version":"3.51.4"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1007\/s11263-025-02362-1","type":"journal-article","created":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T12:13:34Z","timestamp":1738412014000},"page":"3807-3821","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["DiffuVolume: Diffusion Model for Volume based Stereo Matching"],"prefix":"10.1007","volume":"133","author":[{"given":"Dian","family":"Zheng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiao-Ming","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zuhao","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jingke","family":"Meng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8327-0003","authenticated-orcid":false,"given":"Wei-Shi","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,2,1]]},"reference":[{"key":"2362_CR1","unstructured":"Baranchuk, D., Rubachev, I., Voynov, A., Khrulkov, V. & Babenko, A. (2021). Label-efficient semantic segmentation with diffusion models. In: International conference on learning representations."},{"key":"2362_CR2","unstructured":"Biswas, J., & Veloso, M. (2011). 2011. RGB-D Workshop at RSS (Vol: Depth camera based localization and navigation for indoor mobile robots."},{"key":"2362_CR3","doi-asserted-by":"crossref","unstructured":"Chang, J. R., & Chen, Y. S. (2018). Pyramid stereo matching network. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 5410\u20135418.","DOI":"10.1109\/CVPR.2018.00567"},{"key":"2362_CR4","doi-asserted-by":"crossref","unstructured":"Chen, C., Seff, A., Kornhauser, A., & Xiao, J. (2015). Deepdriving: Learning affordance for direct perception in autonomous driving. In: Proceedings of the IEEE international conference on computer vision, pp. 2722\u20132730.","DOI":"10.1109\/ICCV.2015.312"},{"key":"2362_CR5","first-page":"22158","volume":"33","author":"X Cheng","year":"2020","unstructured":"Cheng, X., Zhong, Y., Harandi, M., Dai, Y., Chang, X., Li, H., & Ge, Z. (2020). Hierarchical neural architecture search for deep stereo matching. Advances in Neural Information Processing Systems, 33, 22158\u201322169.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2362_CR6","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P. & Urtasun, R. (2012). Are we ready for autonomous driving? the kitti vision benchmark suite. In: 2012 IEEE conference on computer vision and pattern recognition pp. 3354\u20133361.","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"2362_CR7","doi-asserted-by":"crossref","unstructured":"Gu, X., Fan, Z., Zhu, S., Dai, Z., Tan, F., & Tan, P. (2020). Cascade cost volume for high-resolution multi-view stereo and stereo matching. In:  Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 2495\u20132504.","DOI":"10.1109\/CVPR42600.2020.00257"},{"key":"2362_CR8","doi-asserted-by":"crossref","unstructured":"Guo, X., Yang, K., Yang, W., Wang, X., & Li, H. (2019). Group-wise correlation stereo network. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 3273\u20133282.","DOI":"10.1109\/CVPR.2019.00339"},{"issue":"2","key":"2362_CR9","doi-asserted-by":"publisher","first-page":"328","DOI":"10.1109\/TPAMI.2007.1166","volume":"30","author":"H Hirschmuller","year":"2007","unstructured":"Hirschmuller, H. (2007). Stereo processing by semiglobal matching and mutual information. IEEE Transactions on Pattern Analysis and Machine Intelligence, 30(2), 328\u2013341.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2362_CR10","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., & Abbeel, P. (2020). Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems, 33, 6840\u20136851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2362_CR11","doi-asserted-by":"crossref","unstructured":"Kendall, A., Martirosyan, H., Dasgupta, S., Henry, P., Kennedy, R., Bachrach, A., & Bry, A. (2017). End-to-end learning of geometry and context for deep stereo regression. In: Proceedings of the IEEE international conference on computer vision, pp. 66\u201375.","DOI":"10.1109\/ICCV.2017.17"},{"key":"2362_CR12","unstructured":"Kingma, D.P., & Ba, J. (2015). Adam: A method for stochastic optimization. In: International conference on learning representations."},{"key":"2362_CR13","doi-asserted-by":"crossref","unstructured":"Klaus, A., Sormann, M. & Karner, K. (2006). Segment-based stereo matching using belief propagation and a self-adapting dissimilarity measure. In: 18th International conference on pattern recognition (ICPR\u201906) 3, 15\u201318.","DOI":"10.1109\/ICPR.2006.1033"},{"key":"2362_CR14","doi-asserted-by":"crossref","unstructured":"Li, J., Wang, P., Xiong, P., Cai, T., Yan, Z., Yang, L. & Liu, S. (2022). Practical stereo matching via cascaded recurrent network with adaptive correlation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition pp. 16263\u201316272.","DOI":"10.1109\/CVPR52688.2022.01578"},{"key":"2362_CR15","doi-asserted-by":"crossref","unstructured":"Lipson, L., Teed, Z. & Deng, J. (2021). Raft-stereo: Multilevel recurrent field transforms for stereo matching. In: International conference on 3D vision pp. 218\u2013227.","DOI":"10.1109\/3DV53792.2021.00032"},{"key":"2362_CR16","doi-asserted-by":"publisher","first-page":"1647","DOI":"10.1609\/aaai.v36i2.20056","volume":"36","author":"B Liu","year":"2022","unstructured":"Liu, B., Yu, H., & Long, Y. (2022). Local similarity pattern and cost self-reassembling for deep stereo matching networks. Proceedings of the AAAI Conference on Artificial Intelligence, 36, 1647\u20131655.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"2362_CR17","doi-asserted-by":"crossref","unstructured":"Mayer, N., Ilg, E., Hausser, P., Fischer, P., Cremers, D., Dosovitskiy, A. & Brox, T. (2016). A large dataset to train convolutional networks for disparity, optical flow, and scene flow estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition pp. 4040\u20134048.","DOI":"10.1109\/CVPR.2016.438"},{"key":"2362_CR18","doi-asserted-by":"crossref","unstructured":"Mei, X., Sun, X., Dong, W., Wang, H. & Zhang, X. (2013). Segment-tree based cost aggregation for stereo matching. In: Proceedings of the IEEE conference on computer vision and pattern recognition pp. 313\u2013320.","DOI":"10.1109\/CVPR.2013.47"},{"key":"2362_CR19","doi-asserted-by":"publisher","first-page":"427","DOI":"10.5194\/isprsannals-II-3-W5-427-2015","volume":"2","author":"M Menze","year":"2015","unstructured":"Menze, M., Heipke, C., & Geiger, A. (2015). Joint 3d estimation of vehicles and scene flow. ISPRS Annals of the Photogrammetry, Remote Sensing and Spatial Information Sciences, 2, 427\u2013434.","journal-title":"ISPRS Annals of the Photogrammetry, Remote Sensing and Spatial Information Sciences"},{"key":"2362_CR20","unstructured":"Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J. & Chanan, G. (2019). others Pytorch: An imperative style, high-performance deep learning library. Advances in Neural Information Processing Systems, 32."},{"key":"2362_CR21","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P. & Brox, T. (2015). U-net: Convolutional networks for biomedical image segmentation. Medical Image Computing and Computer-Assisted Intervention pp. 234\u2013241.","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"2362_CR22","doi-asserted-by":"crossref","unstructured":"Saharia, C., Chan, W., Chang, H., Lee, C., Ho, J., Salimans, T. & Norouzi, M. (2022). Palette: Image-to-image diffusion models. In: ACM SIGGRAPH 2022 conference proceedings pp. 1\u201310.","DOI":"10.1145\/3528233.3530757"},{"issue":"4","key":"2362_CR23","first-page":"4713","volume":"45","author":"C Saharia","year":"2022","unstructured":"Saharia, C., Ho, J., Chan, W., Salimans, T., Fleet, D. J., & Norouzi, M. (2022). Image super-resolution via iterative refinement. IEEE Transactions on Pattern Analysis and Machine Intelligence, 45(4), 4713\u20134726.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2362_CR24","doi-asserted-by":"crossref","unstructured":"Scharstein, D., Hirschm\u00fcller, H., Kitajima, Y., Krathwohl, G., Ne\u0161i\u0107, N., Wang, X. & Westling, P. (2014). High-resolution stereo datasets with subpixel-accurate ground truth. In: German conference on pattern recognition pp. 31\u201342.","DOI":"10.1007\/978-3-319-11752-2_3"},{"key":"2362_CR25","doi-asserted-by":"crossref","unstructured":"Schops, T., Schonberger, J.L., Galliani, S., Sattler, T., Schindler, K., Pollefeys, M. & Geiger, A. (2017). A multi-view stereo benchmark with high-resolution images and multi-camera videos. In: Proceedings of the IEEE conference on computer vision and pattern recognition pp. 3260\u20133269.","DOI":"10.1109\/CVPR.2017.272"},{"key":"2362_CR26","doi-asserted-by":"crossref","unstructured":"Seki, A., & Pollefeys, M. (2017). Sgm-nets: Semi-global matching with neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition pp. 231\u2013240.","DOI":"10.1109\/CVPR.2017.703"},{"key":"2362_CR27","doi-asserted-by":"crossref","unstructured":"Shao, R., Zheng, Z., Zhang, H., Sun, J. & Liu, Y. (2022). Diffustereo: High quality human reconstruction via diffusion-based stereo using sparse cameras. In: Proceedings of the European conference on computer vision pp. 702\u2013720.","DOI":"10.1007\/978-3-031-19824-3_41"},{"key":"2362_CR28","doi-asserted-by":"crossref","unstructured":"Shen, Z., Dai, Y. & Rao, Z. (2021). Cfnet: Cascade and fused cost volume for robust stereo matching. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition pp. 13906\u201313915.","DOI":"10.1109\/CVPR46437.2021.01369"},{"key":"2362_CR29","doi-asserted-by":"crossref","unstructured":"Shen, Z., Dai, Y., Song, X., Rao, Z., Zhou, D. & Zhang, L. (2022). Pcw-net: Pyramid combination and warping cost volume for stereo matching. In: Proceedings of the European conference on computer vision pp. 280\u2013297.","DOI":"10.1007\/978-3-031-19824-3_17"},{"key":"2362_CR30","unstructured":"Song, J., Meng, C. & Ermon, S. (2020). Denoising diffusion implicit models. In: International conference on learning representations."},{"key":"2362_CR31","unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S. & Poole, B. (2020). Score-based generative modeling through stochastic differential equations. In: International conference on learning representations."},{"key":"2362_CR32","doi-asserted-by":"crossref","unstructured":"Tankovich, V., Hane, C., Zhang, Y., Kowdle, A., Fanello, S. & Bouaziz, S. (2021). Hitnet: Hierarchical iterative tile refinement network for real-time stereo matching. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition pp. 14362\u201314372.","DOI":"10.1109\/CVPR46437.2021.01413"},{"key":"2362_CR33","doi-asserted-by":"crossref","unstructured":"Xu, B., Xu, Y., Yang, X., Jia, W. & Guo, Y. (2021). Bilateral grid learning for stereo matching networks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition pp. 12497\u201312506.","DOI":"10.1109\/CVPR46437.2021.01231"},{"key":"2362_CR34","doi-asserted-by":"crossref","unstructured":"Xu, G., Cheng, J., Guo, P. & Yang, X. (2022). Attention concatenation volume for accurate and efficient stereo matching. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition pp. 12981\u201312990.","DOI":"10.1109\/CVPR52688.2022.01264"},{"key":"2362_CR35","doi-asserted-by":"crossref","unstructured":"Xu, G., Wang, X., Ding, X. & Yang, X. (2023). Iterative geometry encoding volume for stereo matching. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition pp. 21919\u201321928.","DOI":"10.1109\/CVPR52729.2023.02099"},{"key":"2362_CR36","doi-asserted-by":"crossref","unstructured":"Xu, G., Wang, Y., Cheng, J., Tang, J. & Yang, X. (2023). Accurate and efficient stereo matching via attention concatenation volume. IEEE Transactions on Pattern Analysis and Machine Intelligence.","DOI":"10.1109\/TPAMI.2023.3335480"},{"key":"2362_CR37","unstructured":"Yang, X., Shih, S M., Fu, Y., Zhao, X. & Ji, S. (2022). Your vit is secretly a hybrid discriminative-generative diffusion model. arXiv preprint arXiv:2208.07791."},{"key":"2362_CR38","doi-asserted-by":"crossref","unstructured":"Zhang, F., Prisacariu, V., Yang, R. & Torr, P.H. (2019). Ga-net: Guided aggregation net for end-to-end stereo matching. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition pp. 185\u2013194.","DOI":"10.1109\/CVPR.2019.00027"},{"key":"2362_CR39","doi-asserted-by":"crossref","unstructured":"Zhang, F., Qi, X., Yang, R., Prisacariu, V., Wah, B. & Torr, P. (2020). Domain-invariant stereo matching networks. In: Proceedings of the European conference on computer vision pp. 420\u2013439.","DOI":"10.1007\/978-3-030-58536-5_25"},{"key":"2362_CR40","doi-asserted-by":"publisher","first-page":"12926","DOI":"10.1609\/aaai.v34i07.6991","volume":"34","author":"Y Zhang","year":"2020","unstructured":"Zhang, Y., Chen, Y., Bai, X., Yu, S., Yu, K., Li, Z., & Yang, K. (2020). Adaptive unimodal cost volume filtering for deep stereo matching. Proceedings of the AAAI Conference on Artificial Intelligence, 34, 12926\u201312934.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02362-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02362-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02362-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,7]],"date-time":"2025-06-07T05:58:39Z","timestamp":1749275919000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02362-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,1]]},"references-count":40,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2025,7]]}},"alternative-id":["2362"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02362-1","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,1]]},"assertion":[{"value":"30 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 January 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 February 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}