{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T06:29:24Z","timestamp":1768976964533,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Special Foundations for the Development of Strategic Emerging Industries of Shenzhen","award":["Nos. JSGG 20211108092812020 JCYJ 20200109143035495 CJGJZD 2021"],"award-info":[{"award-number":["Nos. JSGG 20211108092812020 JCYJ 20200109143035495 CJGJZD 2021"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3611862","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:12Z","timestamp":1698391632000},"page":"2315-2325","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Patchmatch Stereo++: Patchmatch Binocular Stereo with Continuous Disparity Optimization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5019-0306","authenticated-orcid":false,"given":"Wenjia","family":"Ren","sequence":"first","affiliation":[{"name":"Tsinghua University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7509-3964","authenticated-orcid":false,"given":"Qingmin","family":"Liao","sequence":"additional","affiliation":[{"name":"Tsinghua University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3204-3271","authenticated-orcid":false,"given":"Zhijing","family":"Shao","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou) &amp; Prometheus Vision Technology, Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3089-6414","authenticated-orcid":false,"given":"Xiangru","family":"Lin","sequence":"additional","affiliation":[{"name":"The University of Hong Kong, Hong Kong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6214-552X","authenticated-orcid":false,"given":"Xin","family":"Yue","sequence":"additional","affiliation":[{"name":"Tsinghua University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5229-9727","authenticated-orcid":false,"given":"Yu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Prometheus Vision Technology, Zhuhai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1191-9069","authenticated-orcid":false,"given":"Zongqing","family":"Lu","sequence":"additional","affiliation":[{"name":"Tsinghua University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of 1999 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP).","volume":"6","author":"Banks J.","unstructured":"J. Banks, M. Bennamoun, K. Kubik, and P. Corke. 1999. A constraint to improve the reliability of stereo matching using the rank transform. In Proceedings of 1999 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP). Vol. 6, 3321--3324."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00567"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00260"},{"key":"e_1_3_2_1_4_1","volume-title":"Proceeding of 9th International Conference on Learning Representations (ICLR).","author":"Dosovitskiy Alexey","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, and Mostafa Dehghani et al. 2021. An image is worth 16x16 words: transformers for image recognition at scale. In Proceeding of 9th International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.316"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00448"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00339"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2007.1166"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.17"},{"key":"e_1_3_2_1_10_1","series-title":"Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), 596--613.","volume-title":"Stereonet: guided hierarchical refinement for real-time edge-aware depth prediction","author":"Khamis Sameh","unstructured":"Sameh Khamis, Sean Fanello, Christoph Rhemann, Adarsh Kowdle, Julien Valentin, and Shahram Izadi. 2018. Stereonet: guided hierarchical refinement for real-time edge-aware depth prediction. In Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), 596--613."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3032602"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01578"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00614"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00297"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00032"},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of IEEE International Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Mayer N.","unstructured":"N. Mayer, E. Ilg, P. H\u00e4usser, P. Fischer, D. Cremers, A. Dosovitskiy, and T. Brox. 2016. A large dataset to train convolutional networks for disparity, optical flow, and scene flow estimation. In Proceedings of IEEE International Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298925"},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the British Machine Vision Conference (BMVC), 1--12","author":"Micheal Bleyer Christoph Rhemann","year":"2011","unstructured":"Christoph Rhemann Micheal Bleyer and Carsten Rother. 2011. Patchmatch stereo - stereo matching with slanted support windows. In Proceedings of the British Machine Vision Conference (BMVC), 1--12."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2017.108"},{"key":"e_1_3_2_1_20_1","first-page":"5314","article-title":"On the synergies between machine learning and binocular stereo for depth estimation from images: a survey","volume":"44","author":"Poggi Matteo","year":"2021","unstructured":"Matteo Poggi, Fabio Tosi, Konstantinos Batsos, Philippos Mordohai, and Stefano Mattoccia. 2021. On the synergies between machine learning and binocular stereo for depth estimation from images: a survey. IEEE Transactions on Pattern Analysis and Machine Intelligence (T-PAMI), 44, 9, 5314--5330.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence (T-PAMI)"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2012.6232171"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.272"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01369"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01019"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01413"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593800"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Zachary Teed and Jia Deng. 2020. Raft: recurrent all-pairs field transforms for optical flow. Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics) 12347 402--419.","DOI":"10.1007\/978-3-030-58536-5_24"},{"key":"e_1_3_2_1_28_1","unstructured":"Viny Saajan Victor and Peter Neigel. 2021. Survey on semantic stereo matching \/ semantic depth estimation. CoRR abs\/2109.10123."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01397"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01264"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00203"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00566"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Guorun Yang Hengshuang Zhao Jianping Shi Zhidong Deng and Jiaya Jia. 2018. Segstereo: exploiting semantic information for disparity estimation. Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics) 660--676.","DOI":"10.1007\/978-3-030-01234-2_39"},{"key":"e_1_3_2_1_34_1","volume-title":"Proceedings of The IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Yang J.","unstructured":"J. Yang, J. M. Alvarez, and M. Liu. 2022. Non-parametric depth distribution modelling based depth inference for multi-view stereo. In Proceedings of The IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2007.383211"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Yin Zhichao","year":"2019","unstructured":"Zhichao Yin, Trevor Darrell, and Fisher Yu. 2019. Hierarchical discrete distri-bution decomposition for match density estimation. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12267"},{"key":"e_1_3_2_1_38_1","volume-title":"Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR), 185--194","author":"Zhang Feihu","unstructured":"Feihu Zhang, Victor Prisacariu, Ruigang Yang, and Philip H.S. Torr. 2019. Ganet: guided aggregation net for end-to-end stereo matching. In Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR), 185--194."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_25"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00872"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611862","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3611862","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T23:59:33Z","timestamp":1755820773000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611862"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":40,"alternative-id":["10.1145\/3581783.3611862","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3611862","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}