{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:28:16Z","timestamp":1777656496638,"version":"3.51.4"},"publisher-location":"Cham","reference-count":58,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729454","type":"print"},{"value":"9783031729461","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,2]],"date-time":"2024-10-02T00:00:00Z","timestamp":1727827200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,2]],"date-time":"2024-10-02T00:00:00Z","timestamp":1727827200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72946-1_9","type":"book-chapter","created":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T19:02:08Z","timestamp":1727809328000},"page":"146-162","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Learning Representations from\u00a0Foundation Models for\u00a0Domain Generalized Stereo 
Matching"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6178-6532","authenticated-orcid":false,"given":"Yongjian","family":"Zhang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0429-0263","authenticated-orcid":false,"given":"Longguang","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3297-1824","authenticated-orcid":false,"given":"Kunhong","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8384-6981","authenticated-orcid":false,"given":"Yun","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7051-841X","authenticated-orcid":false,"given":"Yulan","family":"Guo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,2]]},"reference":[{"key":"9_CR1","doi-asserted-by":"crossref","unstructured":"Cai, C., Poggi, M., Mattoccia, S., Mordohai, P.: Matching-space stereo networks for cross-domain generalization. In: Struc, V., Fern\u00e1ndez, F.G. (eds.) 3DV (2020)","DOI":"10.1109\/3DV50981.2020.00046"},{"key":"9_CR2","doi-asserted-by":"crossref","unstructured":"Chang, J., Chen, Y.: Pyramid stereo matching network. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00567"},{"key":"9_CR3","doi-asserted-by":"crossref","unstructured":"Chang, T., Yang, X., Zhang, T., Wang, M.: Domain generalized stereo matching via hierarchical visual transformation. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00922"},{"key":"9_CR4","doi-asserted-by":"crossref","unstructured":"Chuah, W., Tennakoon, R.B., Hoseinnezhad, R., Bab-Hadiashar, A., Suter, D.: ITSA: an information-theoretic approach to automatic shortcut avoidance and domain generalization in stereo matching networks. 
In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01268"},{"key":"9_CR5","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: ICLR (2021)"},{"key":"9_CR6","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? the kitti vision benchmark suite. In: CVPR (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"9_CR7","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1007\/978-3-031-19824-3_16","volume-title":"ECCV 2022","author":"W Guo","year":"2022","unstructured":"Guo, W., et al.: Context-enhanced stereo transformer. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13692, pp. 263\u2013279. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19824-3_16"},{"key":"9_CR8","doi-asserted-by":"crossref","unstructured":"Guo, X., Li, H., Yi, S., Ren, J.S.J., Wang, X.: Learning monocular depth by distilling cross-domain stereo networks. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01252-6_30"},{"key":"9_CR9","doi-asserted-by":"crossref","unstructured":"Guo, X., Yang, K., Yang, W., Wang, X., Li, H.: Group-wise correlation stereo network. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00339"},{"key":"9_CR10","first-page":"7786","volume":"25","author":"Y Guo","year":"2023","unstructured":"Guo, Y., Wang, Y., Wang, L., Wang, Z., Cheng, C.: Cvcnet: learning cost volume compression for efficient stereo matching. IEEE TMM 25, 7786\u20137799 (2023)","journal-title":"IEEE TMM"},{"key":"9_CR11","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.B.: Momentum contrast for unsupervised visual representation learning. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"9_CR12","unstructured":"Hirschm\u00fcller, H.: Accurate and efficient stereo processing by semi-global matching and mutual information. 
In: CVPR (2005)"},{"key":"9_CR13","doi-asserted-by":"crossref","unstructured":"Kendall, A., Martirosyan, H., Dasgupta, S., Henry, P.: End-to-end learning of geometry and context for deep stereo regression. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.17"},{"key":"9_CR14","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et\u00a0al.: Segment anything. arXiv:2304.02643 (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"9_CR15","doi-asserted-by":"crossref","unstructured":"Li, K., Wang, L., Zhang, Y., Xue, K., Zhou, S., Guo, Y.: Los: local structure-guided stereo matching. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01867"},{"key":"9_CR16","doi-asserted-by":"crossref","unstructured":"Li, Z., et al.: Revisiting stereo depth estimation from a sequence-to-sequence perspective with transformers. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00614"},{"key":"9_CR17","doi-asserted-by":"crossref","unstructured":"Liang, Z., et al.: Learning for disparity estimation through feature constancy. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00297"},{"key":"9_CR18","doi-asserted-by":"crossref","unstructured":"Lipson, L., Teed, Z., Deng, J.: Raft-stereo: multilevel recurrent field transforms for stereo matching. In: 3DV (2021)","DOI":"10.1109\/3DV53792.2021.00032"},{"key":"9_CR19","doi-asserted-by":"crossref","unstructured":"Liu, B., Yu, H., Qi, G.: Graftnet: towards domain generalized stereo matching with a broad-spectrum and task-oriented feature. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01267"},{"key":"9_CR20","doi-asserted-by":"crossref","unstructured":"Liu, R., Yang, C., Sun, W., Wang, X., Li, H.: Stereogan: bridging synthetic-to-real domain gap by joint optimization of domain translation and stereo matching. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.01277"},{"key":"9_CR21","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. 
In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"issue":"1","key":"9_CR22","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1177\/0278364916679498","volume":"36","author":"W Maddern","year":"2017","unstructured":"Maddern, W., Pascoe, G., Linegar, C., Newman, P.: 1 year, 1000 km: the oxford robotcar dataset. Int. J. Robot. Res. 36(1), 3\u201315 (2017)","journal-title":"Int. J. Robot. Res."},{"key":"9_CR23","doi-asserted-by":"crossref","unstructured":"Mayer, N., et al.: A large dataset to train convolutional networks for disparity, optical flow, and scene flow estimation. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.438"},{"key":"9_CR24","first-page":"60","volume":"140","author":"M Menze","year":"2018","unstructured":"Menze, M., Heipke, C., Geiger, A.: Object scene flow. JPRS 140, 60\u201376 (2018)","journal-title":"JPRS"},{"key":"9_CR25","unstructured":"Oquab, M., et al.: Dinov2: learning robust visual features without supervision (2023)"},{"key":"9_CR26","doi-asserted-by":"crossref","unstructured":"Pang, J., et al.: Zoom and learn: generalizing deep stereo matching to novel domains. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00221"},{"key":"9_CR27","doi-asserted-by":"crossref","unstructured":"Ranftl, R., Bochkovskiy, A., Koltun, V.: Vision transformers for dense prediction. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"9_CR28","doi-asserted-by":"crossref","unstructured":"Rao, Z., et al.: Masked representation learning for domain generalized stereo matching. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00526"},{"key":"9_CR29","doi-asserted-by":"crossref","unstructured":"Scharstein, D., et al.: High-resolution stereo datasets with subpixel-accurate ground truth. In: GCPR (2014)","DOI":"10.1007\/978-3-319-11752-2_3"},{"key":"9_CR30","doi-asserted-by":"crossref","unstructured":"Schops, T., et al.: A multi-view stereo benchmark with high-resolution images and multi-camera videos. 
In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.272"},{"key":"9_CR31","doi-asserted-by":"crossref","unstructured":"Shen, Z., Dai, Y., Rao, Z.: Cfnet: cascade and fused cost volume for robust stereo matching. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01369"},{"key":"9_CR32","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"280","DOI":"10.1007\/978-3-031-19824-3_17","volume-title":"ECCV 2022","author":"Z Shen","year":"2022","unstructured":"Shen, Z., Dai, Y., Song, X., Rao, Z., Zhou, D., Zhang, L.: PCW-Net: pyramid combination and warping cost volume for stereo matching. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13692, pp. 280\u2013297. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19824-3_17"},{"key":"9_CR33","doi-asserted-by":"crossref","unstructured":"Song, X., Yang, G., Zhu, X., Zhou, H., Wang, Z., Shi, J.: Adastereo: a simple and efficient approach for adaptive stereo matching. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01019"},{"issue":"4","key":"9_CR34","doi-asserted-by":"publisher","first-page":"910","DOI":"10.1007\/s11263-019-01287-w","volume":"128","author":"X Song","year":"2020","unstructured":"Song, X., Zhao, X., Fang, L., Hu, H., Yu, Y.: Edgestereo: an effective multi-task learning network for stereo matching and edge detection. IJCV 128(4), 910\u2013930 (2020)","journal-title":"IJCV"},{"key":"9_CR35","doi-asserted-by":"crossref","unstructured":"Sun, J., Shen, Z., Wang, Y., Bao, H., Zhou, X.: LoFTR: detector-free local feature matching with transformers. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00881"},{"key":"9_CR36","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"402","DOI":"10.1007\/978-3-030-58536-5_24","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Z Teed","year":"2020","unstructured":"Teed, Z., Deng, J.: RAFT: recurrent all-pairs field transforms for optical flow. 
In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12347, pp. 402\u2013419. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58536-5_24"},{"key":"9_CR37","doi-asserted-by":"crossref","unstructured":"Tonioni, A., Poggi, M., Mattoccia, S., Stefano, L.D.: Unsupervised adaptation for deep stereo. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.178"},{"key":"9_CR38","doi-asserted-by":"crossref","unstructured":"Tonioni, A., Rahnama, O., Joy, T., Stefano, L.D., Ajanthan, T., Torr, P.H.S.: Learning to adapt for stereo. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00989"},{"key":"9_CR39","doi-asserted-by":"crossref","unstructured":"Tonioni, A., Tosi, F., Poggi, M., Mattoccia, S., Stefano, L.D.: Real-time self-adaptive deep stereo. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00028"},{"key":"9_CR40","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NeurIPS (2017)"},{"key":"9_CR41","doi-asserted-by":"publisher","first-page":"2108","DOI":"10.1109\/TPAMI.2020.3026899","volume":"44","author":"L Wang","year":"2020","unstructured":"Wang, L., et al.: Parallax attention for unsupervised stereo correspondence learning. IEEE TPAMI 44, 2108\u20132125 (2020)","journal-title":"IEEE TPAMI"},{"key":"9_CR42","doi-asserted-by":"crossref","unstructured":"Wang, L., et al.: Learning parallax attention for stereo image super-resolution. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01253"},{"issue":"3","key":"9_CR43","doi-asserted-by":"publisher","first-page":"6258","DOI":"10.1109\/LRA.2022.3164755","volume":"7","author":"Y Wang","year":"2022","unstructured":"Wang, Y., Wang, L., Wang, H., Guo, Y.: SPNet: learning stereo matching with slanted plane aggregation. IEEE Robot. Autom. Lett. 7(3), 6258\u20136265 (2022)","journal-title":"IEEE Robot. Autom. Lett."},{"key":"9_CR44","doi-asserted-by":"crossref","unstructured":"Weinzaepfel, P., et al.: CroCo v2: improved cross-view completion pre-training for stereo matching and optical flow. 
In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01647"},{"key":"9_CR45","unstructured":"Weinzaepfel, P., et al.: CroCo: self-supervised pre-training for 3D vision tasks by cross-view completion. In: NeurIPS (2022)"},{"key":"9_CR46","doi-asserted-by":"crossref","unstructured":"Xie, Z., et al.: Simmim: a simple framework for masked image modeling. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00943"},{"key":"9_CR47","doi-asserted-by":"crossref","unstructured":"Xu, H., Zhang, J.: Aanet: adaptive aggregation network for efficient stereo matching. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00203"},{"key":"9_CR48","doi-asserted-by":"crossref","unstructured":"Yang, G., Manela, J., Happold, M., Ramanan, D.: Hierarchical deep stereo matching on high-resolution images. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00566"},{"key":"9_CR49","doi-asserted-by":"crossref","unstructured":"Yang, G., Song, X., Huang, C., Deng, Z., Shi, J., Zhou, B.: Drivingstereo: a large-scale dataset for stereo matching in autonomous driving scenarios. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00099"},{"key":"9_CR50","doi-asserted-by":"crossref","unstructured":"Yang, G., Zhao, H., Shi, J., Deng, Z., Jia, J.: Segstereo: exploiting semantic information for disparity estimation. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01234-2_39"},{"key":"9_CR51","doi-asserted-by":"crossref","unstructured":"Yang, J., \u00c1lvarez, J.M., Liu, M.: Non-parametric depth distribution modelling based depth inference for multi-view stereo. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00843"},{"key":"9_CR52","doi-asserted-by":"crossref","unstructured":"Yang, L., Kang, B., Huang, Z., Xu, X., Feng, J., Zhao, H.: Depth anything: unleashing the power of large-scale unlabeled data. 
In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.00987"},{"issue":"4","key":"9_CR53","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2014.2353642","volume":"37","author":"Q Yang","year":"2015","unstructured":"Yang, Q.: Stereo matching using tree filtering. IEEE TPAMI 37(4), 834\u2013846 (2015)","journal-title":"IEEE TPAMI"},{"key":"9_CR54","doi-asserted-by":"crossref","unstructured":"Zbontar, J., LeCun, Y.: Computing the stereo matching cost with a convolutional neural network. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298767"},{"key":"9_CR55","doi-asserted-by":"crossref","unstructured":"Zhang, F., Prisacariu, V.A., Yang, R., Torr, P.H.S.: Ga-net: guided aggregation net for end-to-end stereo matching. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00027"},{"key":"9_CR56","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"420","DOI":"10.1007\/978-3-030-58536-5_25","volume-title":"Computer Vision \u2013 ECCV 2020","author":"F Zhang","year":"2020","unstructured":"Zhang, F., Qi, X., Yang, R., Prisacariu, V., Wah, B., Torr, P.: Domain-invariant stereo matching networks. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12347, pp. 420\u2013439. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58536-5_25"},{"key":"9_CR57","doi-asserted-by":"crossref","unstructured":"Zhang, J., et al.: Revisiting domain generalized stereo matching networks from a feature consistency perspective. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01266"},{"key":"9_CR58","doi-asserted-by":"crossref","unstructured":"Zhu, S., Liu, X.: Pmatch: paired masked image modeling for dense geometric matching. 
In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.02098"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72946-1_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T19:04:24Z","timestamp":1727809464000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72946-1_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,2]]},"ISBN":["9783031729454","9783031729461"],"references-count":58,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72946-1_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,2]]},"assertion":[{"value":"2 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start 
Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}