{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T17:59:20Z","timestamp":1775325560149,"version":"3.50.1"},"publisher-location":"Cham","reference-count":76,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031198205","type":"print"},{"value":"9783031198212","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-19821-2_33","type":"book-chapter","created":{"date-parts":[[2022,10,22]],"date-time":"2022-10-22T12:12:59Z","timestamp":1666440779000},"page":"573-591","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":101,"title":["MVSTER: Epipolar Transformer for\u00a0Efficient Multi-view Stereo"],"prefix":"10.1007","author":[{"given":"Xiaofeng","family":"Wang","sequence":"first","affiliation":[]},{"given":"Zheng","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Guan","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Fangbo","family":"Qin","sequence":"additional","affiliation":[]},{"given":"Yun","family":"Ye","sequence":"additional","affiliation":[]},{"given":"Yijia","family":"He","sequence":"additional","affiliation":[]},{"given":"Xu","family":"Chi","sequence":"additional","affiliation":[]},{"given":"Xingang","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,23]]},"reference":[{"key":"33_CR1","doi-asserted-by":"crossref","unstructured":"Aan\u00e6s, H., Jensen, R.R., Vogiatzis, G., Tola, E., Dahl, A.B.: Large-scale data for multiple-view stereopsis. Int. J. Comput. Vis. 120, 153\u2013168 (2016)","DOI":"10.1007\/s11263-016-0902-9"},{"key":"33_CR2","doi-asserted-by":"crossref","unstructured":"Abnar, S., Zuidema, W.H.: Quantifying attention flow in transformers. In: Association for Computational Linguistics (2020)","DOI":"10.18653\/v1\/2020.acl-main.385"},{"key":"33_CR3","unstructured":"Arjovsky, M., Chintala, S., Bottou, L.: Wasserstein GAN. arXiv preprint arXiv:1701.07875 (2017)"},{"key":"33_CR4","unstructured":"Bozic, A., Palafox, P., Thies, J., Dai, A., Nie\u00dfner, M.: TransFormerfusion: monocular RGB scene reconstruction using transformers. In: Advances in Neural Information Processing Systems (2021)"},{"key":"33_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"766","DOI":"10.1007\/978-3-540-88682-2_58","volume-title":"Computer Vision \u2013 ECCV 2008","author":"NDF Campbell","year":"2008","unstructured":"Campbell, N.D.F., Vogiatzis, G., Hern\u00e1ndez, C., Cipolla, R.: Using multiple hypotheses to improve depth-maps for multi-view stereo. In: Forsyth, D., Torr, P., Zisserman, A. (eds.) ECCV 2008. LNCS, vol. 5302, pp. 766\u2013779. Springer, Heidelberg (2008). https:\/\/doi.org\/10.1007\/978-3-540-88682-2_58"},{"key":"33_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"key":"33_CR7","unstructured":"Chen, M., et al.: Generative pretraining from pixels. In: International Conference on Machine Learning (2020)"},{"key":"33_CR8","doi-asserted-by":"crossref","unstructured":"Chen, R., Han, S., Xu, J., Su, H.: Point-based multi-view stereo network. In: IEEE International Conference on Computer Vision (2019)","DOI":"10.1109\/ICCV.2019.00162"},{"key":"33_CR9","doi-asserted-by":"crossref","unstructured":"Cheng, S., et al.: Deep stereo using adaptive thin volume representation with uncertainty awareness. In: IEEE Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.00260"},{"key":"33_CR10","doi-asserted-by":"crossref","unstructured":"Collins, R.T.: A space-sweep approach to true multi-image matching. In: IEEE Conference on Computer Vision and Pattern Recognition (1996)","DOI":"10.1109\/CVPR.1996.517097"},{"key":"33_CR11","unstructured":"Cuturi, M.: Sinkhorn distances: lightspeed computation of optimal transport. In: Advances in Neural Information Processing Systems (2013)"},{"key":"33_CR12","doi-asserted-by":"crossref","unstructured":"Dai, J., et al.: Deformable convolutional networks. In: IEEE International Conference on Computer Vision (2017)","DOI":"10.1109\/ICCV.2017.89"},{"key":"33_CR13","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (2019)"},{"key":"33_CR14","doi-asserted-by":"crossref","unstructured":"Ding, Y., et al.: TransMVSNet: global context-aware multi-view stereo network with transformers. arXiv preprint arXiv:2111.14600 (2021)","DOI":"10.1109\/CVPR52688.2022.00839"},{"key":"33_CR15","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (2021)"},{"key":"33_CR16","doi-asserted-by":"crossref","unstructured":"Dosovitskiy, A., et al.: FlowNet: learning optical flow with convolutional networks. In: IEEE International Conference on Computer Vision (2015)","DOI":"10.1109\/ICCV.2015.316"},{"key":"33_CR17","doi-asserted-by":"crossref","unstructured":"Duggal, S., Wang, S., Ma, W., Hu, R., Urtasun, R.: DeepPruner: learning efficient stereo matching via differentiable PatchMatch. In: IEEE International Conference on Computer Vision (2019)","DOI":"10.1109\/ICCV.2019.00448"},{"key":"33_CR18","doi-asserted-by":"crossref","unstructured":"Furukawa, Y., Ponce, J.: Accurate, dense, and robust multiview stereopsis. IEEE Trans. Pattern Anal. Mach. Intell. (2010)","DOI":"10.1109\/TPAMI.2009.161"},{"key":"33_CR19","doi-asserted-by":"crossref","unstructured":"Galliani, S., Lasinger, K., Schindler, K.: Massively parallel multiview stereopsis by surface normal diffusion. In: IEEE International Conference on Computer Vision (2015)","DOI":"10.1109\/ICCV.2015.106"},{"key":"33_CR20","unstructured":"Giang, K.T., Song, S., Jo, S.: Curvature-guided dynamic scale networks for multi-view stereo. arXiv preprint arXiv:2112.05999 (2021)"},{"key":"33_CR21","doi-asserted-by":"crossref","unstructured":"Godard, C., Aodha, O.M., Brostow, G.J.: Unsupervised monocular depth estimation with left-right consistency. In: IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.699"},{"key":"33_CR22","doi-asserted-by":"crossref","unstructured":"Godard, C., Aodha, O.M., Firman, M., Brostow, G.J.: Digging into self-supervised monocular depth estimation. In: IEEE International Conference on Computer Vision (2019)","DOI":"10.1109\/ICCV.2019.00393"},{"key":"33_CR23","doi-asserted-by":"crossref","unstructured":"Gu, X., Fan, Z., Zhu, S., Dai, Z., Tan, F., Tan, P.: Cascade cost volume for high-resolution multi-view stereo and stereo matching. In: IEEE Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.00257"},{"key":"33_CR24","doi-asserted-by":"crossref","unstructured":"He, C., Zeng, H., Huang, J., Hua, X., Zhang, L.: Structure aware single-stage 3d object detection from point cloud. In: IEEE Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.01189"},{"key":"33_CR25","doi-asserted-by":"crossref","unstructured":"He, Y., Yan, R., Fragkiadaki, K., Yu, S.: Epipolar transformer for multi-view human pose estimation. In: IEEE Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPRW50498.2020.00526"},{"key":"33_CR26","doi-asserted-by":"crossref","unstructured":"Ke, Q., Bennamoun, M., An, S., Sohel, F.A., Boussa\u00efd, F.: A new representation of skeleton sequences for 3d action recognition. In: IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.486"},{"key":"33_CR27","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: International Conference on Learning Representations (2015)"},{"key":"33_CR28","doi-asserted-by":"crossref","unstructured":"Knapitsch, A., Park, J., Zhou, Q., Koltun, V.: Tanks and temples: benchmarking large-scale scene reconstruction. ACM Trans. Graph. 36, 1\u201313 (2017)","DOI":"10.1145\/3072959.3073599"},{"key":"33_CR29","doi-asserted-by":"crossref","unstructured":"Lee, J.Y., DeGol, J., Zou, C., Hoiem, D.: PatchMatch-RL: Deep MVS with pixelwise depth, normal, and visibility. In: IEEE International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00610"},{"key":"33_CR30","doi-asserted-by":"crossref","unstructured":"Li, Z., et al.: Revisiting stereo depth estimation from a sequence-to-sequence perspective with transformers. In: IEEE International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00614"},{"key":"33_CR31","doi-asserted-by":"crossref","unstructured":"Lin, T., Doll\u00e1r, P., Girshick, R.B., He, K., Hariharan, B., Belongie, S.J.: Feature pyramid networks for object detection. In: IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"33_CR32","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. IEEE International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"33_CR33","doi-asserted-by":"crossref","unstructured":"Luo, S., Hu, W.: Diffusion probabilistic models for 3D point cloud generation. In: IEEE Conference on Computer Vision and Pattern Recognition (2021)","DOI":"10.1109\/CVPR46437.2021.00286"},{"key":"33_CR34","doi-asserted-by":"crossref","unstructured":"Ma, X., Gong, Y., Wang, Q., Huang, J., Chen, L., Yu, F.: EPP-MVSNet: epipolar-assembling based depth prediction for multi-view stereo. In: IEEE International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00568"},{"key":"33_CR35","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1007\/978-3-030-58452-8_24","volume-title":"Computer Vision \u2013 ECCV 2020","author":"B Mildenhall","year":"2020","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 405\u2013421. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_24"},{"key":"33_CR36","unstructured":"Mordan, T., Thome, N., H\u00e9naff, G., Cord, M.: Revisiting multi-task learning with ROCK: a deep residual auxiliary block for visual detection. In: Advances in Neural Information Processing Systems (2018)"},{"key":"33_CR37","doi-asserted-by":"crossref","unstructured":"Peng, R., Wang, R., Wang, Z., Lai, Y., Wang, R.: Rethinking depth estimation for multi-view stereo: a unified representation and focal loss. arXiv preprint arXiv:2201.01501 (2022)","DOI":"10.1109\/CVPR52688.2022.00845"},{"key":"33_CR38","doi-asserted-by":"crossref","unstructured":"Peyr\u00e9, G., Cuturi, M.: Computational optimal transport. Found. Trends Mach. Learn. (2019)","DOI":"10.1561\/9781680835519"},{"key":"33_CR39","unstructured":"Qi, C.R., Su, H., Mo, K., Guibas, L.J.: PointNet: deep learning on point sets for 3D classification and segmentation. In: IEEE Conference on Computer Vision and Pattern Recognition (2017)"},{"key":"33_CR40","unstructured":"Qi, C.R., Yi, L., Su, H., Guibas, L.J.: PointNet++: deep hierarchical feature learning on point sets in a metric space. In: Advances in Neural Information Processing Systems (2017)"},{"key":"33_CR41","doi-asserted-by":"crossref","unstructured":"Qin, J., Wu, J., Xiao, X., Li, L., Wang, X.: Activation modulation and recalibration scheme for weakly supervised semantic segmentation. In: AAAI Conference on Artificial Intelligence (2021)","DOI":"10.1609\/aaai.v36i2.20108"},{"key":"33_CR42","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I.: Improving language understanding by generative pre-training. OpenAI Preprint (2018)"},{"key":"33_CR43","doi-asserted-by":"crossref","unstructured":"Sch\u00f6nberger, J.L., Frahm, J.: Structure-from-motion revisited. In: IEEE Conference on Computer Vision and Pattern Recognition (2016)","DOI":"10.1109\/CVPR.2016.445"},{"key":"33_CR44","doi-asserted-by":"crossref","unstructured":"Sch\u00f6ps, T., et al.: A multi-view stereo benchmark with high-resolution images and multi-camera videos. In: IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.272"},{"key":"33_CR45","doi-asserted-by":"crossref","unstructured":"Shen, Z., Dai, Y., Rao, Z.: CFNet: cascade and fused cost volume for robust stereo matching. In: IEEE Conference on Computer Vision and Pattern Recognition (2021)","DOI":"10.1109\/CVPR46437.2021.01369"},{"key":"33_CR46","doi-asserted-by":"crossref","unstructured":"Shi, S., et al.: PV-RCNN: point-voxel feature set abstraction for 3D object detection. In: IEEE Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.01054"},{"key":"33_CR47","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1007\/978-3-030-58589-1_7","volume-title":"Computer Vision \u2013 ECCV 2020","author":"A Sinha","year":"2020","unstructured":"Sinha, A., Murez, Z., Bartolozzi, J., Badrinarayanan, V., Rabinovich, A.: DELTAS: depth estimation by learning triangulation and densification of sparse points. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12366, pp. 104\u2013121. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58589-1_7"},{"key":"33_CR48","doi-asserted-by":"crossref","unstructured":"Tankovich, V., et al.: HitNet: hierarchical iterative tile refinement network for real-time stereo matching. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 14362\u201314372 (2021)","DOI":"10.1109\/CVPR46437.2021.01413"},{"key":"33_CR49","doi-asserted-by":"crossref","unstructured":"Tenney, I., Das, D., Pavlick, E.: BERT rediscovers the classical NLP pipeline. In: Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/P19-1452"},{"key":"33_CR50","doi-asserted-by":"crossref","unstructured":"Tola, E., Strecha, C., Fua, P.: Efficient large-scale multi-view stereo for ultra high-resolution image sets. Mach. Vis. Appl. 23, 903\u2013920 (2012)","DOI":"10.1007\/s00138-011-0346-8"},{"key":"33_CR51","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems (2017)"},{"key":"33_CR52","doi-asserted-by":"crossref","unstructured":"Wang, F., Galliani, S., Vogel, C., Pollefeys, M.: IterMVS: iterative probability estimation for efficient multi-view stereo. arXiv preprint arXiv:2112.05126 (2021)","DOI":"10.1109\/CVPR52688.2022.00841"},{"key":"33_CR53","doi-asserted-by":"crossref","unstructured":"Wang, F., Galliani, S., Vogel, C., Speciale, P., Pollefeys, M.: PatchmatchNet: learned multi-view PatchMatch stereo. In: IEEE Conference on Computer Vision and Pattern Recognition (2021)","DOI":"10.1109\/CVPR46437.2021.01397"},{"key":"33_CR54","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"108","DOI":"10.1007\/978-3-030-58548-8_7","volume-title":"Computer Vision \u2013 ECCV 2020","author":"H Wang","year":"2020","unstructured":"Wang, H., Zhu, Y., Green, B., Adam, H., Yuille, A., Chen, L.-C.: Axial-DeepLab: stand-alone axial-attention for panoptic segmentation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12349, pp. 108\u2013126. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58548-8_7"},{"key":"33_CR55","doi-asserted-by":"crossref","unstructured":"Watson, J., Aodha, O.M., Prisacariu, V., Brostow, G.J., Firman, M.: The temporal opportunist: self-supervised multi-frame monocular depth. In: IEEE Conference on Computer Vision and Pattern Recognition (2021)","DOI":"10.1109\/CVPR46437.2021.00122"},{"key":"33_CR56","unstructured":"Wei, Y., et al.: SurroundDepth: entangling surrounding views for self-supervised multi-camera depth estimation. arXiv preprint arXiv:2204.03636 (2022)"},{"key":"33_CR57","doi-asserted-by":"crossref","unstructured":"Wei, Z., Zhu, Q., Min, C., Chen, Y., Wang, G.: AA-RMVSNet: adaptive aggregation recurrent multi-view stereo network. In: IEEE International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00613"},{"key":"33_CR58","doi-asserted-by":"crossref","unstructured":"Xu, Q., Tao, W.: Multi-scale geometric consistency guided multi-view stereo. In: IEEE Conference on Computer Vision and Pattern Recognition (2019)","DOI":"10.1109\/CVPR.2019.00563"},{"key":"33_CR59","doi-asserted-by":"crossref","unstructured":"Xu, Q., Tao, W.: Learning inverse depth regression for multi-view stereo with correlation cost volume. In: AAAI Conference on Artificial Intelligence (2020)","DOI":"10.1609\/aaai.v34i07.6939"},{"key":"33_CR60","unstructured":"Xu, Q., Tao, W.: PVSNet: pixelwise visibility-aware multi-view stereo network. arXiv preprint arXiv:2007.07714 (2020)"},{"key":"33_CR61","doi-asserted-by":"crossref","unstructured":"Yan, J., et al.: Dense hybrid recurrent multi-view stereo net with dynamic consistency checking. In: European Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-58548-8_39"},{"key":"33_CR62","doi-asserted-by":"crossref","unstructured":"Yang, F., Yang, H., Fu, J., Lu, H., Guo, B.: Learning texture transformer network for image super-resolution. In: IEEE Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.00583"},{"key":"33_CR63","doi-asserted-by":"crossref","unstructured":"Yang, J., Mao, W., Alvarez, J.M., Liu, M.: Cost volume pyramid based depth inference for multi-view stereo. In: IEEE Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.00493"},{"key":"33_CR64","doi-asserted-by":"crossref","unstructured":"Yang, Z., Ren, Z., Shan, Q., Huang, Q.: MVS2D: efficient multi-view stereo via attention-driven 2D convolutions. arXiv preprint arXiv:2104.13325 (2021)","DOI":"10.1109\/CVPR52688.2022.00838"},{"key":"33_CR65","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"785","DOI":"10.1007\/978-3-030-01237-3_47","volume-title":"Computer Vision \u2013 ECCV 2018","author":"Y Yao","year":"2018","unstructured":"Yao, Y., Luo, Z., Li, S., Fang, T., Quan, L.: MVSNet: depth inference for unstructured multi-view stereo. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11212, pp. 785\u2013801. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01237-3_47"},{"key":"33_CR66","doi-asserted-by":"crossref","unstructured":"Yao, Y., Luo, Z., Li, S., Shen, T., Fang, T., Quan, L.: Recurrent MVSNet for high-resolution multi-view stereo depth inference. In: IEEE Conference on Computer Vision and Pattern Recognition (2019)","DOI":"10.1109\/CVPR.2019.00567"},{"key":"33_CR67","doi-asserted-by":"crossref","unstructured":"Yao, Y., et al.: BlendedMVS: a large-scale dataset for generalized multi-view stereo networks. In: IEEE Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.00186"},{"key":"33_CR68","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"766","DOI":"10.1007\/978-3-030-58545-7_44","volume-title":"Computer Vision \u2013 ECCV 2020","author":"H Yi","year":"2020","unstructured":"Yi, H., et al.: Pyramid multi-view stereo net with self-adaptive view aggregation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12354, pp. 766\u2013782. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58545-7_44"},{"key":"33_CR69","doi-asserted-by":"crossref","unstructured":"Yu, Z., Gao, S.: Fast-MVSNet: sparse-to-dense multi-view stereo with learned propagation and gauss-newton refinement. In: IEEE Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.00202"},{"key":"33_CR70","unstructured":"Zhang, J., Yao, Y., Li, S., Luo, Z., Fang, T.: Visibility-aware multi-view stereo network. In: British Machine Vision Conference (2020)"},{"key":"33_CR71","doi-asserted-by":"crossref","unstructured":"Zhang, X., Hu, Y., Wang, H., Cao, X., Zhang, B.: Long-range attention network for multi-view stereo. In: IEEE Winter Conference on Applications of Computer Vision (2021)","DOI":"10.1109\/WACV48630.2021.00383"},{"key":"33_CR72","unstructured":"Zhang, Y., et al.: BEVerse: unified perception and prediction in birds-eye-view for vision-centric autonomous driving. arXiv preprint arXiv:2205.09743 (2022)"},{"key":"33_CR73","doi-asserted-by":"crossref","unstructured":"Zhao, M., Zhang, J., Zhang, C., Zhang, W.: Leveraging heterogeneous auxiliary tasks to assist crowd counting. In: IEEE Conference on Computer Vision and Pattern Recognition (2019)","DOI":"10.1109\/CVPR.2019.01302"},{"key":"33_CR74","doi-asserted-by":"crossref","unstructured":"Zhao, Z., Wu, Z., Zhuang, Y., Li, B., Jia, J.: Tracking objects as pixel-wise distributions. arXiv preprint arXiv:2207.05518 (2022)","DOI":"10.1007\/978-3-031-20047-2_5"},{"key":"33_CR75","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Tuzel, O.: VoxelNet: end-to-end learning for point cloud based 3D object detection. In: IEEE Conference on Computer Vision and Pattern Recognition (2018)","DOI":"10.1109\/CVPR.2018.00472"},{"key":"33_CR76","unstructured":"Zhu, J., Peng, B., Li, W., Shen, H., Zhang, Z., Lei, J.: Multi-view stereo with transformer. arXiv preprint arXiv:2112.00336 (2021)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-19821-2_33","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,22]],"date-time":"2022-10-22T12:55:10Z","timestamp":1666443310000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-19821-2_33"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031198205","9783031198212"],"references-count":76,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-19821-2_33","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"23 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}