{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:18:58Z","timestamp":1772907538191,"version":"3.50.1"},"publisher-location":"Cham","reference-count":68,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031198236","type":"print"},{"value":"9783031198243","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-19824-3_5","type":"book-chapter","created":{"date-parts":[[2022,11,10]],"date-time":"2022-11-10T21:14:32Z","timestamp":1668114872000},"page":"69-86","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["NeXT: Towards High Quality Neural Radiance Fields via\u00a0Multi-skip Transformer"],"prefix":"10.1007","author":[{"given":"Yunxiao","family":"Wang","sequence":"first","affiliation":[]},{"given":"Yanjie","family":"Li","sequence":"additional","affiliation":[]},{"given":"Peidong","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Tao","family":"Dai","sequence":"additional","affiliation":[]},{"given":"Shu-Tao","family":"Xia","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,11,11]]},"reference":[{"key":"5_CR1","unstructured":"Arandjelovi\u0107, R., Zisserman, A.: Nerf in detail: learning to sample for view synthesis. arXiv preprint arXiv:2106.05264 (2021)"},{"key":"5_CR2","doi-asserted-by":"crossref","unstructured":"Barron, J.T., Mildenhall, B., Tancik, M., Hedman, P., Martin-Brualla, R., Srinivasan, P.P.: Mip-NeRF: a multiscale representation for anti-aliasing neural radiance fields. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5855\u20135864 (2021)","DOI":"10.1109\/ICCV48922.2021.00580"},{"key":"5_CR3","unstructured":"Bi, S., et al.: Neural reflectance fields for appearance acquisition. arXiv preprint arXiv:2008.03824 (2020)"},{"key":"5_CR4","doi-asserted-by":"crossref","unstructured":"Boss, M., Braun, R., Jampani, V., Barron, J.T., Liu, C., Lensch, H.: Nerd: neural reflectance decomposition from image collections. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12684\u201312694 (2021)","DOI":"10.1109\/ICCV48922.2021.01245"},{"key":"5_CR5","unstructured":"Bradbury, J., et al.: Jax: composable transformations of python+ numpy programs 2018. http:\/\/github.com\/google\/jax 4, 16 (2020)"},{"key":"5_CR6","doi-asserted-by":"crossref","unstructured":"Buehler, C., Bosse, M., McMillan, L., Gortler, S., Cohen, M.: Unstructured lumigraph rendering. In: Proceedings of the 28th Annual Conference on Computer Graphics and Interactive Techniques, pp. 425\u2013432 (2001)","DOI":"10.1145\/383259.383309"},{"key":"5_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"key":"5_CR8","doi-asserted-by":"crossref","unstructured":"Chan, E.R., Monteiro, M., Kellnhofer, P., Wu, J., Wetzstein, G.: Pi-GAN: periodic implicit generative adversarial networks for 3d-aware image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5799\u20135809 (2021)","DOI":"10.1109\/CVPR46437.2021.00574"},{"key":"5_CR9","doi-asserted-by":"crossref","unstructured":"Dai, Z., Cai, B., Lin, Y., Chen, J.: UP-DETR: unsupervised pre-training for object detection with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1601\u20131610 (2021)","DOI":"10.1109\/CVPR46437.2021.00165"},{"key":"5_CR10","doi-asserted-by":"crossref","unstructured":"Dai, Z., Yang, Z., Yang, Y., Carbonell, J., Le, Q.V., Salakhutdinov, R.: Transformer-xl: attentive language models beyond a fixed-length context. arXiv preprint arXiv:1901.02860 (2019)","DOI":"10.18653\/v1\/P19-1285"},{"key":"5_CR11","doi-asserted-by":"crossref","unstructured":"Davis, A., Levoy, M., Durand, F.: Unstructured light fields. In: Computer Graphics Forum, vol. 31, pp. 305\u2013314. Wiley Online Library (2012)","DOI":"10.1111\/j.1467-8659.2012.03009.x"},{"key":"5_CR12","doi-asserted-by":"crossref","unstructured":"Debevec, P.E., Taylor, C.J., Malik, J.: Modeling and rendering architecture from photographs: a hybrid geometry-and image-based approach. In: Proceedings of the 23rd annual conference on Computer Graphics and Interactive Techniques, pp. 11\u201320 (1996)","DOI":"10.1145\/237170.237191"},{"key":"5_CR13","unstructured":"Deng, B., Barron, J.T., Srinivasan, P.P.: JaxNeRF: an efficient JAX implementation of NeRF (2020)"},{"key":"5_CR14","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"5_CR15","unstructured":"Dosovitskiy, A., et al.: An image is worth 16 x 16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"5_CR16","doi-asserted-by":"crossref","unstructured":"Flynn, J., et al.: DeepView: view synthesis with learned gradient descent. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2367\u20132376 (2019)","DOI":"10.1109\/CVPR.2019.00247"},{"key":"5_CR17","doi-asserted-by":"crossref","unstructured":"Gafni, G., Thies, J., Zollhofer, M., Nie\u00dfner, M.: Dynamic neural radiance fields for monocular 4d facial avatar reconstruction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8649\u20138658 (2021)","DOI":"10.1109\/CVPR46437.2021.00854"},{"key":"5_CR18","unstructured":"Gao, C., Shih, Y., Lai, W.S., Liang, C.K., Huang, J.B.: Portrait neural radiance fields from a single image. arXiv preprint arXiv:2012.05903 (2020)"},{"key":"5_CR19","doi-asserted-by":"crossref","unstructured":"Garbin, S.J., Kowalski, M., Johnson, M., Shotton, J., Valentin, J.: FastNeRF: high-fidelity neural rendering at 200fps. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 14346\u201314355 (2021)","DOI":"10.1109\/ICCV48922.2021.01408"},{"key":"5_CR20","doi-asserted-by":"crossref","unstructured":"Gortler, S.J., Grzeszczuk, R., Szeliski, R., Cohen, M.F.: The lumigraph. In: Proceedings of the 23rd Annual Conference on Computer Graphics and Interactive Techniques, pp. 43\u201354 (1996)","DOI":"10.1145\/237170.237200"},{"key":"5_CR21","doi-asserted-by":"crossref","unstructured":"Hedman, P., Srinivasan, P.P., Mildenhall, B., Barron, J.T., Debevec, P.: Baking neural radiance fields for real-time view synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5875\u20135884 (2021)","DOI":"10.1109\/ICCV48922.2021.00582"},{"key":"5_CR22","doi-asserted-by":"crossref","unstructured":"Jain, A., Tancik, M., Abbeel, P.: Putting nerf on a diet: semantically consistent few-shot view synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5885\u20135894 (2021)","DOI":"10.1109\/ICCV48922.2021.00583"},{"key":"5_CR23","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"5_CR24","doi-asserted-by":"crossref","unstructured":"Levoy, M., Hanrahan, P.: Light field rendering. In: Proceedings of the 23rd Annual Conference on Computer Graphics and Interactive Techniques, pp. 31\u201342 (1996)","DOI":"10.1145\/237170.237199"},{"key":"5_CR25","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: TokenPose: learning keypoint tokens for human pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11313\u201311322 (2021)","DOI":"10.1109\/ICCV48922.2021.01112"},{"key":"5_CR26","doi-asserted-by":"crossref","unstructured":"Li, Z., Niklaus, S., Snavely, N., Wang, O.: Neural scene flow fields for space-time view synthesis of dynamic scenes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6498\u20136508 (2021)","DOI":"10.1109\/CVPR46437.2021.00643"},{"key":"5_CR27","doi-asserted-by":"crossref","unstructured":"Lindell, D.B., Martel, J.N., Wetzstein, G.: AutoInt: automatic integration for fast neural volume rendering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14556\u201314565 (2021)","DOI":"10.1109\/CVPR46437.2021.01432"},{"key":"5_CR28","first-page":"15651","volume":"33","author":"L Liu","year":"2020","unstructured":"Liu, L., Gu, J., Zaw Lin, K., Chua, T.S., Theobalt, C.: Neural sparse voxel fields. Adv. Neural Inf. Process. Syst. 33, 15651\u201315663 (2020)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"5_CR29","doi-asserted-by":"crossref","unstructured":"Lombardi, S., Simon, T., Saragih, J., Schwartz, G., Lehrmann, A., Sheikh, Y.: Neural volumes: learning dynamic renderable volumes from images. arXiv preprint arXiv:1906.07751 (2019)","DOI":"10.1145\/3306346.3323020"},{"issue":"4","key":"5_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3306346.3322980","volume":"38","author":"B Mildenhall","year":"2019","unstructured":"Mildenhall, B., et al.: Local light field fusion: practical view synthesis with prescriptive sampling guidelines. ACM Trans. Graph. (TOG) 38(4), 1\u201314 (2019)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"5_CR31","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1007\/978-3-030-58452-8_24","volume-title":"Computer Vision","author":"B Mildenhall","year":"2020","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 405\u2013421. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_24"},{"key":"5_CR32","doi-asserted-by":"crossref","unstructured":"Neff, T., et al.: Donerf: towards real-time rendering of neural radiance fields using depth oracle networks. arXiv e-prints pp. arXiv-2103 (2021)","DOI":"10.1111\/cgf.14340"},{"key":"5_CR33","doi-asserted-by":"crossref","unstructured":"Niemeyer, M., Geiger, A.: Giraffe: representing scenes as compositional generative neural feature fields. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11453\u201311464 (2021)","DOI":"10.1109\/CVPR46437.2021.01129"},{"key":"5_CR34","doi-asserted-by":"crossref","unstructured":"Niemeyer, M., Mescheder, L., Oechsle, M., Geiger, A.: Differentiable volumetric rendering: Learning implicit 3d representations without 3d supervision. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3504\u20133515 (2020)","DOI":"10.1109\/CVPR42600.2020.00356"},{"key":"5_CR35","doi-asserted-by":"crossref","unstructured":"Ost, J., Mannan, F., Thuerey, N., Knodt, J., Heide, F.: Neural scene graphs for dynamic scenes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2856\u20132865 (2021)","DOI":"10.1109\/CVPR46437.2021.00288"},{"key":"5_CR36","doi-asserted-by":"crossref","unstructured":"Park, K., et al.: Nerfies: Deformable neural radiance fields. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5865\u20135874 (2021)","DOI":"10.1109\/ICCV48922.2021.00581"},{"key":"5_CR37","doi-asserted-by":"crossref","unstructured":"Pumarola, A., Corona, E., Pons-Moll, G., Moreno-Noguer, F.: D-nerf: neural radiance fields for dynamic scenes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10318\u201310327 (2021)","DOI":"10.1109\/CVPR46437.2021.01018"},{"key":"5_CR38","doi-asserted-by":"crossref","unstructured":"Raj, A., et al.: Pva: pixel-aligned volumetric avatars. arXiv preprint arXiv:2101.02697 (2021)","DOI":"10.1109\/CVPR46437.2021.01156"},{"key":"5_CR39","doi-asserted-by":"crossref","unstructured":"Rebain, D., Jiang, W., Yazdani, S., Li, K., Yi, K.M., Tagliasacchi, A.: Derf: decomposed radiance fields. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14153\u201314161 (2021)","DOI":"10.1109\/CVPR46437.2021.01393"},{"key":"5_CR40","doi-asserted-by":"crossref","unstructured":"Reiser, C., Peng, S., Liao, Y., Geiger, A.: KiloNeRF: speeding up neural radiance fields with thousands of tiny MLPs. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 14335\u201314345 (2021)","DOI":"10.1109\/ICCV48922.2021.01407"},{"key":"5_CR41","unstructured":"Rematas, K., Martin-Brualla, R., Ferrari, V.: ShaRF: shape-conditioned radiance fields from a single view. arXiv preprint arXiv:2102.08860 (2021)"},{"key":"5_CR42","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"623","DOI":"10.1007\/978-3-030-58529-7_37","volume-title":"Computer Vision","author":"G Riegler","year":"2020","unstructured":"Riegler, G., Koltun, V.: Free view synthesis. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12364, pp. 623\u2013640. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58529-7_37"},{"key":"5_CR43","doi-asserted-by":"crossref","unstructured":"Riegler, G., Koltun, V.: Stable view synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12216\u201312225 (2021)","DOI":"10.1109\/CVPR46437.2021.01204"},{"key":"5_CR44","first-page":"20154","volume":"33","author":"K Schwarz","year":"2020","unstructured":"Schwarz, K., Liao, Y., Niemeyer, M., Geiger, A.: Graf: Generative radiance fields for 3d-aware image synthesis. Adv. Neural Inf. Process. Syst. 33, 20154\u201320166 (2020)","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"2","key":"5_CR45","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1023\/A:1008176507526","volume":"35","author":"SM Seitz","year":"1999","unstructured":"Seitz, S.M., Dyer, C.R.: Photorealistic scene reconstruction by voxel coloring. Int. J. Comput. Vision 35(2), 151\u2013173 (1999)","journal-title":"Int. J. Comput. Vision"},{"key":"5_CR46","doi-asserted-by":"crossref","unstructured":"Sitzmann, V., Thies, J., Heide, F., Nie\u00dfner, M., Wetzstein, G., Zollhofer, M.: DeepVoxels: learning persistent 3d feature embeddings. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2437\u20132446 (2019)","DOI":"10.1109\/CVPR.2019.00254"},{"key":"5_CR47","unstructured":"Sitzmann, V., Zollh\u00f6fer, M., Wetzstein, G.: Scene representation networks: continuous 3d-structure-aware neural scene representations. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"5_CR48","doi-asserted-by":"crossref","unstructured":"Srinivasan, P.P., Deng, B., Zhang, X., Tancik, M., Mildenhall, B., Barron, J.T.: NeRV: neural reflectance and visibility fields for relighting and view synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7495\u20137504 (2021)","DOI":"10.1109\/CVPR46437.2021.00741"},{"key":"5_CR49","doi-asserted-by":"crossref","unstructured":"Srinivasan, P.P., Tucker, R., Barron, J.T., Ramamoorthi, R., Ng, R., Snavely, N.: Pushing the boundaries of view extrapolation with multiplane images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 175\u2013184 (2019)","DOI":"10.1109\/CVPR.2019.00026"},{"key":"5_CR50","doi-asserted-by":"crossref","unstructured":"Strudel, R., Garcia, R., Laptev, I., Schmid, C.: Segmenter: transformer for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7262\u20137272 (2021)","DOI":"10.1109\/ICCV48922.2021.00717"},{"key":"5_CR51","doi-asserted-by":"crossref","unstructured":"Tancik, M., et al.: Learned initializations for optimizing coordinate-based neural representations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2846\u20132855 (2021)","DOI":"10.1109\/CVPR46437.2021.00287"},{"key":"5_CR52","unstructured":"Tolstikhin, I.O., et al.: MLP-Mixer: an all-MLP architecture for vision. In: Advances in Neural Information Processing Systems, vol. 34 (2021)"},{"key":"5_CR53","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9gou, H.: Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning, pp. 10347\u201310357. PMLR (2021)"},{"key":"5_CR54","doi-asserted-by":"crossref","unstructured":"Trevithick, A., Yang, B.: GRF: learning a general radiance field for 3d representation and rendering. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15182\u201315192 (2021)","DOI":"10.1109\/ICCV48922.2021.01490"},{"key":"5_CR55","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing systems vol. 30 (2017)"},{"key":"5_CR56","doi-asserted-by":"publisher","unstructured":"Waechter, M., Moehrle, N., Goesele, M.: Let there be color! large-scale texturing of 3d reconstructions. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) Computer Vision - ECCV 2014, ECCV 2014. Lecture Notes in Computer Science, vol. 8693, pp. 836\u2013850. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_54","DOI":"10.1007\/978-3-319-10602-1_54"},{"key":"5_CR57","doi-asserted-by":"crossref","unstructured":"Wang, Q., et al.: IBRNet: learning multi-view image-based rendering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4690\u20134699 (2021)","DOI":"10.1109\/CVPR46437.2021.00466"},{"issue":"4","key":"5_CR58","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004)","journal-title":"IEEE Trans. Image Process."},{"key":"5_CR59","doi-asserted-by":"crossref","unstructured":"Wood, D.N., et al.: Surface light fields for 3d photography. In: Proceedings of the 27th Annual Conference on Computer Graphics and Interactive Techniques, pp. 287\u2013296 (2000)","DOI":"10.1145\/344779.344925"},{"key":"5_CR60","unstructured":"Xie, E., Wang, W., Yu, Z., Anandkumar, A., Alvarez, J.M., Luo, P.: SegFormer: simple and efficient design for semantic segmentation with transformers. In: Advances in Neural Information Processing Systems, vol. 34 (2021)"},{"key":"5_CR61","unstructured":"Yang, S., Quan, Z., Nie, M., Yang, W.: Transpose: towards explainable human pose estimation by transformer. arXiv e-prints pp. arXiv-2012 (2020)"},{"key":"5_CR62","first-page":"2492","volume":"33","author":"L Yariv","year":"2020","unstructured":"Yariv, L., et al.: Multiview neural surface reconstruction by disentangling geometry and appearance. Adv. Neural Inf. Process. Syst. 33, 2492\u20132502 (2020)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"5_CR63","doi-asserted-by":"crossref","unstructured":"Yu, A., Li, R., Tancik, M., Li, H., Ng, R., Kanazawa, A.: Plenoctrees for real-time rendering of neural radiance fields. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5752\u20135761 (2021)","DOI":"10.1109\/ICCV48922.2021.00570"},{"key":"5_CR64","doi-asserted-by":"crossref","unstructured":"Yu, A., Ye, V., Tancik, M., Kanazawa, A.: pixelNeRF: neural radiance fields from one or few images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4578\u20134587 (2021)","DOI":"10.1109\/CVPR46437.2021.00455"},{"key":"5_CR65","doi-asserted-by":"crossref","unstructured":"Yuan, L., et al.: Tokens-to-Token ViT: training vision transformers from scratch on ImageNet. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 558\u2013567 (2021)","DOI":"10.1109\/ICCV48922.2021.00060"},{"key":"5_CR66","unstructured":"Yuan, Y., et al.: HRformer: high-resolution transformer for dense prediction. arXiv preprint arXiv:2110.09408 (2021)"},{"key":"5_CR67","doi-asserted-by":"crossref","unstructured":"Zhou, T., Tucker, R., Flynn, J., Fyffe, G., Snavely, N.: Stereo magnification: learning view synthesis using multiplane images. arXiv preprint arXiv:1805.09817 (2018)","DOI":"10.1145\/3197517.3201323"},{"key":"5_CR68","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable DETR: deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159 (2020)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-19824-3_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,12]],"date-time":"2022-11-12T00:05:43Z","timestamp":1668211543000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-19824-3_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031198236","9783031198243"],"references-count":68,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-19824-3_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"11 November 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}