{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T14:18:18Z","timestamp":1743085098471,"version":"3.40.3"},"publisher-location":"Cham","reference-count":101,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031736490"},{"type":"electronic","value":"9783031736506"}],"license":[{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73650-6_15","type":"book-chapter","created":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T18:16:43Z","timestamp":1732126603000},"page":"251-272","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Visual Relationship Transformation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9437-7639","authenticated-orcid":false,"given":"Xiaoyu","family":"Xu","sequence":"first","affiliation":[]},{"given":"Jiayan","family":"Qiu","sequence":"additional","affiliation":[]},{"given":"Baosheng","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Zhou","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,21]]},"reference":[{"key":"15_CR1","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1007\/s11263-016-0902-9","volume":"120","author":"H Aan\u00e6s","year":"2016","unstructured":"Aan\u00e6s, H., Jensen, R.R., Vogiatzis, G., Tola, E., Dahl, A.B.: Large-scale data for multiple-view stereopsis. Int. J. Comput. Vision 120, 153\u2013168 (2016)","journal-title":"Int. J. Comput. Vision"},{"key":"15_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"422","DOI":"10.1007\/978-3-030-58452-8_25","volume-title":"Computer Vision \u2013 ECCV 2020","author":"P Achlioptas","year":"2020","unstructured":"Achlioptas, P., Abdelreheem, A., Xia, F., Elhoseiny, M., Guibas, L.: ReferIt3D: neural listeners for fine-grained 3d object identification in real-world scenes. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020, Part I. LNCS, vol. 12346, pp. 422\u2013440. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_25"},{"key":"15_CR3","unstructured":"Azizian, W., Lelarge, M.: Expressive power of invariant and equivariant graph neural networks. arXiv preprint arXiv:2006.15646 (2020)"},{"issue":"1","key":"15_CR4","doi-asserted-by":"publisher","first-page":"2453","DOI":"10.1038\/s41467-022-29939-5","volume":"13","author":"S Batzner","year":"2022","unstructured":"Batzner, S., et al.: E (3)-equivariant graph neural networks for data-efficient and accurate interatomic potentials. Nat. Commun. 13(1), 2453 (2022)","journal-title":"Nat. Commun."},{"key":"15_CR5","unstructured":"Bronstein, M.M., Bruna, J., Cohen, T., Veli\u010dkovi\u0107, P.: Geometric deep learning: grids, groups, graphs, geodesics, and gauges. arXiv preprint arXiv:2104.13478 (2021)"},{"key":"15_CR6","doi-asserted-by":"crossref","unstructured":"Cai, B., Huang, J., Jia, R., Lv, C., Fu, H.: Neuda: neural deformable anchor for high-fidelity implicit surface reconstruction. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.00819"},{"key":"15_CR7","doi-asserted-by":"crossref","unstructured":"Chang, A., Savva, M., Manning, C.D.: Learning spatial knowledge for text to 3d scene generation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 2028\u20132038 (2014)","DOI":"10.3115\/v1\/D14-1217"},{"key":"15_CR8","doi-asserted-by":"crossref","unstructured":"Chen, S., Shi, Z., Mettes, P., Snoek, C.G.: Social fabric: Tubelet compositions for video relation detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 13485\u201313494 (2021)","DOI":"10.1109\/ICCV48922.2021.01323"},{"key":"15_CR9","doi-asserted-by":"crossref","unstructured":"Chen, T., Yu, W., Chen, R., Lin, L.: Knowledge-embedded routing network for scene graph generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6163\u20136171 (2019)","DOI":"10.1109\/CVPR.2019.00632"},{"key":"15_CR10","unstructured":"Cohen, T., Weiler, M., Kicanaoglu, B., Welling, M.: Gauge equivariant convolutional networks and the icosahedral CNN. In: International Conference on Machine Learning, pp. 1321\u20131330. PMLR (2019)"},{"key":"15_CR11","unstructured":"Cohen, T., Welling, M.: Group equivariant convolutional networks. In: International Conference on Machine Learning, pp. 2990\u20132999. PMLR (2016)"},{"key":"15_CR12","unstructured":"Cohen, T.S., Geiger, M., Weiler, M.: Intertwiners between induced representations (with applications to the theory of equivariant neural networks). arXiv preprint arXiv:1803.10743 (2018)"},{"key":"15_CR13","unstructured":"Cohen, T.S., Geiger, M., Weiler, M.: A general theory of equivariant CNNs on homogeneous spaces. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"15_CR14","unstructured":"Cohen, T.S., Welling, M.: Steerable cnns. arXiv preprint arXiv:1612.08498 (2016)"},{"key":"15_CR15","doi-asserted-by":"crossref","unstructured":"Dai, B., Zhang, Y., Lin, D.: Detecting visual relationships with deep relational networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3076\u20133086 (2017)","DOI":"10.1109\/CVPR.2017.352"},{"key":"15_CR16","doi-asserted-by":"crossref","unstructured":"Deng, C., Litany, O., Duan, Y., Poulenard, A., Tagliasacchi, A., Guibas, L.J.: Vector neurons: a general framework for so (3)-equivariant networks. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12200\u201312209 (2021)","DOI":"10.1109\/ICCV48922.2021.01198"},{"issue":"1","key":"15_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11263-011-0439-x","volume":"95","author":"C Desai","year":"2011","unstructured":"Desai, C., Ramanan, D., Fowlkes, C.C.: Discriminative models for multi-class object layout. Int. J. Comput. Vision 95(1), 1\u201312 (2011)","journal-title":"Int. J. Comput. Vision"},{"issue":"6394","key":"15_CR18","doi-asserted-by":"publisher","first-page":"1204","DOI":"10.1126\/science.aar6170","volume":"360","author":"SA Eslami","year":"2018","unstructured":"Eslami, S.A., et al.: Neural scene representation and rendering. Science 360(6394), 1204\u20131210 (2018)","journal-title":"Science"},{"issue":"12","key":"15_CR19","doi-asserted-by":"publisher","first-page":"2854","DOI":"10.1109\/TPAMI.2013.168","volume":"35","author":"A Farhadi","year":"2013","unstructured":"Farhadi, A., Sadeghi, M.A.: Phrasal recognition. IEEE Trans. Pattern Anal. Mach. Intell. 35(12), 2854\u20132865 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"15_CR20","doi-asserted-by":"crossref","unstructured":"Fisher, M., Savva, M., Hanrahan, P.: Characterizing structural relationships in scenes using graph kernels. In: ACM SIGGRAPH 2011 Papers, pp. 1\u201312 (2011)","DOI":"10.1145\/1964921.1964929"},{"key":"15_CR21","doi-asserted-by":"crossref","unstructured":"Fu, H., et\u00a0al.: 3d-front: 3d furnished rooms with layouts and semantics. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10933\u201310942 (2021)","DOI":"10.1109\/ICCV48922.2021.01075"},{"key":"15_CR22","unstructured":"Gehring, J., Auli, M., Grangier, D., Yarats, D., Dauphin, Y.N.: Convolutional sequence to sequence learning. In: International Conference on Machine Learning (ICML), pp. 1243\u20131252. PMLR (2017)"},{"key":"15_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1007\/978-3-540-88682-2_3","volume-title":"Computer Vision \u2013 ECCV 2008","author":"A Gupta","year":"2008","unstructured":"Gupta, A., Davis, L.S.: Beyond nouns: exploiting prepositions and comparative adjectives for learning visual classifiers. In: Forsyth, D., Torr, P., Zisserman, A. (eds.) ECCV 2008. LNCS, vol. 5302, pp. 16\u201329. Springer, Heidelberg (2008). https:\/\/doi.org\/10.1007\/978-3-540-88682-2_3"},{"key":"15_CR24","doi-asserted-by":"crossref","unstructured":"Hu, D., Zhang, Z., Hou, T., Liu, T., Fu, H., Gong, M.: Multiscale representation for real-time anti-aliasing neural rendering. arXiv preprint arXiv:2304.10075 (2023)","DOI":"10.1109\/ICCV51070.2023.01629"},{"key":"15_CR25","unstructured":"Insafutdinov, E., Dosovitskiy, A.: Unsupervised learning of shape and pose with differentiable point clouds. In: Advances in Neural Information Processing Systems, vol. 31 (2018)"},{"key":"15_CR26","volume-title":"Efficient Monte Carlo Methods for Light Transport in Scattering Media","author":"W Jarosz","year":"2008","unstructured":"Jarosz, W.: Efficient Monte Carlo Methods for Light Transport in Scattering Media. University of California, San Diego (2008)"},{"key":"15_CR27","doi-asserted-by":"crossref","unstructured":"Jensen, R., Dahl, A., Vogiatzis, G., Tola, E., Aan\u00e6s, H.: Large scale multi-view stereopsis evaluation. In: 2014 IEEE Conference on Computer Vision and Pattern Recognition, pp. 406\u2013413. IEEE (2014)","DOI":"10.1109\/CVPR.2014.59"},{"key":"15_CR28","doi-asserted-by":"crossref","unstructured":"Ji, W., et al.: Vidvrd 2021: the third grand challenge on video relation detection. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 4779\u20134783 (2021)","DOI":"10.1145\/3474085.3479232"},{"key":"15_CR29","doi-asserted-by":"crossref","unstructured":"Johnson, J., et al.: Image retrieval using scene graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3668\u20133678 (2015)","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"15_CR30","doi-asserted-by":"crossref","unstructured":"Kato, H., Ushiku, Y., Harada, T.: Neural 3d mesh renderer. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3907\u20133916 (2018)","DOI":"10.1109\/CVPR.2018.00411"},{"key":"15_CR31","unstructured":"Keriven, N., Peyr\u00e9, G.: Universal invariant and equivariant graph neural networks. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"15_CR32","doi-asserted-by":"crossref","unstructured":"Kopanas, G., Philip, J., Leimk\u00fchler, T., Drettakis, G.: Point-based neural rendering with per-view optimization. In: Computer Graphics Forum, vol.\u00a040, pp. 29\u201343. Wiley Online Library (2021)","DOI":"10.1111\/cgf.14339"},{"key":"15_CR33","doi-asserted-by":"crossref","unstructured":"Krishna, R., Chami, I., Bernstein, M., Fei-Fei, L.: Referring relationships. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6867\u20136876 (2018)","DOI":"10.1109\/CVPR.2018.00718"},{"key":"15_CR34","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123","author":"R Krishna","year":"2017","unstructured":"Krishna, R., et al.: Visual genome: connecting language and vision using crowdsourced dense image annotations. Int. J. Comput. Vision 123, 32\u201373 (2017)","journal-title":"Int. J. Comput. Vision"},{"key":"15_CR35","doi-asserted-by":"crossref","unstructured":"Lassner, C., Zollhofer, M.: Pulsar: efficient sphere-based neural rendering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1440\u20131449 (2021)","DOI":"10.1109\/CVPR46437.2021.00149"},{"key":"15_CR36","doi-asserted-by":"crossref","unstructured":"Levoy, M., Hanrahan, P.: Light field rendering. In: Proceedings of the 23rd Annual Conference on Computer Graphics and Interactive Techniques, pp. 31\u201342 (1996)","DOI":"10.1145\/237170.237199"},{"key":"15_CR37","doi-asserted-by":"crossref","unstructured":"Li, R., Zhang, S., Wan, B., He, X.: Bipartite graph network with adaptive message passing for unbiased scene graph generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11109\u201311119 (2021)","DOI":"10.1109\/CVPR46437.2021.01096"},{"key":"15_CR38","doi-asserted-by":"crossref","unstructured":"Li, Y., Yang, X., Shang, X., Chua, T.S.: Interventional video relation detection. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 4091\u20134099 (2021)","DOI":"10.1145\/3474085.3475540"},{"key":"15_CR39","doi-asserted-by":"crossref","unstructured":"Li, Y., Ouyang, W., Wang, X., Tang, X.: Vip-cnn: Visual phrase guided convolutional neural network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1347\u20131356 (2017)","DOI":"10.1109\/CVPR.2017.766"},{"key":"15_CR40","doi-asserted-by":"crossref","unstructured":"Liang, K., Guo, Y., Chang, H., Chen, X.: Visual relationship detection with deep structural ranking. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.12274"},{"key":"15_CR41","doi-asserted-by":"crossref","unstructured":"Liang, X., Lee, L., Xing, E.P.: Deep variation-structured reinforcement learning for visual relationship and attribute detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 848\u2013857 (2017)","DOI":"10.1109\/CVPR.2017.469"},{"key":"15_CR42","doi-asserted-by":"crossref","unstructured":"Lin, C.H., Kong, C., Lucey, S.: Learning efficient point cloud generation for dense 3d object reconstruction. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.12278"},{"key":"15_CR43","doi-asserted-by":"crossref","unstructured":"Lin, X., Ding, C., Zeng, J., Tao, D.: Gps-net: graph property sensing network for scene graph generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3746\u20133753 (2020)","DOI":"10.1109\/CVPR42600.2020.00380"},{"key":"15_CR44","unstructured":"Liu, B., Dong, Q., Hu, Z.: Hardness sampling for self-training based transductive zero-shot learning. In: CVPR, pp. 16499\u201316508 (2021)"},{"key":"15_CR45","unstructured":"Liu, B., Hu, L., Hu, Z., Dong, Q.: Hardboost: boosting zero-shot learning with hard classes. arXiv preprint arXiv:2201.05479 (2022)"},{"key":"15_CR46","doi-asserted-by":"crossref","unstructured":"Liu, C., Jin, Y., Xu, K., Gong, G., Mu, Y.: Beyond short-term snippet: video relation detection with spatio-temporal global context. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10840\u201310849 (2020)","DOI":"10.1109\/CVPR42600.2020.01085"},{"key":"15_CR47","doi-asserted-by":"crossref","unstructured":"Lombardi, S., Simon, T., Saragih, J., Schwartz, G., Lehrmann, A., Sheikh, Y.: Neural volumes: learning dynamic renderable volumes from images. arXiv preprint arXiv:1906.07751 (2019)","DOI":"10.1145\/3306346.3323020"},{"key":"15_CR48","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/978-3-319-10584-0_11","volume-title":"Computer Vision \u2013 ECCV 2014","author":"MM Loper","year":"2014","unstructured":"Loper, M.M., Black, M.J.: OpenDR: an approximate differentiable renderer. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014, Part VII. LNCS, vol. 8695, pp. 154\u2013169. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10584-0_11"},{"key":"15_CR49","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"852","DOI":"10.1007\/978-3-319-46448-0_51","volume-title":"Computer Vision \u2013 ECCV 2016","author":"C Lu","year":"2016","unstructured":"Lu, C., Krishna, R., Bernstein, M., Fei-Fei, L.: Visual relationship detection with language priors. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016, Part I. LNCS, vol. 9905, pp. 852\u2013869. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_51"},{"key":"15_CR50","doi-asserted-by":"crossref","unstructured":"Ma, L., et al.: Deblur-nerf: neural radiance fields from blurry images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12861\u201312870 (2022)","DOI":"10.1109\/CVPR52688.2022.01252"},{"key":"15_CR51","doi-asserted-by":"crossref","unstructured":"Mildenhall, B., Hedman, P., Martin-Brualla, R., Srinivasan, P.P., Barron, J.T.: Nerf in the dark: High dynamic range view synthesis from noisy raw images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16190\u201316199 (2022)","DOI":"10.1109\/CVPR52688.2022.01571"},{"issue":"1","key":"15_CR52","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: Nerf: representing scenes as neural radiance fields for view synthesis. Commun. ACM 65(1), 99\u2013106 (2021)","journal-title":"Commun. ACM"},{"key":"15_CR53","doi-asserted-by":"crossref","unstructured":"Niemeyer, M., Mescheder, L., Oechsle, M., Geiger, A.: Differentiable volumetric rendering: learning implicit 3d representations without 3d supervision. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3504\u20133515 (2020)","DOI":"10.1109\/CVPR42600.2020.00356"},{"key":"15_CR54","doi-asserted-by":"crossref","unstructured":"Plummer, B.A., Mallya, A., Cervantes, C.M., Hockenmaier, J., Lazebnik, S.: Phrase localization and visual relationship detection with comprehensive image-language cues. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1928\u20131937 (2017)","DOI":"10.1109\/ICCV.2017.213"},{"key":"15_CR55","doi-asserted-by":"crossref","unstructured":"Qi, M., Li, W., Yang, Z., Wang, Y., Luo, J.: Attentive relational networks for mapping images to scene graphs. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3957\u20133966 (2019)","DOI":"10.1109\/CVPR.2019.00408"},{"key":"15_CR56","doi-asserted-by":"crossref","unstructured":"Qian, X., Zhuang, Y., Li, Y., Xiao, S., Pu, S., Xiao, J.: Video relation detection with spatio-temporal graph. In: Proceedings of the 27th ACM International Conference on Multimedia, pp. 84\u201393 (2019)","DOI":"10.1145\/3343031.3351058"},{"issue":"2","key":"15_CR57","doi-asserted-by":"publisher","first-page":"745","DOI":"10.1109\/TPAMI.2019.2934052","volume":"43","author":"J Qiu","year":"2019","unstructured":"Qiu, J., Wang, X., Fua, P., Tao, D.: Matching seqlets: an unsupervised approach for locality preserving sequence matching. IEEE Trans. Pattern Anal. Mach. Intell. 43(2), 745\u2013752 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"15_CR58","doi-asserted-by":"crossref","unstructured":"Qiu, J., Wang, X., Maybank, S.J., Tao, D.: World from blur. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8493\u20138504 (2019)","DOI":"10.1109\/CVPR.2019.00869"},{"key":"15_CR59","doi-asserted-by":"crossref","unstructured":"Qiu, J., Yang, Y., Wang, X., Tao, D.: Hallucinating visual instances in total absentia. In: European Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-58558-7_16"},{"key":"15_CR60","doi-asserted-by":"crossref","unstructured":"Qiu, J., Yang, Y., Wang, X., Tao, D.: Scene essence. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8322\u20138333 (2021)","DOI":"10.1109\/CVPR46437.2021.00822"},{"key":"15_CR61","doi-asserted-by":"crossref","unstructured":"Rasouli, A., Kotseruba, I., Kunic, T., Tsotsos, J.K.: Pie: a large-scale dataset and models for pedestrian intention estimation and trajectory prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6262\u20136271 (2019)","DOI":"10.1109\/ICCV.2019.00636"},{"key":"15_CR62","doi-asserted-by":"crossref","unstructured":"Rematas, K., et al.: Urban radiance fields. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12932\u201312942 (2022)","DOI":"10.1109\/CVPR52688.2022.01259"},{"issue":"4","key":"15_CR63","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2766899","volume":"34","author":"P Ren","year":"2015","unstructured":"Ren, P., Dong, Y., Lin, S., Tong, X., Guo, B.: Image based relighting using neural networks. ACM Trans. Graph. (ToG) 34(4), 1\u201312 (2015)","journal-title":"ACM Trans. Graph. (ToG)"},{"issue":"6","key":"15_CR64","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2016","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: towards real-time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. 39(6), 1137\u20131149 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"15_CR65","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2013 MICCAI 2015","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: convolutional networks for biomedical image segmentation. In: Navab, N., Hornegger, J., Wells, W.M., Frangi, A.F. (eds.) MICCAI 2015, Part III. LNCS, vol. 9351, pp. 234\u2013241. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24574-4_28"},{"key":"15_CR66","doi-asserted-by":"crossref","unstructured":"Roveri, R., Rahmann, L., Oztireli, C., Gross, M.: A network architecture for point cloud classification via automatic depth images generation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4176\u20134184 (2018)","DOI":"10.1109\/CVPR.2018.00439"},{"issue":"6","key":"15_CR67","doi-asserted-by":"publisher","first-page":"869","DOI":"10.1016\/j.cag.2004.08.014","volume":"28","author":"M Sainz","year":"2004","unstructured":"Sainz, M., Pajarola, R.: Point-based rendering techniques. Comput. Graph. 28(6), 869\u2013879 (2004)","journal-title":"Comput. Graph."},{"key":"15_CR68","doi-asserted-by":"crossref","unstructured":"Schonberger, J.L., Frahm, J.M.: Structure-from-motion revisited. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4104\u20134113 (2016)","DOI":"10.1109\/CVPR.2016.445"},{"key":"15_CR69","doi-asserted-by":"crossref","unstructured":"Shang, X., Li, Y., Xiao, J., Ji, W., Chua, T.S.: Video visual relation detection via iterative inference. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 3654\u20133663 (2021)","DOI":"10.1145\/3474085.3475263"},{"key":"15_CR70","doi-asserted-by":"crossref","unstructured":"Shang, X., Ren, T., Guo, J., Zhang, H., Chua, T.S.: Video visual relation detection. In: Proceedings of the 25th ACM International Conference on Multimedia, pp. 1300\u20131308 (2017)","DOI":"10.1145\/3123266.3123380"},{"key":"15_CR71","unstructured":"Sitzmann, V., Zollh\u00f6fer, M., Wetzstein, G.: Scene representation networks: Continuous 3d-structure-aware neural scene representations. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"15_CR72","doi-asserted-by":"crossref","unstructured":"Su, Z., Shang, X., Chen, J., Jiang, Y.G., Qiu, Z., Chua, T.S.: Video relation detection via multiple hypothesis association. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 3127\u20133135 (2020)","DOI":"10.1145\/3394171.3413764"},{"key":"15_CR73","doi-asserted-by":"crossref","unstructured":"Sun, X., Ren, T., Zi, Y., Wu, G.: Video visual relation detection via multi-modal feature fusion. In: Proceedings of the 27th ACM International Conference on Multimedia, pp. 2657\u20132661 (2019)","DOI":"10.1145\/3343031.3356076"},{"key":"15_CR74","doi-asserted-by":"crossref","unstructured":"Tang, K., Niu, Y., Huang, J., Shi, J., Zhang, H.: Unbiased scene graph generation from biased training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3716\u20133725 (2020)","DOI":"10.1109\/CVPR42600.2020.00377"},{"key":"15_CR75","doi-asserted-by":"crossref","unstructured":"Tewari, A., et\u00a0al.: State of the art on neural rendering. In: Computer Graphics Forum, vol.\u00a039, pp. 701\u2013727. Wiley Online Library (2020)","DOI":"10.1111\/cgf.14022"},{"key":"15_CR76","doi-asserted-by":"crossref","unstructured":"Tsai, Y.H.H., Divvala, S., Morency, L.P., Salakhutdinov, R., Farhadi, A.: Video relationship reasoning using gated spatio-temporal energy graph. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10424\u201310433 (2019)","DOI":"10.1109\/CVPR.2019.01067"},{"key":"15_CR77","doi-asserted-by":"crossref","unstructured":"Turki, H., Ramanan, D., Satyanarayanan, M.: Mega-nerf: Scalable construction of large-scale nerfs for virtual fly-throughs. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12922\u201312931 (2022)","DOI":"10.1109\/CVPR52688.2022.01258"},{"key":"15_CR78","doi-asserted-by":"crossref","unstructured":"Wang, C., et al.: Nerf-sr: high quality neural radiance fields using supersampling. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 6445\u20136454 (2022)","DOI":"10.1145\/3503161.3547808"},{"key":"15_CR79","doi-asserted-by":"crossref","unstructured":"Wang, J., Dong, Y., Tong, X., Lin, Z., Guo, B.: Kernel nystr\u00f6m method for light transport. In: ACM SIGGRAPH 2009 Papers, pp. 1\u201310 (2009)","DOI":"10.1145\/1576246.1531335"},{"issue":"4","key":"15_CR80","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004)","journal-title":"IEEE Trans. Image Process."},{"key":"15_CR81","unstructured":"Weiler, M., Cesa, G.: General e (2)-equivariant steerable cnns. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"15_CR82","unstructured":"Weiler, M., Geiger, M., Welling, M., Boomsma, W., Cohen, T.S.: 3d steerable cnns: learning rotationally equivariant features in volumetric data. In: Advances in Neural Information Processing Systems, vol. 31 (2018)"},{"key":"15_CR83","doi-asserted-by":"crossref","unstructured":"Wiles, O., Gkioxari, G., Szeliski, R., Johnson, J.: Synsin: end-to-end view synthesis from a single image. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7467\u20137477 (2020)","DOI":"10.1109\/CVPR42600.2020.00749"},{"key":"15_CR84","unstructured":"Xiangli, Y., et al.: Citynerf: building nerf at city scale. arXiv preprint arXiv:2112.05504 (2021)"},{"key":"15_CR85","doi-asserted-by":"publisher","unstructured":"Xu, X., Qiu, J., Wang, X., Wang, Z.: Relationship spatialization for depth estimation. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision - ECCV 2022, ECCV 2022, LNCS, vol. 13697, pp. 615\u2013637. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19836-6_35","DOI":"10.1007\/978-3-031-19836-6_35"},{"key":"15_CR86","doi-asserted-by":"crossref","unstructured":"Yang, J., Lu, J., Lee, S., Batra, D., Parikh, D.: Graph r-cnn for scene graph generation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 670\u2013685 (2018)","DOI":"10.1007\/978-3-030-01246-5_41"},{"key":"15_CR87","doi-asserted-by":"crossref","unstructured":"Yang, Y., Qiu, J., Song, M., Tao, D., Wang, X.: Distilling knowledge from graph convolutional networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), June 2020","DOI":"10.1109\/CVPR42600.2020.00710"},{"key":"15_CR88","doi-asserted-by":"crossref","unstructured":"Yao, B., Fei-Fei, L.: Modeling mutual context of object and human pose in human-object interaction activities. In: 2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, pp. 17\u201324. IEEE (2010)","DOI":"10.1109\/CVPR.2010.5540235"},{"key":"15_CR89","doi-asserted-by":"crossref","unstructured":"Yin, G., Sheng, L., Liu, B., Yu, N., Wang, X., Shao, J., Loy, C.C.: Zoom-net: mining deep feature interactions for visual relationship recognition. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 322\u2013338 (2018)","DOI":"10.1007\/978-3-030-01219-9_20"},{"key":"15_CR90","doi-asserted-by":"crossref","unstructured":"Yu, A., Ye, V., Tancik, M., Kanazawa, A.: pixelnerf: neural radiance fields from one or few images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4578\u20134587 (2021)","DOI":"10.1109\/CVPR46437.2021.00455"},{"key":"15_CR91","doi-asserted-by":"crossref","unstructured":"Yu, A., Ye, V., Tancik, M., Kanazawa, A.: pixelnerf: neural radiance fields from one or few images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4578\u20134587, June 2021","DOI":"10.1109\/CVPR46437.2021.00455"},{"key":"15_CR92","doi-asserted-by":"crossref","unstructured":"Yu, H., Li, R., Xie, S., Qiu, J.: Shadow-enlightened image outpainting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7850\u20137860 (2024)","DOI":"10.1109\/CVPR52733.2024.00750"},{"key":"15_CR93","doi-asserted-by":"crossref","unstructured":"Yu, R., Li, A., Morariu, V.I., Davis, L.S.: Visual relationship detection with internal and external linguistic knowledge distillation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1974\u20131982 (2017)","DOI":"10.1109\/ICCV.2017.121"},{"key":"15_CR94","doi-asserted-by":"crossref","unstructured":"Zhang, H., Kyaw, Z., Chang, S.F., Chua, T.S.: Visual translation embedding network for visual relation detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5532\u20135540 (2017)","DOI":"10.1109\/CVPR.2017.331"},{"key":"15_CR95","doi-asserted-by":"crossref","unstructured":"Zhang, J., Kalantidis, Y., Rohrbach, M., Paluri, M., Elgammal, A., Elhoseiny, M.: Large-scale visual relationship understanding. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a033, pp. 9185\u20139194 (2019)","DOI":"10.1609\/aaai.v33i01.33019185"},{"key":"15_CR96","doi-asserted-by":"publisher","unstructured":"Zhang, J., et al.: Digging into radiance grid for real-time view synthesis with detail preservation. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision - ECCV 2022, ECCV 2022, LNCS, vol. 13675, pp. 724\u2013740. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19784-0_42","DOI":"10.1007\/978-3-031-19784-0_42"},{"key":"15_CR97","doi-asserted-by":"crossref","unstructured":"Zheng, S., Chen, S., Jin, Q.: Vrdformer: end-to-end video visual relation detection with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18836\u201318846 (2022)","DOI":"10.1109\/CVPR52688.2022.01829"},{"key":"15_CR98","doi-asserted-by":"crossref","unstructured":"Zhou, T., Tucker, R., Flynn, J., Fyffe, G., Snavely, N.: Stereo magnification: learning view synthesis using multiplane images. arXiv preprint arXiv:1805.09817 (2018)","DOI":"10.1145\/3197517.3201323"},{"key":"15_CR99","doi-asserted-by":"crossref","unstructured":"Zhuang, B., Liu, L., Shen, C., Reid, I.: Towards context-aware interaction recognition for visual relationship detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 589\u2013598 (2017)","DOI":"10.1109\/ICCV.2017.71"},{"key":"15_CR100","doi-asserted-by":"crossref","unstructured":"Zwicker, M., Pfister, H., Van\u00a0Baar, J., Gross, M.: Surface splatting. In: Proceedings of the 28th Annual Conference on Computer Graphics and Interactive Techniques, pp. 371\u2013378 (2001)","DOI":"10.1145\/383259.383300"},{"issue":"3","key":"15_CR101","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1109\/TVCG.2002.1021576","volume":"8","author":"M Zwicker","year":"2002","unstructured":"Zwicker, M., Pfister, H., Van Baar, J., Gross, M.: Ewa splatting. IEEE Trans. Visual Comput. Graphics 8(3), 223\u2013238 (2002)","journal-title":"IEEE Trans. Visual Comput. Graphics"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73650-6_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T19:06:16Z","timestamp":1732129576000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73650-6_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,21]]},"ISBN":["9783031736490","9783031736506"],"references-count":101,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73650-6_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,21]]},"assertion":[{"value":"21 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}