{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,18]],"date-time":"2025-10-18T11:00:03Z","timestamp":1760785203512,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":36,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819785018"},{"type":"electronic","value":"9789819785025"}],"license":[{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8502-5_4","type":"book-chapter","created":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T14:03:04Z","timestamp":1730383384000},"page":"45-58","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["PanoDthNet: Depth Estimation Based on Indoor and Outdoor Panoramic Images"],"prefix":"10.1007","author":[{"given":"Jieyuan","family":"Cai","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jingheng","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qingling","family":"Chang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yan","family":"Cui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,1]]},"reference":[{"key":"4_CR1","unstructured":"Lavreniuk, M., Bhat, S.F., M\u00fcller, M., Wonka, P.: Evp: Enhanced visual perception using inverse multi-attentive feature refinement and regularized image-text alignment (2023). arXiv:2312.08548"},{"key":"4_CR2","doi-asserted-by":"crossref","unstructured":"Yun, I., Shin, C., Lee, H., Lee, H.J., Rhee, C.E.: Egformer: equirectangular geometry-biased transformer for 360 depth estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6101\u20136112 (2023)","DOI":"10.1109\/ICCV51070.2023.00561"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Jin, L., Xu, Y., Zheng, J., Zhang, J., Tang, R., Xu, S., Yu, J., Gao, S.: Geometric structure based and regularized depth estimation from 360 indoor imagery. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 889\u2013898 (2020)","DOI":"10.1109\/CVPR42600.2020.00097"},{"key":"4_CR4","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et\u00a0al.: An image is worth 16x16 words: Transformers for image recognition at scale (2020). arXiv:2010.11929"},{"key":"4_CR5","doi-asserted-by":"crossref","unstructured":"Wang, F.E., Yeh, Y.H., Sun, M., Chiu, W.C., Tsai, Y.H.: Bifuse: monocular 360 depth estimation via bi-projection fusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 462\u2013471 (2020)","DOI":"10.1109\/CVPR42600.2020.00054"},{"issue":"2","key":"4_CR6","doi-asserted-by":"publisher","first-page":"1519","DOI":"10.1109\/LRA.2021.3058957","volume":"6","author":"H Jiang","year":"2021","unstructured":"Jiang, H., Sheng, Z., Zhu, S., Dong, Z., Huang, R.: Unifuse: unidirectional fusion for 360 panorama depth estimation. IEEE Robot Autom Lett 6(2), 1519\u20131526 (2021)","journal-title":"IEEE Robot Autom Lett"},{"key":"4_CR7","doi-asserted-by":"crossref","unstructured":"Zhu, S., Brazil, G., Liu, X.: The edge of depth: Explicit constraints between segmentation and depth. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13116\u201313125 (2020)","DOI":"10.1109\/CVPR42600.2020.01313"},{"key":"4_CR8","doi-asserted-by":"crossref","unstructured":"Yun, I., Lee, H.J., Rhee, C.E.: Improving 360 monocular depth estimation via non-local dense prediction transformer and joint supervised and self-supervised learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a036, pp. 3224\u20133233 (2022)","DOI":"10.1609\/aaai.v36i3.20231"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Zhu, X., Hu, H., Lin, S., Dai, J.: Deformable convnets v2: More deformable, better results. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9308\u20139316 (2019)","DOI":"10.1109\/CVPR.2019.00953"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7794\u20137803 (2018)","DOI":"10.1109\/CVPR.2018.00813"},{"key":"4_CR11","doi-asserted-by":"crossref","unstructured":"Tang, F., Xu, Z., Huang, Q., Wang, J., Hou, X., Su, J., Liu, J.: Duat: dual-aggregation transformer network for medical image segmentation. In: Chinese Conference on Pattern Recognition and Computer Vision (PRCV), pp. 343\u2013356. Springer (2023)","DOI":"10.1007\/978-981-99-8469-5_27"},{"key":"4_CR12","doi-asserted-by":"crossref","unstructured":"Chang, A., Dai, A., Funkhouser, T., Halber, M., Niessner, M., Savva, M., Song, S., Zeng, A., Zhang, Y.: Matterport3d: Learning from rgb-d data in indoor environments (2017). arXiv:1709.06158","DOI":"10.1109\/3DV.2017.00081"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"Albanis, G., Zioulis, N., Drakoulis, P., Gkitsas, V., Sterzentsenko, V., Alvarez, F., Zarpalas, D., Daras, P.: Pano3d: a holistic benchmark and a solid baseline for 360 depth estimation. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), pp. 3722\u20133732. IEEE (2021)","DOI":"10.1109\/CVPRW53098.2021.00413"},{"key":"4_CR14","unstructured":"Armeni, I., Sax, S., Zamir, A.R., Savarese, S.: Joint 2d-3d-semantic data for indoor scene understanding (2017). arXiv:1702.01105"},{"key":"4_CR15","unstructured":"Eigen, D., Puhrsch, C., Fergus, R.: Depth map prediction from a single image using a multi-scale deep network. Adv. Neural Inf. Process. Syst. 27 (2014)"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Zioulis, N., Karakottas, A., Zarpalas, D., Daras, P.: Omnidepth: Dense depth estimation for indoors spherical panoramas. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 448\u2013465 (2018)","DOI":"10.1007\/978-3-030-01231-1_28"},{"key":"4_CR17","unstructured":"Bhat, S.F., Alhashim, I., Wonka, P.: Adabins: Depth estimation using adaptive bins. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4009\u20134018 (2021)"},{"key":"4_CR18","doi-asserted-by":"crossref","unstructured":"Zeng, W., Karaoglu, S., Gevers, T.: Joint 3d layout and depth prediction from a single indoor panorama image. In: European Conference on Computer Vision, pp. 666\u2013682. Springer (2020)","DOI":"10.1007\/978-3-030-58517-4_39"},{"key":"4_CR19","doi-asserted-by":"crossref","unstructured":"Eder, M., Moulon, P., Guan, L.: Pano popups: Indoor 3d reconstruction with a plane-aware network. In: 2019 International Conference on 3D Vision (3DV), pp. 76\u201384. IEEE (2019)","DOI":"10.1109\/3DV.2019.00018"},{"key":"4_CR20","doi-asserted-by":"crossref","unstructured":"Zioulis, N., Karakottas, A., Zarpalas, D., Alvarez, F., Daras, P.: Spherical view synthesis for self-supervised 360 depth estimation. In: 2019 International Conference on 3D Vision (3DV), pp. 690\u2013699. IEEE (2019)","DOI":"10.1109\/3DV.2019.00081"},{"key":"4_CR21","doi-asserted-by":"crossref","unstructured":"Zhou, K., Wang, K., Yang, K.: Padenet: An efficient and robust panoramic monocular depth estimation network for outdoor scenes. In: 2020 IEEE 23rd International Conference on Intelligent Transportation Systems (ITSC), pp.\u00a01\u20136. IEEE (2020)","DOI":"10.1109\/ITSC45102.2020.9294206"},{"key":"4_CR22","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? the kitti vision benchmark suite. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 3354\u20133361. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"4_CR23","doi-asserted-by":"crossref","unstructured":"Ku, J., Harakeh, A., Waslander, S.L.: In defense of classical image processing: fast depth completion on the CPU. In: 2018 15th Conference on Computer and Robot Vision (CRV), pp. 16\u201322. IEEE (2018)","DOI":"10.1109\/CRV.2018.00013"},{"key":"4_CR24","doi-asserted-by":"crossref","unstructured":"Yu, W., Si, C., Zhou, P., Luo, M., Zhou, Y., Feng, J., Yan, S., Wang, X.: Metaformer baselines for vision. IEEE Trans. Pattern Anal. Mach. Intell. (2023)","DOI":"10.1109\/TPAMI.2023.3329173"},{"key":"4_CR25","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: Accelerating deep network training by reducing internal covariate shift. In: International Conference on Machine Learning, pp. 448\u2013456. PMLR (2015)"},{"key":"4_CR26","unstructured":"Glorot, X., Bordes, A., Bengio, Y.: Deep sparse rectifier neural networks. In: Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics, pp. 315\u2013323. JMLR Workshop and Conference Proceedings (2011)"},{"issue":"4","key":"4_CR27","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"LC Chen","year":"2017","unstructured":"Chen, L.C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: Deeplab: semantic image segmentation with deep convolutional nets, Atrous convolution, and fully connected CRFs. IEEE Trans. Pattern Anal. Mach. Intell. 40(4), 834\u2013848 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"4_CR28","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"4_CR29","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., et\u00a0al.: Spatial transformer networks. Adv. Neural Inf. Process. Syst. 28 (2015)"},{"key":"4_CR30","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J.Y., Kweon, I.S.: Cbam: Convolutional block attention module. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 3\u201319 (2018)","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"4_CR31","doi-asserted-by":"crossref","unstructured":"Shen, Z., Lin, C., Liao, K., Nie, L., Zheng, Z., Zhao, Y.: Panoformer: Panorama transformer for indoor 360$$^\\circ $$ depth estimation. In: European Conference on Computer Vision, pp. 195\u2013211. Springer (2022)","DOI":"10.1007\/978-3-031-19769-7_12"},{"key":"4_CR32","doi-asserted-by":"crossref","unstructured":"Ranftl, R., Bochkovskiy, A., Koltun, V.: Vision transformers for dense prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12179\u201312188 (2021)","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"4_CR33","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization (2014). arXiv:1412.6980"},{"key":"4_CR34","doi-asserted-by":"crossref","unstructured":"Sun, C., Sun, M., Chen, H.T.: Hohonet: 360 indoor holistic understanding with latent horizontal features. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2573\u20132582 (2021)","DOI":"10.1109\/CVPR46437.2021.00260"},{"key":"4_CR35","doi-asserted-by":"crossref","unstructured":"Yuan, W., Gu, X., Dai, Z., Zhu, S., Tan, P.: New CRFs: Neural window fully-connected CRFs for monocular depth estimation (2022). arXiv:2203.01502","DOI":"10.1109\/CVPR52688.2022.00389"},{"key":"4_CR36","doi-asserted-by":"crossref","unstructured":"Piccinelli, L., Sakaridis, C., Yu, F.: idisc: Internal discretization for monocular depth estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21477\u201321487 (2023)","DOI":"10.1109\/CVPR52729.2023.02057"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8502-5_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T14:16:42Z","timestamp":1730384202000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8502-5_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,1]]},"ISBN":["9789819785018","9789819785025"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8502-5_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,1]]},"assertion":[{"value":"1 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2024.prcv.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}