{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T19:58:22Z","timestamp":1770839902409,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":37,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819603473","type":"print"},{"value":"9789819603480","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,18]],"date-time":"2024-11-18T00:00:00Z","timestamp":1731888000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,18]],"date-time":"2024-11-18T00:00:00Z","timestamp":1731888000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-0348-0_20","type":"book-chapter","created":{"date-parts":[[2024,11,23]],"date-time":"2024-11-23T18:38:10Z","timestamp":1732387090000},"page":"273-290","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["FocDepthFormer: Transformer with\u00a0Latent LSTM for\u00a0Depth Estimation from\u00a0Focal Stack"],"prefix":"10.1007","author":[{"given":"Xueyang","family":"Kang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fengze","family":"Han","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Abdur R.","family":"Fayjie","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Patrick","family":"Vandewalle","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kourosh","family":"Khoshelham","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dong","family":"Gong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,18]]},"reference":[{"key":"20_CR1","doi-asserted-by":"crossref","unstructured":"Godard, C., Mac Aodha, O., Brostow, G.J.: Unsupervised monocular depth estimation with left-right consistency. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 270\u2013279 (2017)","DOI":"10.1109\/CVPR.2017.699"},{"key":"20_CR2","doi-asserted-by":"crossref","unstructured":"Pentland, A.P.: A new sense for depth of field. IEEE Trans. Pattern Anal. Mach. Intell. 523\u2013531 (1987)","DOI":"10.1109\/TPAMI.1987.4767940"},{"key":"20_CR3","doi-asserted-by":"crossref","unstructured":"Schonberger, J.L., Frahm, J.M.: Structure-from-motion revisited. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4104\u20134113 (2016)","DOI":"10.1109\/CVPR.2016.445"},{"key":"20_CR4","doi-asserted-by":"crossref","unstructured":"Ramamonjisoa, M., Firman, M., Watson, J., Lepetit, V., Turmukhambetov, D.: Single image depth prediction with wavelet decomposition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11089\u201311098 (2021)","DOI":"10.1109\/CVPR46437.2021.01094"},{"key":"20_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"525","DOI":"10.1007\/978-3-030-20893-6_33","volume-title":"Computer Vision \u2013 ACCV 2018","author":"C Hazirbas","year":"2019","unstructured":"Hazirbas, C., Soyer, S.G., Staab, M.C., Leal-Taix\u00e9, L., Cremers, D.: Deep depth from focus. In: Jawahar, C.V., Li, H., Mori, G., Schindler, K. (eds.) ACCV 2018. LNCS, vol. 11363, pp. 525\u2013541. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-20893-6_33"},{"key":"20_CR6","doi-asserted-by":"crossref","unstructured":"Yang, F., Huang, X., Zhou, Z.: Deep depth from focus with differential focus volume. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12642\u201312651 (2022)","DOI":"10.1109\/CVPR52688.2022.01231"},{"key":"20_CR7","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"20_CR8","doi-asserted-by":"crossref","unstructured":"Wang, N.H., et al.: Bridging unsupervised and supervised depth from focus via all-in-focus supervision. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12621\u201312631 (2021)","DOI":"10.1109\/ICCV48922.2021.01239"},{"key":"20_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1007\/978-3-319-54187-7_2","volume-title":"Computer Vision \u2013 ACCV 2016","author":"K Honauer","year":"2017","unstructured":"Honauer, K., Johannsen, O., Kondermann, D., Goldluecke, B.: A dataset and evaluation methodology for depth estimation on 4D\u00a0light fields. In: Lai, S.-H., Lepetit, V., Nishino, K., Sato, Y. (eds.) ACCV 2016. LNCS, vol. 10113, pp. 19\u201334. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-54187-7_2"},{"key":"20_CR10","doi-asserted-by":"crossref","unstructured":"Pintore, G., Agus, M., Almansa, E., Schneider, J., Gobbetti, E.: SliceNet: deep dense depth estimation from a single indoor panorama using a slice-based representation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11536\u201311545 (2021)","DOI":"10.1109\/CVPR46437.2021.01137"},{"key":"20_CR11","unstructured":"Benavides, F.T., Ignatov, A., Timofte, R.: PhoneDepth: a dataset for monocular depth estimation on mobile devices. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3049\u20133056 (2022)"},{"key":"20_CR12","unstructured":"Barratt, S., Hannel, B.: Extracting the depth and all-in-focus image from a focal stack. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3451\u20133459 (2015)"},{"key":"20_CR13","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"613","DOI":"10.1007\/978-3-031-20044-1_35","volume-title":"ECCV 2022","author":"J Hornauer","year":"2022","unstructured":"Hornauer, J., Belagiannis, V.: Gradient-based uncertainty for monocular depth estimation. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13680, pp. 613\u2013630. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20044-1_35"},{"key":"20_CR14","doi-asserted-by":"crossref","unstructured":"Liu, C., Qiu, J., Jiang, M.: Light field reconstruction from focal stack based on Landweber iterative scheme. In: Mathematics in Imaging, pp. MM2C\u20133. Optica Publishing Group (2017)","DOI":"10.1364\/MATH.2017.MM2C.3"},{"key":"20_CR15","doi-asserted-by":"crossref","unstructured":"Suwajanakorn, S., Hernandez, C., Seitz, S.M.: Depth from focus with your mobile phone. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3497\u20133506 (2015)","DOI":"10.1109\/CVPR.2015.7298972"},{"key":"20_CR16","unstructured":"Xiong, Y., Shafer, S.A.: Depth from focusing and defocusing. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, pp. 68\u201373. IEEE (1993)"},{"key":"20_CR17","doi-asserted-by":"crossref","unstructured":"Maximov, M., Galim, K., Leal-Taix\u00e9, L.: Focus on defocus: bridging the synthetic to real domain gap for depth estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1071\u20131080 (2020)","DOI":"10.1109\/CVPR42600.2020.00115"},{"key":"20_CR18","doi-asserted-by":"crossref","unstructured":"Ranftl, R., Bochkovskiy, A., Koltun, V.: Vision transformers for dense prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12179\u201312188 (2021)","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"20_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"746","DOI":"10.1007\/978-3-642-33715-4_54","volume-title":"Computer Vision \u2013 ECCV 2012","author":"N Silberman","year":"2012","unstructured":"Silberman, N., Hoiem, D., Kohli, P., Fergus, R.: Indoor segmentation and support inference from RGBD images. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7576, pp. 746\u2013760. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33715-4_54"},{"key":"20_CR20","unstructured":"Cho, J., Min, D., Kim, Y., Sohn, K.: DIML\/CVL RGB-D dataset: 2M RGB-D images of natural indoor and outdoor scenes. arXiv preprint arXiv:2110.11590 (2021)"},{"key":"20_CR21","doi-asserted-by":"crossref","unstructured":"Godard, C., Mac Aodha, O., Firman, M., Brostow, G.J.: Digging into self-supervised monocular depth prediction. In: The International Conference on Computer Vision (ICCV) (2019)","DOI":"10.1109\/ICCV.2019.00393"},{"key":"20_CR22","first-page":"30392","volume":"34","author":"T Xiao","year":"2021","unstructured":"Xiao, T., Singh, M., Mintun, E., Darrell, T., Doll\u00e1r, P., Girshick, R.: Early convolutions help transformers see better. Adv. Neural. Inf. Process. Syst. 34, 30392\u201330400 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"20_CR23","unstructured":"Hutchins, D., Schlag, I., Wu, Y., Dyer, E., Neyshabur, B.: Block-recurrent transformers. arXiv preprint arXiv:2203.07852 (2022)"},{"key":"20_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"603","DOI":"10.1007\/978-3-030-01228-1_36","volume-title":"Computer Vision \u2013 ECCV 2018","author":"N Xu","year":"2018","unstructured":"Xu, N., et al.: YouTube-VOS: sequence-to-sequence video object segmentation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11209, pp. 603\u2013619. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01228-1_36"},{"issue":"6","key":"20_CR25","doi-asserted-by":"publisher","first-page":"1059","DOI":"10.1007\/s11548-019-01958-6","volume":"14","author":"CI Nwoye","year":"2019","unstructured":"Nwoye, C.I., Mutter, D., Marescaux, J., Padoy, N.: Weakly supervised convolutional LSTM approach for tool tracking in laparoscopic videos. Int. J. Comput. Assist. Radiol. Surg. 14(6), 1059\u20131067 (2019)","journal-title":"Int. J. Comput. Assist. Radiol. Surg."},{"issue":"8","key":"20_CR26","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"20_CR27","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for Image recognition at scale. In: ICLR (2021)"},{"key":"20_CR28","doi-asserted-by":"crossref","unstructured":"Meng, X., Fan, C., Ming, Y., Yu, H.: CORNet: context-based ordinal regression network for monocular depth estimation. IEEE Trans. Circuits Syst. Video Technol. (2021)","DOI":"10.1109\/TCSVT.2021.3128505"},{"key":"20_CR29","doi-asserted-by":"crossref","unstructured":"Gur, S., Wolf, L.: Single image depth estimation trained via depth from defocus cues. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7683\u20137692 (2019)","DOI":"10.1109\/CVPR.2019.00787"},{"key":"20_CR30","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"20_CR31","unstructured":"Johannsen, O., et al.: A taxonomy and evaluation of dense light field depth estimation algorithms. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 82\u201399 (2017)"},{"issue":"1","key":"20_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s00138-020-01162-6","volume":"32","author":"S Anwar","year":"2021","unstructured":"Anwar, S., Hayder, Z., Porikli, F.: Deblur and deep depth from single defocus image. Mach. Vis. Appl. 32(1), 1\u201313 (2021)","journal-title":"Mach. Vis. Appl."},{"key":"20_CR33","unstructured":"Kang, X., Yuan, S.: Integrated visual-inertial odometry and image stabilization for image processing. In: Google Patents, US Patent App. 18\/035,479 (2023)"},{"key":"20_CR34","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"506","DOI":"10.1007\/978-3-030-01252-6_30","volume-title":"Computer Vision \u2013 ECCV 2018","author":"X Guo","year":"2018","unstructured":"Guo, X., Li, H., Yi, S., Ren, J., Wang, X.: Learning monocular depth by distilling cross-domain stereo networks. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11215, pp. 506\u2013523. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01252-6_30"},{"key":"20_CR35","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? The kitti vision benchmark suite. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 3354\u20133361. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"20_CR36","doi-asserted-by":"crossref","unstructured":"Agarwal, A., Arora, C.: DepthFormer: multiscale vision transformer for monocular depth estimation with global local information fusion. In: 2022 IEEE International Conference on Image Processing (ICIP), pp. 3873\u20133877. IEEE (2022)","DOI":"10.1109\/ICIP46576.2022.9897187"},{"key":"20_CR37","doi-asserted-by":"crossref","unstructured":"Si, H., et al.: Fully self-supervised depth estimation from defocus clue. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9140\u20139149 (2023)","DOI":"10.1109\/CVPR52729.2023.00882"}],"container-title":["Lecture Notes in Computer Science","AI 2024: Advances in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-0348-0_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,23]],"date-time":"2024-11-23T19:04:21Z","timestamp":1732388661000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-0348-0_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,18]]},"ISBN":["9789819603473","9789819603480"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-0348-0_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,18]]},"assertion":[{"value":"18 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australasian Joint Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Melbourne, VIC","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 November 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 November 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"37","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ausai2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ajcai2024.org\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}