{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T23:39:39Z","timestamp":1767310779107,"version":"3.48.0"},"publisher-location":"Cham","reference-count":26,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032097835","type":"print"},{"value":"9783032097842","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-09784-2_8","type":"book-chapter","created":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T23:35:13Z","timestamp":1767310513000},"page":"74-84","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Temporally Stable Monocular Depth Estimation in\u00a0Surgical Vision"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2324-7033","authenticated-orcid":false,"given":"Jialang","family":"Xu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4614-5742","authenticated-orcid":false,"given":"Emanuele","family":"Colleoni","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4696-465X","authenticated-orcid":false,"given":"Nicolas","family":"Toussaint","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3672-2414","authenticated-orcid":false,"given":"Muhammad","family":"Asad","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2330-0973","authenticated-orcid":false,"given":"Ricardo","family":"Sanchez-Matilla","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0357-5996","authenticated-orcid":false,"given":"Evangelos B.","family":"Mazomenos","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6573-322X","authenticated-orcid":false,"given":"Imanol","family":"Luengo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0980-3227","authenticated-orcid":false,"given":"Danail","family":"Stoyanov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,2]]},"reference":[{"key":"8_CR1","unstructured":"Alexey, D.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv: 2010.11929 (2020)"},{"key":"8_CR2","unstructured":"Allan, M., et\u00a0al.: Stereo correspondence and reconstruction of endoscopic data challenge. arXiv preprint arXiv:2101.01133 (2021)"},{"key":"8_CR3","doi-asserted-by":"crossref","unstructured":"Cao, Y., Li, Y., Zhang, H., Ren, C., Liu, Y.: Learning structure affinity for video depth estimation. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 190\u2013198 (2021)","DOI":"10.1145\/3474085.3475564"},{"key":"8_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2024.103310","volume":"98","author":"C Chen","year":"2024","unstructured":"Chen, C., et al.: MA-SAM: modality-agnostic SAM adaptation for 3D medical image segmentation. Med. Image Anal. 98, 103310 (2024)","journal-title":"Med. Image Anal."},{"key":"8_CR5","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1016\/j.cmpb.2018.02.006","volume":"158","author":"L Chen","year":"2018","unstructured":"Chen, L., Tang, W., John, N.W., Wan, T.R., Zhang, J.J.: Slam-based dense surface reconstruction in monocular minimally invasive surgery and its application to augmented reality. Comput. Methods Programs Biomed. 158, 135\u2013146 (2018)","journal-title":"Comput. Methods Programs Biomed."},{"issue":"1","key":"8_CR6","doi-asserted-by":"publisher","first-page":"371","DOI":"10.1109\/TMI.2020.3027442","volume":"40","author":"T Collins","year":"2020","unstructured":"Collins, T., et al.: Augmented reality guided laparoscopic surgery of the uterus. IEEE Trans. Med. Imaging 40(1), 371\u2013380 (2020)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"8_CR7","doi-asserted-by":"crossref","unstructured":"Cui, B., Islam, M., Bai, L., Ren, H.: Surgical-DINO: adapter learning of foundation models for depth estimation in endoscopic surgery. Int. J. Comput. Assisted Radiol. Surg., 1\u20138 (2024)","DOI":"10.1007\/s11548-024-03083-5"},{"key":"8_CR8","doi-asserted-by":"crossref","unstructured":"Cui, B., Islam, M., Bai, L., Wang, A., Ren, H.: EndoDAC: efficient adapting foundation model for self-supervised depth estimation from any endoscopic camera. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 208\u2013218 (2024)","DOI":"10.1007\/978-3-031-72089-5_20"},{"key":"8_CR9","unstructured":"Eigen, D., Puhrsch, C., Fergus, R.: Depth map prediction from a single image using a multi-scale deep network. In: Advances in Neural Information Processing Systems, vol.\u00a027 (2014)"},{"key":"8_CR10","unstructured":"Hendrycks, D., Gimpel, K.: Gaussian error linear units (GELUs). arXiv preprint arXiv:1606.08415 (2016)"},{"key":"8_CR11","unstructured":"Hu, E.J., et al.: LoRA: low-rank adaptation of large language models. In: International Conference on Learning Representations (2022)"},{"key":"8_CR12","doi-asserted-by":"crossref","unstructured":"Khan, N., Penner, E., Lanman, D., Xiao, L.: Temporally consistent online depth estimation using point-based fusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9119\u20139129 (2023)","DOI":"10.1109\/CVPR52729.2023.00880"},{"key":"8_CR13","unstructured":"Kirillov, A., et\u00a0al.: Segment anything. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4015\u20134026 (2023)"},{"issue":"10","key":"8_CR14","doi-asserted-by":"publisher","first-page":"2185","DOI":"10.1109\/TMI.2018.2833868","volume":"37","author":"S Leonard","year":"2018","unstructured":"Leonard, S., et al.: Evaluation and stability analysis of video-based navigation system for functional endoscopic sinus surgery on in vivo clinical data. IEEE Trans. Med. Imaging 37(10), 2185\u20132195 (2018)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"8_CR15","doi-asserted-by":"crossref","unstructured":"Li, B., Liu, B., Zhu, M., Luo, X., Zhou, F.: Image intrinsic-based unsupervised monocular depth estimation in endoscopy. IEEE J. Biomed. Health Inf., 1\u201311 (2024)","DOI":"10.36227\/techrxiv.24345688.v2"},{"key":"8_CR16","doi-asserted-by":"crossref","unstructured":"Li, Z., et al.: Revisiting stereo depth estimation from a sequence-to-sequence perspective with transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6197\u20136206 (2021)","DOI":"10.1109\/ICCV48922.2021.00614"},{"key":"8_CR17","doi-asserted-by":"publisher","first-page":"230","DOI":"10.1016\/j.media.2018.06.005","volume":"48","author":"F Mahmood","year":"2018","unstructured":"Mahmood, F., Durr, N.J.: Deep learning and conditional random fields-based depth estimation and topographical reconstruction from conventional endoscopy. Med. Image Anal. 48, 230\u2013243 (2018)","journal-title":"Med. Image Anal."},{"key":"8_CR18","unstructured":"Oquab, M., et\u00a0al.: DINOv2: learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)"},{"key":"8_CR19","doi-asserted-by":"crossref","unstructured":"Ranftl, R., Bochkovskiy, A., Koltun, V.: Vision transformers for dense prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12159\u201312168 (2021)","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"8_CR20","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2021.102338","volume":"77","author":"S Shao","year":"2022","unstructured":"Shao, S.: Self-supervised monocular depth and ego-motion estimation in endoscopy: appearance flow to the rescue. Med. Image Anal. 77, 102338 (2022)","journal-title":"Med. Image Anal."},{"key":"8_CR21","doi-asserted-by":"crossref","unstructured":"Wang, Y., Pan, Z., Li, X., Cao, Z., Xian, K., Zhang, J.: Less is more: consistent video depth estimation with masked frames modeling. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 6347\u20136358 (2022)","DOI":"10.1145\/3503161.3547978"},{"issue":"1","key":"8_CR22","doi-asserted-by":"publisher","first-page":"583","DOI":"10.1109\/TPAMI.2024.3476387","volume":"47","author":"Y Wang","year":"2025","unstructured":"Wang, Y., et al.: NVDS$$^{\\mathbf{+} }$$+: towards efficient and versatile neural stabilizer for video depth estimation. IEEE Trans. Pattern Anal. Mach. Intell. 47(1), 583\u2013600 (2025)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"8_CR23","doi-asserted-by":"crossref","unstructured":"Wang, Y., Long, Y., Fan, S.H., Dou, Q.: Neural rendering for stereo 3D reconstruction of deformable tissues in robotic surgery. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 431\u2013441 (2022)","DOI":"10.1007\/978-3-031-16449-1_41"},{"key":"8_CR24","doi-asserted-by":"crossref","unstructured":"Xu, H., Zhang, J., Cai, J., Rezatofighi, H., Tao, D.: GMFlow: learning optical flow via global matching. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8121\u20138130 (2022)","DOI":"10.1109\/CVPR52688.2022.00795"},{"key":"8_CR25","doi-asserted-by":"crossref","unstructured":"Yang, L., Kang, B., Huang, Z., Xu, X., Feng, J., Zhao, H.: Depth anything: unleashing the power of large-scale unlabeled data. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10371\u201310381 (2024)","DOI":"10.1109\/CVPR52733.2024.00987"},{"key":"8_CR26","unstructured":"Yang, L., et al.: Depth anything V2. In: The Thirty-Eighth Annual Conference on Neural Information Processing Systems (2024)"}],"container-title":["Lecture Notes in Computer Science","Collaborative Intelligence and Autonomy in Image-Guided Surgery"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-09784-2_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T23:35:15Z","timestamp":1767310515000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-09784-2_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032097835","9783032097842"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-09784-2_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"COLAS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Collaborative Intelligence and Autonomy in Image-Guided Surgery","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"colas2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/sites.google.com\/view\/miccai-2025-colas\/home","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}