{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T05:02:40Z","timestamp":1768885360272,"version":"3.49.0"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031781971","type":"print"},{"value":"9783031781988","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T00:00:00Z","timestamp":1733270400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T00:00:00Z","timestamp":1733270400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-78198-8_22","type":"book-chapter","created":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T09:04:03Z","timestamp":1733216643000},"page":"332-344","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Self-supervised Siamese Network Using Vision Transformer for Depth Estimation in Endoscopic Surgeries"],"prefix":"10.1007","author":[{"given":"Snigdha","family":"Agarwal","sequence":"first","affiliation":[]},{"given":"Neelam","family":"Sinha","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,4]]},"reference":[{"key":"22_CR1","unstructured":"Allan, M., Mcleod, J., Wang, C., Rosenthal, J.C., Hu, Z., Gard, N., Eisert, P., Fu, K.X., Zeffiro, T., Xia, W., et\u00a0al.: Stereo correspondence and reconstruction of endoscopic data challenge. arXiv preprint arXiv:2101.01133 (2021)"},{"key":"22_CR2","doi-asserted-by":"crossref","unstructured":"Assran, M., Caron, M., Misra, I., Bojanowski, P., Bordes, F., Vincent, P., Joulin, A., Rabbat, M., Ballas, N.: Masked siamese networks for label-efficient learning. In: European Conference on Computer Vision. pp. 456\u2013473. Springer (2022)","DOI":"10.1007\/978-3-031-19821-2_26"},{"key":"22_CR3","doi-asserted-by":"crossref","unstructured":"Bromley, J., Guyon, I., LeCun, Y., S\u00e4ckinger, E., Shah, R.: Signature verification using a\" siamese\" time delay neural network. Advances in neural information processing systems 6 (1993)","DOI":"10.1142\/9789812797926_0003"},{"key":"22_CR4","unstructured":"Chen, R.J., Bobrow, T.L., Athey, T., Mahmood, F., Durr, N.J.: Slam endoscopy enhanced by adversarial depth prediction. arXiv preprint arXiv:1907.00283 (2019)"},{"key":"22_CR5","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et\u00a0al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"22_CR6","unstructured":"Eigen, D., Puhrsch, C., Fergus, R.: Depth map prediction from a single image using a multi-scale deep network. Advances in neural information processing systems 27 (2014)"},{"key":"22_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-46484-8_45","volume-title":"Computer Vision \u2013 ECCV 2016","author":"R Garg","year":"2016","unstructured":"Garg, R., B.G., V.K., Carneiro, G., Reid, I.: Unsupervised CNN for Single View Depth Estimation: Geometry to the Rescue. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 740\u2013756. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_45"},{"key":"22_CR8","doi-asserted-by":"crossref","unstructured":"Godard, C., Mac\u00a0Aodha, O., Brostow, G.J.: Unsupervised monocular depth estimation with left-right consistency. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 270\u2013279 (2017)","DOI":"10.1109\/CVPR.2017.699"},{"issue":"9","key":"22_CR9","doi-asserted-by":"publisher","first-page":"4676","DOI":"10.1109\/TIP.2018.2832296","volume":"27","author":"L He","year":"2018","unstructured":"He, L., Wang, G., Hu, Z.: Learning depth from single images with deep neural network embedding focal length. IEEE Trans. Image Process. 27(9), 4676\u20134689 (2018)","journal-title":"IEEE Trans. Image Process."},{"key":"22_CR10","doi-asserted-by":"crossref","unstructured":"Huang, B., Zheng, J.Q., Nguyen, A., Xu, C., Gkouzionis, I., Vyas, K., Tuch, D., Giannarou, S., Elson, D.S.: Self-supervised depth estimation in laparoscopic image using 3d geometric consistency. In: International Conference on Medical Image Computing and Computer-Assisted Intervention. pp. 13\u201322. Springer (2022)","DOI":"10.1007\/978-3-031-16449-1_2"},{"key":"22_CR11","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: Accelerating deep network training by reducing internal covariate shift. In: International conference on machine learning. pp. 448\u2013456. pmlr (2015)"},{"issue":"4","key":"22_CR12","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1016\/S0002-9610(01)00589-X","volume":"181","author":"SM Kavic","year":"2001","unstructured":"Kavic, S.M., Basson, M.D.: Complications of endoscopy. Am. J. Surg. 181(4), 319\u2013332 (2001)","journal-title":"Am. J. Surg."},{"issue":"7553","key":"22_CR13","first-page":"436","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. nature 521(7553), 436\u2013444 (2015)","journal-title":"Deep learning. nature"},{"issue":"10","key":"22_CR14","doi-asserted-by":"publisher","first-page":"2024","DOI":"10.1109\/TPAMI.2015.2505283","volume":"38","author":"F Liu","year":"2015","unstructured":"Liu, F., Shen, C., Lin, G., Reid, I.: Learning depth from single monocular images using deep convolutional neural fields. IEEE Trans. Pattern Anal. Mach. Intell. 38(10), 2024\u20132039 (2015)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"22_CR15","doi-asserted-by":"crossref","unstructured":"Lou, A., Noble, J.: Ws-sfmlearner: Self-supervised monocular depth and ego-motion estimation on surgical videos with unknown camera parameters. arXiv preprint arXiv:2308.11776 (2023)","DOI":"10.1117\/12.3008390"},{"key":"22_CR16","unstructured":"Mac\u00a0Aodha, O., Firman, M., Brostow, G.J., et\u00a0al.: Digging into self-supervised monocular depth estimation. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV)(2019). pp. 3827\u20133837 (2019)"},{"key":"22_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2021.102058","volume":"71","author":"KB Ozyoruk","year":"2021","unstructured":"Ozyoruk, K.B., Gokceler, G.I., Bobrow, T.L., Coskun, G., Incetan, K., Almalioglu, Y., Mahmood, F., Curto, E., Perdigoto, L., Oliveira, M., et al.: Endoslam dataset and an unsupervised monocular visual odometry and depth estimation approach for endoscopic videos. Med. Image Anal. 71, 102058 (2021)","journal-title":"Med. Image Anal."},{"key":"22_CR18","doi-asserted-by":"publisher","unstructured":"P.\u00a0Breedveld, H. G.\u00a0Stassen, D.W.M., Stassen, L.P.S.: Theoretical background and conceptual solution for depth perception and eye-hand coordination problems in laparoscopic surgery. Minimally Invasive Therapy & Allied Technologies 8(4), 227\u2013234 (1999). https:\/\/doi.org\/10.3109\/13645709909153166, https:\/\/doi.org\/10.3109\/13645709909153166","DOI":"10.3109\/13645709909153166"},{"issue":"4","key":"22_CR19","doi-asserted-by":"publisher","first-page":"7225","DOI":"10.1109\/LRA.2021.3095528","volume":"6","author":"D Recasens","year":"2021","unstructured":"Recasens, D., Lamarca, J., F\u00e1cil, J.M., Montiel, J., Civera, J.: Endo-depth-and-motion: Reconstruction and tracking in endoscopic videos using depth networks and photometric constraints. IEEE Robotics and Automation Letters 6(4), 7225\u20137232 (2021)","journal-title":"IEEE Robotics and Automation Letters"},{"key":"22_CR20","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: Convolutional networks for biomedical image segmentation. In: Medical image computing and computer-assisted intervention\u2013MICCAI 2015: 18th international conference, Munich, Germany, October 5-9, 2015, proceedings, part III 18. pp. 234\u2013241. Springer (2015)","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"22_CR21","unstructured":"Saxena, A., Chung, S., Ng, A.: Learning depth from single monocular images. In: Weiss, Y., Sch\u00f6lkopf, B., Platt, J. (eds.) Advances in Neural Information Processing Systems. vol.\u00a018. MIT Press (2005)"},{"issue":"5","key":"22_CR22","doi-asserted-by":"publisher","first-page":"824","DOI":"10.1109\/TPAMI.2008.132","volume":"31","author":"A Saxena","year":"2008","unstructured":"Saxena, A., Sun, M., Ng, A.Y.: Make3d: Learning 3d scene structure from a single still image. IEEE Trans. Pattern Anal. Mach. Intell. 31(5), 824\u2013840 (2008)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"22_CR23","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2021.102338","volume":"77","author":"S Shao","year":"2022","unstructured":"Shao, S., Pei, Z., Chen, W., Zhu, W., Wu, X., Sun, D., Zhang, B.: Self-supervised monocular depth and ego-motion estimation in endoscopy: Appearance flow to the rescue. Med. Image Anal. 77, 102338 (2022)","journal-title":"Med. Image Anal."},{"issue":"Pt 1","key":"22_CR24","first-page":"275","volume":"13","author":"D Stoyanov","year":"2010","unstructured":"Stoyanov, D., Scarzanella, M.V., Pratt, P., Yang, G.Z.: Real-time stereo reconstruction in robotically assisted minimally invasive surgery. Med Image Comput Comput Assist Interv 13(Pt 1), 275\u2013282 (2010)","journal-title":"Med Image Comput Comput Assist Interv"},{"key":"22_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1007\/978-3-030-80432-9_26","volume-title":"Medical Image Understanding and Analysis","author":"Z Yang","year":"2021","unstructured":"Yang, Z., Simon, R., Li, Y., Linte, C.A.: Dense Depth Estimation from Stereo Endoscopy Videos Using Unsupervised Optical Flow Methods. In: Papie\u017c, B.W., Yaqub, M., Jiao, J., Namburete, A.I.L., Noble, J.A. (eds.) MIUA 2021. LNCS, vol. 12722, pp. 337\u2013349. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-80432-9_26"},{"key":"22_CR26","unstructured":"Ye, M., Johns, E., Handa, A., Zhang, L., Pratt, P., Yang, G.Z.: Self-supervised siamese learning on stereo image pairs for depth estimation in robotic surgery. arXiv preprint arXiv:1705.08260 (2017)"},{"key":"22_CR27","doi-asserted-by":"publisher","unstructured":"Zhai, M., Xiang, X., Lv, N., Kong, X.: Optical flow and scene flow estimation: A survey. Pattern Recognition 114, 107861 (2021). https:\/\/doi.org\/10.1016\/j.patcog.2021.107861, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0031320321000480","DOI":"10.1016\/j.patcog.2021.107861"},{"key":"22_CR28","doi-asserted-by":"crossref","unstructured":"Zhou, T., Brown, M., Snavely, N., Lowe, D.G.: Unsupervised learning of depth and ego-motion from video. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 1851\u20131858 (2017)","DOI":"10.1109\/CVPR.2017.700"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-78198-8_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T10:20:26Z","timestamp":1733221226000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-78198-8_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,4]]},"ISBN":["9783031781971","9783031781988"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-78198-8_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,4]]},"assertion":[{"value":"4 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kolkata","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpr2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icpr2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}