{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T16:12:38Z","timestamp":1775578358326,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2023M730599"],"award-info":[{"award-number":["2023M730599"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62202137"],"award-info":[{"award-number":["62202137"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Natural Science Foundation of China","award":["62306097"],"award-info":[{"award-number":["62306097"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681286","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"8100-8109","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["ColVO: Colonoscopic Visual Odometry Considering Geometric and Photometric Consistency"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2130-9122","authenticated-orcid":false,"given":"Ruyu","family":"Liu","sequence":"first","affiliation":[{"name":"School of Information Science and Technology, Hangzhou Normal University &amp; Quanzhou Institute of Equipment Manufacturing, Haixi Institutes of Chinese Academy of Sciences, Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5423-9496","authenticated-orcid":false,"given":"Zhengzhe","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, Hangzhou Normal University, Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2118-6786","authenticated-orcid":false,"given":"Haoyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, Hangzhou Normal University, Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6264-5854","authenticated-orcid":false,"given":"Guodao","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Digital Media Technology, Hangzhou Dianzi University, Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7844-6035","authenticated-orcid":false,"given":"Jianhua","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Tianjin University of Technology, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6618-0908","authenticated-orcid":false,"family":"Sunbo","sequence":"additional","affiliation":[{"name":"Quanzhou Institute of Equipment Manufacturing, Haixi Institutes of Chinese Academy of Sciences, Quanzhou, Fujian, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9680-5126","authenticated-orcid":false,"given":"Weiguo","family":"Sheng","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, Hangzhou Normal University, Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5133-6688","authenticated-orcid":false,"given":"Xiufeng","family":"Liu","sequence":"additional","affiliation":[{"name":"the Department of Technology, Management and Economics, Technical University of Denmark, Lyngby, Denmark"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1100-0631","authenticated-orcid":false,"given":"Yaochu","family":"Jin","sequence":"additional","affiliation":[{"name":"Trustworthy and General AI Lab, School of Engineering, Westlake University, Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00077"},{"key":"e_1_3_2_1_2_1","volume-title":"Ming- Ming Cheng, and Ian Reid","author":"Bian Jiawang","year":"2019","unstructured":"Jiawang Bian, Zhichao Li, NaiyanWang, Huangying Zhan, Chunhua Shen, Ming- Ming Cheng, and Ian Reid. 2019. Unsupervised scale-consistent depth and egomotion learning from monocular video. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_1_3_1","series-title":"Journal of physics: Conference series","volume-title":"Self-supervised dense depth prediction in monocular endoscope video for 3D liver surface reconstruction","author":"Cao Zhengtao","year":"2050","unstructured":"Zhengtao Cao,Wenfeng Huang, Xiangyun Liao, Xuesong Deng, and QiongWang. 2021. Self-supervised dense depth prediction in monocular endoscope video for 3D liver surface reconstruction. In Journal of physics: Conference series, Vol. 1883. IOP Publishing, 012050."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00894"},{"key":"e_1_3_2_1_5_1","volume-title":"Direct sparse odometry","author":"Engel Jakob","year":"2017","unstructured":"Jakob Engel, Vladlen Koltun, and Daniel Cremers. 2017. Direct sparse odometry. IEEE transactions on pattern analysis and machine intelligence 40, 3 (2017), 611-- 625."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00393"},{"key":"e_1_3_2_1_7_1","volume-title":"IEEE international conference on computer vision. 1234--1241","author":"Higo Tomoaki","unstructured":"Tomoaki Higo, Yasuyuki Matsushita, Neel Joshi, and Katsushi Ikeuchi. [n. d.]. A hand-held photometric stereo camera for 3-d modeling. In IEEE international conference on computer vision. 1234--1241."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612456"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2021.101990"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612021"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811842"},{"key":"e_1_3_2_1_12_1","volume-title":"Unsupervised-learning-based continuous depth and motion estimation with monocular endoscopy for virtual reality minimally invasive surgery","author":"Li Ling","year":"2020","unstructured":"Ling Li, Xiaojian Li, Shanlin Yang, Shuai Ding, Alireza Jolfaei, and Xi Zheng. 2020. Unsupervised-learning-based continuous depth and motion estimation with monocular endoscopy for virtual reality minimally invasive surgery. IEEE transactions on industrial informatics 17, 6 (2020), 3920--3928."},{"key":"e_1_3_2_1_13_1","volume-title":"Sparseto- dense coarse-to-fine depth estimation for colonoscopy. Computers in biology and medicine 160","author":"Liu Ruyu","year":"2023","unstructured":"Ruyu Liu, Zhengzhe Liu, Jiaming Lu, Guodao Zhang, Zhigui Zuo, Bo Sun, Jianhua Zhang, Weiguo Sheng, Ran Guo, Lejun Zhang, and Xiaozhen Hua. 2023. Sparseto- dense coarse-to-fine depth estimation for colonoscopy. Computers in biology and medicine 160 (2023), 106983."},{"key":"e_1_3_2_1_14_1","volume-title":"IEEE international conference on robotics and automation. IEEE, 7476--7482","author":"Liu Ruyu","year":"2023","unstructured":"Ruyu Liu, Zhengzhe Liu, Haoyu Zhang, Guodao Zhang, Zhigui Zuo, and Weiguo Sheng. 2023. Dense depth completion based on multi-scale confidence and selfattention mechanism for intestinal endoscopy. In IEEE international conference on robotics and automation. IEEE, 7476--7482."},{"key":"e_1_3_2_1_15_1","volume-title":"Russell H Taylor, and Mathias Unberath.","author":"Liu Xingtong","year":"2020","unstructured":"Xingtong Liu, Ayushi Sinha, Masaru Ishii, Gregory D Hager, Austin Reiter, Russell H Taylor, and Mathias Unberath. 2020. Dense depth estimation in monocular endoscopy with self-supervised learning methods. IEEE transactions on medical imaging 39, 5 (2020), 1438--1447."},{"key":"e_1_3_2_1_16_1","volume-title":"OR 2.0 context-aware operating theaters, computer assisted robotic endoscopy, clinical image-based procedures, and skin image analysis","author":"Liu Xingtong","unstructured":"Xingtong Liu, Ayushi Sinha, Mathias Unberath, Masaru Ishii, Gregory D Hager, Russell H Taylor, and Austin Reiter. 2018. Self-supervised learning for dense depth estimation in monocular endoscopy. In OR 2.0 context-aware operating theaters, computer assisted robotic endoscopy, clinical image-based procedures, and skin image analysis. Springer, 128--138."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3275584"},{"key":"e_1_3_2_1_18_1","volume-title":"RNNSLAM: Reconstructing the 3D colon to visualize missing regions during a colonoscopy. Medical image analysis 72","author":"Ma Ruibin","year":"2021","unstructured":"Ruibin Ma, Rui Wang, Yubo Zhang, Stephen Pizer, Sarah K McGill, Julian Rosenman, and Jan-Michael Frahm. 2021. RNNSLAM: Reconstructing the 3D colon to visualize missing regions during a colonoscopy. Medical image analysis 72 (2021), 102100."},{"key":"e_1_3_2_1_19_1","volume-title":"Deep learning and conditional random fields-based depth estimation and topographical reconstruction from conventional endoscopy. Medical image analysis 48","author":"Mahmood Faisal","year":"2018","unstructured":"Faisal Mahmood and Nicholas J Durr. 2018. Deep learning and conditional random fields-based depth estimation and topographical reconstruction from conventional endoscopy. Medical image analysis 48 (2018), 230--243."},{"key":"e_1_3_2_1_20_1","volume-title":"Jose Maria Martinez Montiel, and Juan D Tardos","author":"Mur-Artal Raul","year":"2015","unstructured":"Raul Mur-Artal, Jose Maria Martinez Montiel, and Juan D Tardos. 2015. ORBSLAM: A versatile and accurate monocular SLAM system. IEEE transactions on robotics 31, 5 (2015), 1147--1163."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Masahiro Oda Hayato Itoh Kiyohito Tanaka Hirotsugu Takabatake Masaki Mori Hiroshi Natori and Kensaku Mori. 2022. Depth estimation from single-shot monocular endoscope image using image domain adaptation and edge-aware depth estimation. Computer methods in biomechanics and biomedical engineering: Imaging & visualization 10 3 (2022) 266--273.","DOI":"10.1080\/21681163.2021.2012835"},{"key":"e_1_3_2_1_22_1","volume-title":"Taylor L Bobrow, Gulfize Coskun, Kagan Incetan, Yasin Almalioglu, Faisal Mahmood, Eva Curto, Luis Perdigoto, Marina Oliveira, et al.","author":"Ozyoruk Kutsev Bengisu","year":"2021","unstructured":"Kutsev Bengisu Ozyoruk, Guliz Irem Gokceler, Taylor L Bobrow, Gulfize Coskun, Kagan Incetan, Yasin Almalioglu, Faisal Mahmood, Eva Curto, Luis Perdigoto, Marina Oliveira, et al. 2021. EndoSLAM dataset and an unsupervised monocular visual odometry and depth estimation approach for endoscopic videos. Medical image analysis 71 (2021), 102058."},{"key":"e_1_3_2_1_23_1","unstructured":"Adam Paszke Sam Gross Soumith Chintala Gregory Chanan Edward Yang Zachary DeVito Zeming Lin Alban Desmaison Luca Antiga and Adam Lerer. 2017. Automatic differentiation in pytorch. (2017)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aei.2021.101427"},{"key":"e_1_3_2_1_25_1","volume-title":"Omer F Ahmad, Paul Riordan, Mirek Janatka, Laurence B Lovat, and Danail Stoyanov.","author":"Rau Anita","year":"2019","unstructured":"Anita Rau, PJ Eddie Edwards, Omer F Ahmad, Paul Riordan, Mirek Janatka, Laurence B Lovat, and Danail Stoyanov. 2019. Implicit domain adaptation with conditional generative adversarial networks for depth prediction in endoscopy. International journal of computer assisted radiology and surgery 14, 7 (2019), 1167-- 1176."},{"key":"e_1_3_2_1_26_1","volume-title":"Endo-depth-and-motion: Reconstruction and tracking in endoscopic videos using depth networks and photometric constraints","author":"Recasens David","year":"2021","unstructured":"David Recasens, Jos\u00e9 Lamarca, Jos\u00e9 M F\u00e1cil, JMM Montiel, and Javier Civera. 2021. Endo-depth-and-motion: Reconstruction and tracking in endoscopic videos using depth networks and photometric constraints. IEEE robotics and automation letters 6, 4 (2021), 7225--7232."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16437-8_13"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_2_1_29_1","volume-title":"Semantic 3D object maps for everyday manipulation in human living environments. KI-K\u00fcnstliche intelligenz 24, 4","author":"Rusu Radu Bogdan","year":"2010","unstructured":"Radu Bogdan Rusu. 2010. Semantic 3D object maps for everyday manipulation in human living environments. KI-K\u00fcnstliche intelligenz 24, 4 (2010), 345--348."},{"key":"e_1_3_2_1_30_1","volume-title":"Self-supervised monocular depth and ego- Motion estimation in endoscopy: Appearance flow to the rescue. Medical image analysis 77","author":"Shao Shuwei","year":"2022","unstructured":"Shuwei Shao, Zhongcai Pei, Weihai Chen, Wentao Zhu, Xingming Wu, Dianmin Sun, and Baochang Zhang. 2022. Self-supervised monocular depth and ego- Motion estimation in endoscopy: Appearance flow to the rescue. Medical image analysis 77 (2022), 102338."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2017.10.014"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461129"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593623"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11548-017-1609-2"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00216"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision. 12727--12736","author":"Zhan Yunlong","year":"2021","unstructured":"LijunWang, YifanWang, LinzhaoWang, Yunlong Zhan, YingWang, and Huchuan Lu. 2021. Can scale-consistent monocular depth be learned in a self-supervised scale-invariant manner?. In Proceedings of the IEEE\/CVF international conference on computer vision. 12727--12736."},{"key":"e_1_3_2_1_37_1","volume-title":"Deep multimodal fusion by channel exchanging. Advances in neural information processing systems 33","author":"Wang Yikai","year":"2020","unstructured":"Yikai Wang, Wenbing Huang, Fuchun Sun, Tingyang Xu, Yu Rong, and Junzhou Huang. 2020. Deep multimodal fusion by channel exchanging. Advances in neural information processing systems 33 (2020), 4835--4845."},{"key":"e_1_3_2_1_38_1","volume-title":"Image quality assessment: From error visibility to structural similarity","author":"Wang Zhou","year":"2004","unstructured":"Zhou Wang, Alan C Bovik, Hamid R Sheikh, and Eero P Simoncelli. 2004. Image quality assessment: From error visibility to structural similarity. IEEE transactions on image processing 13, 4 (2004), 600--612."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3273924"},{"key":"e_1_3_2_1_40_1","volume-title":"Learning-based depth and pose estimation for monocular endoscope with loss generalization","author":"Widya Aji Resindra","unstructured":"Aji Resindra Widya, Yusuke Monno, Masatoshi Okutomi, Sho Suzuki, Takuji Gotoda, and Kenji Miki. 2021. Learning-based depth and pose estimation for monocular endoscope with loss generalization. In IEEE engineering in medicine & biology Society. IEEE, 3547--3552."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00534"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611751"},{"key":"e_1_3_2_1_43_1","volume-title":"Gps-net: Graphbased photometric stereo network. Advances in neural information processing systems 33","author":"Yao Zhuokun","year":"2020","unstructured":"Zhuokun Yao, Kun Li, Ying Fu, Haofeng Hu, and Boxin Shi. 2020. Gps-net: Graphbased photometric stereo network. Advances in neural information processing systems 33 (2020), 10306--10316."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611830"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00043"},{"key":"e_1_3_2_1_46_1","volume-title":"A template-based 3D reconstruction of colon structures and textures from stereo colonoscopic images","author":"Zhang Shuai","year":"2020","unstructured":"Shuai Zhang, Liang Zhao, Shoudong Huang, Menglong Ye, and Qi Hao. 2020. A template-based 3D reconstruction of colon structures and textures from stereo colonoscopic images. IEEE transactions on medical robotics and bionics 3, 1 (2020), 85--95."},{"key":"e_1_3_2_1_47_1","unstructured":"Yubo Zhang Jan-Michael Frahm Samuel Ehrenstein Sarah K McGill Julian G Rosenman ShuxianWang and StephenMPizer. 2021. ColDE: A depth estimation framework for colonoscopy reconstruction. arXiv preprint arXiv:2111.10371 (2021)."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612000"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.700"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681286","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681286","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:43Z","timestamp":1750295863000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681286"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":49,"alternative-id":["10.1145\/3664647.3681286","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681286","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}