{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T10:10:47Z","timestamp":1766139047270,"version":"3.40.4"},"reference-count":68,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"Hong Kong Research Grants Council","award":["CRF C4026-21GF","CRF C4063-18G","GRF 14203323","GRF 14216022","GRF 14211420"],"award-info":[{"award-number":["CRF C4026-21GF","CRF C4063-18G","GRF 14203323","GRF 14216022","GRF 14211420"]}]},{"DOI":"10.13039\/501100001809","name":"NSFC\/RGC Joint Research Scheme","doi-asserted-by":"publisher","award":["N_CUHK420\/22"],"award-info":[{"award-number":["N_CUHK420\/22"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shenzhen\u2013Hong Kong\u2013Macau Technology Research Program","award":["202108233000303"],"award-info":[{"award-number":["202108233000303"]}]},{"DOI":"10.13039\/501100021171","name":"Basic and Applied Basic Research Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2021B1515120035"],"award-info":[{"award-number":["2021B1515120035"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shenzhen Key Laboratory of Robotics Perception and Intelligence through the Southern University of Science and Technology","award":["ZDSYS20200810171800001"],"award-info":[{"award-number":["ZDSYS20200810171800001"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Automat. Sci. Eng."],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/tase.2025.3530791","type":"journal-article","created":{"date-parts":[[2025,1,16]],"date-time":"2025-01-16T18:47:34Z","timestamp":1737053254000},"page":"11717-11730","source":"Crossref","is-referenced-by-count":5,"title":["V\u00b2-SfMLearner: Learning Monocular Depth and Ego-Motion for Multimodal Wireless Capsule Endoscopy"],"prefix":"10.1109","volume":"22","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9762-6821","authenticated-orcid":false,"given":"Long","family":"Bai","sequence":"first","affiliation":[{"name":"Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7900-8032","authenticated-orcid":false,"given":"Beilei","family":"Cui","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong, China"}]},{"given":"Liangyu","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong, China"}]},{"given":"Yanheng","family":"Li","sequence":"additional","affiliation":[{"name":"School of Creative Media, City University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2698-9569","authenticated-orcid":false,"given":"Shilong","family":"Yao","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, City University of Hong Kong, Hong Kong, China"}]},{"given":"Sishen","family":"Yuan","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong, China"}]},{"given":"Yanan","family":"Wu","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4170-4798","authenticated-orcid":false,"given":"Yang","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Mechanical Engineering, Hubei University of Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5255-5898","authenticated-orcid":false,"given":"Max Q.-H.","family":"Meng","sequence":"additional","affiliation":[{"name":"Shenzhen Key Laboratory of Robotics Perception and Intelligence and the Department of Electronic and Electrical Engineering, Southern University of Science and Technology, Shenzhen, China"}]},{"given":"Zhen","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Gastroenterology, Qilu Hospital of Shandong University, Jinan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3180-7347","authenticated-orcid":false,"given":"Weiping","family":"Ding","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence and Computer Science, Nantong University, Nantong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6488-1551","authenticated-orcid":false,"given":"Hongliang","family":"Ren","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, The Chinese University of Hong Kong, Hong Kong, China"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1","article-title":"Global cancer facts & figures","volume":"3","author":"Center","year":"2011","journal-title":"Atlanta, Amer. Cancer Soc."},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1053\/j.gastro.2020.02.068"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72104-5_29"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2022.3197442"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2022.3201966"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2023.107412"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2023.10.632"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2019.2936645"},{"key":"ref9","article-title":"Surgical-LVLM: Learning to adapt large vision-language model for grounded visual question answering in robotic surgery","author":"Wang","year":"2024","journal-title":"arXiv:2405.10948"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ISBI56570.2024.10635466"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2016.2610579"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-43999-5_4"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TMECH.2015.2488361"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2020.3013954"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2020.2965144"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TIM.2021.3069488"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TIM.2021.3075776"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2013.2282997"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2018.2856109"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2017.10.014"},{"key":"ref21","first-page":"197","article-title":"Endo-4DGS: Endoscopic monocular scene reconstruction with 4D Gaussian splatting","volume-title":"Proc. Int. Conf. Med. Image Comput. Comput.-Assist. Intervent","author":"Yi-ming"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ROBIO64047.2024.10907406"},{"key":"ref23","first-page":"1","article-title":"Depth map prediction from a single image using a multi-scale deep network","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"27","author":"Eigen"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2021.102338"},{"key":"ref25","first-page":"1","article-title":"Unsupervised scale-consistent depth and ego-motion learning from monocular video","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Bian"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093334"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00393"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01441"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.700"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19812-0_33"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2020.3011067"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2021.102058"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1515\/cdbme-2020-0004"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2024.3352390"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICTRC.2015.7156427"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3178473"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.4028\/www.scientific.net\/AMM.347-350.942"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/SPW50608.2020.00028"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/s42979-021-00773-8"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2021.101990"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2021.3064065"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2018.2833868"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2020.3032831"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2018.06.005"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2024.3423791"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01778"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/s11548-024-03083-5"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72089-5_20"},{"key":"ref49","article-title":"DINOv2: Learning robust visual features without supervision","author":"Oquab","year":"2023","journal-title":"arXiv:2304.07193"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00987"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.11.015"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.699"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2019.04.014"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00745"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1016\/j.compmedimag.2024.102390"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2023.106983"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981480"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ROBIO55434.2022.10012018"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981203"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3286937"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989603"},{"issue":"115","key":"ref63","first-page":"778","article-title":"Comparison of the diagnostic yield of","volume":"59","author":"Hong","year":"2012","journal-title":"Hepato-Gastroenterology"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2022.3176356"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1702.01992"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00360"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"ref68","article-title":"Benchmarking neural network robustness to common corruptions and surface variations","author":"Hendrycks","year":"2018","journal-title":"arXiv:1807.01697"}],"container-title":["IEEE Transactions on Automation Science and Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/8856\/10839176\/10843755.pdf?arnumber=10843755","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,14]],"date-time":"2025-04-14T17:42:18Z","timestamp":1744652538000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10843755\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":68,"URL":"https:\/\/doi.org\/10.1109\/tase.2025.3530791","relation":{},"ISSN":["1545-5955","1558-3783"],"issn-type":[{"type":"print","value":"1545-5955"},{"type":"electronic","value":"1558-3783"}],"subject":[],"published":{"date-parts":[[2025]]}}}