{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,2]],"date-time":"2025-12-02T15:08:34Z","timestamp":1764688114521},"reference-count":55,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,8,19]],"date-time":"2024-08-19T00:00:00Z","timestamp":1724025600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,8,19]],"date-time":"2024-08-19T00:00:00Z","timestamp":1724025600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1007\/s00138-024-01586-4","type":"journal-article","created":{"date-parts":[[2024,8,19]],"date-time":"2024-08-19T13:02:42Z","timestamp":1724072562000},"update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Dyna-MSDepth: multi-scale self-supervised monocular depth estimation network for visual SLAM in dynamic scenes"],"prefix":"10.1007","volume":"35","author":[{"given":"Jianjun","family":"Yao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yingzhao","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiajia","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,8,19]]},"reference":[{"key":"1586_CR1","doi-asserted-by":"publisher","unstructured":"Guillaume, T., Evangeline, P., Benazouz, B., et al.: On line mapping and global positioning for autonomous driving in urban environment 
based on evidential SLAM. In: Paper Presented at the IEEE Intelligent Vehicles Symposium, Seoul, South Korea, 28 June\u20131 July (2015). https:\/\/doi.org\/10.1109\/IVS.2015.7225785","DOI":"10.1109\/IVS.2015.7225785"},{"issue":"6","key":"1586_CR2","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1016\/j.isprsjprs.2022.03.018","volume":"188","author":"E Mostafa","year":"2022","unstructured":"Mostafa, E., Rongjun, Q.: Cross-view slam solver: global pose estimation of monocular ground-level video frames for 3d reconstruction using a reference 3d model from satellite images. ISPRS J. Photogramm. Remote. Sens. 188(6), 62\u201374 (2022). https:\/\/doi.org\/10.1016\/j.isprsjprs.2022.03.018","journal-title":"ISPRS J. Photogramm. Remote. Sens."},{"key":"1586_CR3","doi-asserted-by":"publisher","unstructured":"Kumar, R.S., Singh, C.D., Ziad, A.-H., et al.: Poni: potential functions for objectgoal navigation with interaction-free learning. In: Paper Presented at the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, New Orleans, USA, 19\u201324 June (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01832","DOI":"10.1109\/CVPR52688.2022.01832"},{"key":"1586_CR4","doi-asserted-by":"publisher","unstructured":"Qi, L., Yue, W., Yilun, W., et al.: Hdmapnet: an online hd map construction and evaluation framework. In: Paper Presented at the International Conference on Robotics and Automation, Philadelphia, USA, 23\u201327 May (2022). https:\/\/doi.org\/10.1109\/icra46639.2022.9812383","DOI":"10.1109\/icra46639.2022.9812383"},{"issue":"12","key":"1586_CR5","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1016\/j.robot.2017.09.010","volume":"98","author":"Y Georges","year":"2017","unstructured":"Georges, Y., Daniel, A., Elie, S., et al.: Keyframe-based monocular slam: design, survey, and future directions. Robot. Auton. Syst. 98(12), 67\u201388 (2017). https:\/\/doi.org\/10.1016\/j.robot.2017.09.010","journal-title":"Robot. Auton. 
Syst."},{"key":"1586_CR6","doi-asserted-by":"publisher","unstructured":"Hanwei, Z., Hideaki, U., Shintaro, O., et al.: MOTSLAM: MOT-assisted monocular dynamic SLAM using single-view depth estimation. In: Paper Presented at the IEEE\/RSJ International Conference on Intelligent Robots and Systems, Kyoto, Japan, 23\u201327 October (2022). https:\/\/doi.org\/10.1109\/IROS47612.2022.9982280","DOI":"10.1109\/IROS47612.2022.9982280"},{"issue":"6","key":"1586_CR7","doi-asserted-by":"publisher","first-page":"1874","DOI":"10.1109\/TRO.2021.3075644","volume":"37","author":"C Carlos","year":"2021","unstructured":"Carlos, C., Richard, E., G\u00f3mez, R.J.J., et al.: Orb-slam3: an accurate open-source library for visual, visual-inertial, and multimap slam. IEEE Trans. Robot. 37(6), 1874\u20131890 (2021). https:\/\/doi.org\/10.1109\/TRO.2021.3075644","journal-title":"IEEE Trans. Robot."},{"issue":"4","key":"1586_CR8","doi-asserted-by":"publisher","first-page":"8721","DOI":"10.1109\/LRA.2022.3188118","volume":"7","author":"G Riccardo","year":"2022","unstructured":"Riccardo, G., Wolfgang, S., Armin, W., et al.: Challenges of slam in extremely unstructured environments: the DLR planetary stereo, solid-state lidar, inertial dataset. IEEE Robot. Autom. Lett. 7(4), 8721\u20138728 (2022). https:\/\/doi.org\/10.1109\/LRA.2022.3188118","journal-title":"IEEE Robot. Autom. Lett."},{"key":"1586_CR9","doi-asserted-by":"publisher","unstructured":"Xiaoyang, L., Liang, L., Mengmeng, W., et al.: Hr-depth: high resolution self-supervised monocular depth estimation. In: Paper Presented at the AAAI Conference on Artificial Intelligence, Vancouver, Canada, 2\u20139 February (2021). https:\/\/doi.org\/10.1609\/aaai.v35i3.16329","DOI":"10.1609\/aaai.v35i3.16329"},{"key":"1586_CR10","doi-asserted-by":"publisher","unstructured":"Cl\u00e9ment, G., Oisin, M.A., Michael, F., et al.: Digging into self-supervised monocular depth estimation.
In: Paper Presented at the IEEE\/CVF International Conference on Computer Vision, Seoul, Korea, 27 October\u20132 November (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00393","DOI":"10.1109\/ICCV.2019.00393"},{"issue":"9","key":"1586_CR11","doi-asserted-by":"publisher","first-page":"2548","DOI":"10.1007\/s11263-021-01484-6","volume":"129","author":"B JiaWang","year":"2021","unstructured":"JiaWang, B., Huangying, Z., Naiyan, W., et al.: Unsupervised scale-consistent depth learning from video. Int. J. Comput. Vis. 129(9), 2548\u20132564 (2021). https:\/\/doi.org\/10.1007\/s11263-021-01484-6","journal-title":"Int. J. Comput. Vis."},{"issue":"1","key":"1586_CR12","doi-asserted-by":"publisher","first-page":"497","DOI":"10.1109\/TPAMI.2023.3322549","volume":"46","author":"L Sun","year":"2023","unstructured":"Sun, L., Bian, J., Zhan, H., et al.: Sc-depthv3: robust self-supervised monocular depth estimation for dynamic scenes. IEEE Trans. Pattern Anal. Mach. Intell. 46(1), 497\u2013508 (2023). https:\/\/doi.org\/10.1109\/TPAMI.2023.3322549","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"12","key":"1586_CR13","doi-asserted-by":"publisher","first-page":"9802","DOI":"10.1109\/TPAMI.2021.3136220","volume":"44","author":"B JiaWang","year":"2021","unstructured":"JiaWang, B., Huangying, Z., Naiyan, W., et al.: Auto-rectify network for unsupervised indoor depth estimation. IEEE Trans. Pattern Anal. Mach. Intell. 44(12), 9802\u20139813 (2021). https:\/\/doi.org\/10.1109\/TPAMI.2021.3136220","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"3","key":"1586_CR14","doi-asserted-by":"publisher","first-page":"734","DOI":"10.1109\/TRO.2019.2899783","volume":"35","author":"G-O Ruben","year":"2019","unstructured":"Ruben, G.-O., Francisco-Angel, M., David, Z.-N., et al.: Pl-slam: a stereo slam system through the combination of points and line segments. IEEE Trans. Robot. 35(3), 734\u2013746 (2019). 
https:\/\/doi.org\/10.1109\/TRO.2019.2899783","journal-title":"IEEE Trans. Robot."},{"key":"1586_CR15","doi-asserted-by":"publisher","unstructured":"Sturm, J., Nikolas, E., Felix, E., et al.: A benchmark for the evaluation of RGB-D SLAM systems. In: Paper Presented at the IEEE\/RSJ International Conference on Intelligent Robots and Systems, Portugal, 12\u201315 October (2012). https:\/\/doi.org\/10.1109\/IROS.2012.6385773","DOI":"10.1109\/IROS.2012.6385773"},{"issue":"6","key":"1586_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2022.3190031","volume":"71","author":"Y Jun","year":"2022","unstructured":"Jun, Y., Dongting, L., Fei, Y., et al.: A novel lidar-assisted monocular visual slam framework for mobile robots in outdoor environments. IEEE Trans. Instrum. Meas. 71(6), 1\u201311 (2022). https:\/\/doi.org\/10.1109\/TIM.2022.3190031","journal-title":"IEEE Trans. Instrum. Meas."},{"issue":"2","key":"1586_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3758\/s13428-022-01941-1","volume":"7","author":"K Ayush","year":"2022","unstructured":"Ayush, K., Shrinivas, P., Eli, P., et al.: Comparison of visual SLAM and IMU in tracking head movement outdoors. Behav. Res. Methods 7(2), 1\u201313 (2022). https:\/\/doi.org\/10.3758\/s13428-022-01941-1","journal-title":"Behav. Res. Methods"},{"issue":"8","key":"1586_CR18","doi-asserted-by":"publisher","first-page":"134543","DOI":"10.1109\/ACCESS.2021.3116380","volume":"9","author":"TJ Luke","year":"2021","unstructured":"Luke, T.J., Lam, P.S., Abdesselam, B.: D-net: a generalised and optimised deep network for monocular depth estimation. IEEE Access 9(8), 134543\u2013134555 (2021). 
https:\/\/doi.org\/10.1109\/ACCESS.2021.3116380","journal-title":"IEEE Access"},{"issue":"5","key":"1586_CR19","doi-asserted-by":"publisher","first-page":"1255","DOI":"10.1109\/TRO.2017.2705103","volume":"33","author":"M-A Raul","year":"2017","unstructured":"Raul, M.-A., Tardos, J.D.: Orb-slam2: an open-source slam system for monocular, stereo, and RGB-D cameras. IEEE Trans. Robot. 33(5), 1255\u20131262 (2017). https:\/\/doi.org\/10.1109\/TRO.2017.2705103","journal-title":"IEEE Trans. Robot."},{"key":"1586_CR20","doi-asserted-by":"publisher","unstructured":"Huangying, Z., Saroj, W.C., Jia-Wang, B., et al.: Visual odometry revisited: what should be learnt? In: Paper Presented at the IEEE International Conference on Robotics and Automation, Xian, China, 31 May\u20135 June (2020). https:\/\/doi.org\/10.1109\/ICRA40945.2020.9197374","DOI":"10.1109\/ICRA40945.2020.9197374"},{"key":"1586_CR21","doi-asserted-by":"publisher","unstructured":"Dingfu, Z., Yuchao, D., Hongdong, L.: Reliable scale estimation and correction for monocular visual odometry. In: Paper Presented at the IEEE Intelligent Vehicles Symposium, Gothenburg, Sweden, 19\u201322 June (2016). https:\/\/doi.org\/10.1109\/IVS.2016.7535431","DOI":"10.1109\/IVS.2016.7535431"},{"key":"1586_CR22","doi-asserted-by":"publisher","unstructured":"Fran\u00e7ani, A.O., Maximo, M.R.O.A.: Dense prediction transformer for scale estimation in monocular visual odometry. In: Paper Presented at the Latin American Robotics Symposium, S\u00e3o Bernardo do Campo, Brazil, 18\u201321 October (2022). https:\/\/doi.org\/10.1109\/LARS\/SBR\/WRE56824.2022.9995735","DOI":"10.1109\/LARS\/SBR\/WRE56824.2022.9995735"},{"key":"1586_CR23","doi-asserted-by":"publisher","unstructured":"Danpeng, C., Shuai, W., Weijian, X., et al.: VIP-SLAM: an efficient tightly-coupled RGB-D visual inertial planar SLAM. In: Paper Presented at the IEEE International Conference on Robotics and Automation, Philadelphia, USA, 23\u201327 May (2022). 
https:\/\/doi.org\/10.1109\/ICRA46639.2022.9812354","DOI":"10.1109\/ICRA46639.2022.9812354"},{"key":"1586_CR24","doi-asserted-by":"publisher","unstructured":"Wei, Y., Yifan, L., Chunhua, S., et al.: Enforcing geometric constraints of virtual normal for depth prediction. In: Paper Presented at the IEEE\/CVF International Conference on Computer Vision, Seoul, Korea, 27 October\u201302 November (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00578","DOI":"10.1109\/ICCV.2019.00578"},{"key":"1586_CR25","doi-asserted-by":"publisher","unstructured":"Lam, H., Phong, N.-H., Jiri, M., et al.: Guiding monocular depth estimation using depth-attention volume. In: Paper Presented at the European Conference on Computer Vision, Glasgow, UK, 23\u201327 August (2020). https:\/\/doi.org\/10.1007\/978-3-030-58574-7_35","DOI":"10.1007\/978-3-030-58574-7_35"},{"key":"1586_CR26","doi-asserted-by":"publisher","unstructured":"Matteo, P., Filippo, A., Fabio, T., et al.: On the uncertainty of self-supervised monocular depth estimation. In: Paper Presented at the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Seattle, USA, 13\u201319 June (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.00329","DOI":"10.1109\/CVPR42600.2020.00329"},{"key":"1586_CR27","doi-asserted-by":"publisher","unstructured":"Marvin, K., Jan-Aike, T., Jonas, M., et al.: Self-supervised monocular depth estimation: solving the dynamic object problem by semantic guidance. In: Paper Presented at the European Conference on Computer Vision, Glasgow, UK, 23\u201327 August (2020). https:\/\/doi.org\/10.1007\/978-3-030-58565-5_35","DOI":"10.1007\/978-3-030-58565-5_35"},{"key":"1586_CR28","doi-asserted-by":"publisher","unstructured":"Cheng, Z., James Chenhao\u00a0Liang, G.T., et al.: Adversarial training of self-supervised monocular depth estimation against physical-world attacks. In: Paper Presented at the Eleventh International Conference on Learning Representations, Kigali, Rwanda, 01\u201305 May (2023).
https:\/\/doi.org\/10.48550\/arXiv.2301.13487","DOI":"10.48550\/arXiv.2301.13487"},{"key":"1586_CR29","doi-asserted-by":"publisher","unstructured":"Cheng, Z., James\u00a0Liang, H.C., et al.: Physical attack on monocular depth estimation with optimal adversarial patches. In: Paper Presented at the European Conference on Computer Vision, Tel Aviv, Israel, 23\u201327 October (2022). https:\/\/doi.org\/10.1007\/978-3-031-19839-7_30","DOI":"10.1007\/978-3-031-19839-7_30"},{"key":"1586_CR30","doi-asserted-by":"publisher","unstructured":"Cheng, Z., Hongjun\u00a0Choi, J.L., et al.: Fusion is not enough: single modal attacks on fusion models for 3D object detection. In: Paper Presented at the Twelfth International Conference on Learning Representations, Vienna, Austria, 07\u201311 May (2024). https:\/\/doi.org\/10.48550\/arXiv.2304.14614","DOI":"10.48550\/arXiv.2304.14614"},{"key":"1586_CR31","doi-asserted-by":"publisher","unstructured":"Chao, Y., Zuxin, L., Xin-Jun, L., et al.: DS-SLAM: a semantic visual SLAM towards dynamic environments. In: Paper Presented at the IEEE\/RSJ International Conference on Intelligent Robots and Systems, Madrid, Spain, 01\u201305 October (2018). https:\/\/doi.org\/10.1109\/IROS.2018.8593691","DOI":"10.1109\/IROS.2018.8593691"},{"issue":"4","key":"1586_CR32","doi-asserted-by":"publisher","first-page":"4076","DOI":"10.1109\/LRA.2018.2860039","volume":"3","author":"B Berta","year":"2018","unstructured":"Berta, B., F\u00e1cil, J.M., Javier, C., et al.: Dynaslam: tracking, mapping, and inpainting in dynamic scenes. IEEE Robot. Autom. Lett. 3(4), 4076\u20134083 (2018). https:\/\/doi.org\/10.1109\/LRA.2018.2860039","journal-title":"IEEE Robot. Autom. Lett."},{"issue":"1","key":"1586_CR33","doi-asserted-by":"publisher","first-page":"95301","DOI":"10.1109\/ACCESS.2020.2994348","volume":"8","author":"C Linyan","year":"2020","unstructured":"Linyan, C., Chaowei, M.: Sdf-slam: semantic depth filter slam for dynamic environments.
IEEE Access 8(1), 95301\u201395311 (2020). https:\/\/doi.org\/10.1109\/ACCESS.2020.2994348","journal-title":"IEEE Access"},{"issue":"4","key":"1586_CR34","doi-asserted-by":"publisher","first-page":"9573","DOI":"10.1109\/LRA.2022.3191193","volume":"7","author":"L Jianheng","year":"2022","unstructured":"Jianheng, L., Xuanfu, L., Yueqian, L., et al.: RGB-D inertial odometry for a resource-restricted robot in dynamic environments. IEEE Robot. Autom. Lett. 7(4), 9573\u20139580 (2022). https:\/\/doi.org\/10.1109\/LRA.2022.3191193","journal-title":"IEEE Robot. Autom. Lett."},{"key":"1586_CR35","doi-asserted-by":"publisher","unstructured":"Shihao, S., Yilin, C., Wenshan, W., et al.: DytanVO: joint refinement of visual odometry and motion segmentation in dynamic environments. In: Paper Presented at the IEEE International Conference on Robotics and Automation, London, United Kingdom, 29 May\u201302 June (2023). https:\/\/doi.org\/10.1109\/ICRA48891.2023.10161306","DOI":"10.1109\/ICRA48891.2023.10161306"},{"issue":"3","key":"1586_CR36","doi-asserted-by":"publisher","first-page":"5191","DOI":"10.1109\/LRA.2021.3068640","volume":"6","author":"B Berta","year":"2021","unstructured":"Berta, B., Carlos, C., Tard\u00f3s, J.D., et al.: Dynaslam II: tightly-coupled multi-object tracking and slam. IEEE Robot. Autom. Lett. 6(3), 5191\u20135198 (2021). https:\/\/doi.org\/10.1109\/LRA.2021.3068640","journal-title":"IEEE Robot. Autom. Lett."},{"key":"1586_CR37","doi-asserted-by":"publisher","unstructured":"Yanwei, P., Tiancai, W., Muhammad, A.R., et al.: Efficient featurized image pyramid network for single shot detector. In: Paper Presented at the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Long Beach, USA, 15\u201320 June (2019). 
https:\/\/doi.org\/10.1109\/CVPR.2019.00751","DOI":"10.1109\/CVPR.2019.00751"},{"key":"1586_CR38","doi-asserted-by":"publisher","unstructured":"Gouthamaan, M., Swaminathan, J.: Focal-WNet: an architecture unifying convolution and attention for depth estimation. In: Paper Presented at the IEEE 7th International conference for Convergence in Technology, Mumbai, India, 07\u201309 April (2022). https:\/\/doi.org\/10.1109\/I2CT54291.2022.9824488","DOI":"10.1109\/I2CT54291.2022.9824488"},{"key":"1586_CR39","doi-asserted-by":"publisher","unstructured":"Junjie, K., Qifei, W., Yilin, W., et al.: Musiq: multi-scale image quality transformer. In: Paper Presented at the IEEE\/CVF International Conference on Computer Vision, Montreal, Canada, 10\u201317 October (2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.00510","DOI":"10.1109\/ICCV48922.2021.00510"},{"issue":"8","key":"1586_CR40","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.patcog.2021.108348","volume":"122","author":"Y Lina","year":"2022","unstructured":"Lina, Y., Fengqi, Z., Shen-Pei, W.P., et al.: Multi-scale spatial-spectral fusion based on multi-input fusion calculation and coordinate attention for hyperspectral image classification. Pattern Recogn. 122(8), 1\u201313 (2022). https:\/\/doi.org\/10.1016\/j.patcog.2021.108348","journal-title":"Pattern Recogn."},{"issue":"2","key":"1586_CR41","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1016\/j.neucom.2021.10.076","volume":"469","author":"L Peng","year":"2022","unstructured":"Peng, L., Tran, T.C., Bin, K., et al.: Cada: multi-scale collaborative adversarial domain adaptation for unsupervised optic disc and cup segmentation. Neurocomputing 469(2), 209\u2013220 (2022). 
https:\/\/doi.org\/10.1016\/j.neucom.2021.10.076","journal-title":"Neurocomputing"},{"issue":"12","key":"1586_CR42","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1007\/s11063-021-10620-9","volume":"51","author":"JA Kumar","year":"2022","unstructured":"Kumar, J.A., Rajeev, S.: Detection of copy-move forgery in digital image using multi-scale, multi-stage deep learning model. Neural Process. Lett. 51(12), 75\u2013100 (2022). https:\/\/doi.org\/10.1007\/s11063-021-10620-9","journal-title":"Neural Process. Lett."},{"issue":"6","key":"1586_CR43","doi-asserted-by":"publisher","first-page":"12556","DOI":"10.1007\/s10489-021-03121-8","volume":"52","author":"Z Xinxin","year":"2022","unstructured":"Xinxin, Z., Long, Z.: Sa-fpn: an effective feature pyramid network for crowded human detection. Appl. Intell. 52(6), 12556\u201312568 (2022). https:\/\/doi.org\/10.1007\/s10489-021-03121-8","journal-title":"Appl. Intell."},{"issue":"8","key":"1586_CR44","doi-asserted-by":"publisher","first-page":"15547","DOI":"10.1007\/s10489-022-03220-0","volume":"52","author":"L Yuancheng","year":"2022","unstructured":"Yuancheng, L., Shenglong, Z., Hui, C.: Attention-based fusion factor in fpn for object detection. Appl. Intell. 52(8), 15547\u201315556 (2022). https:\/\/doi.org\/10.1007\/s10489-022-03220-0","journal-title":"Appl. Intell."},{"key":"1586_CR45","doi-asserted-by":"publisher","unstructured":"Ravi, G., Kumar, B.V., Gustavo, C., et al.: Unsupervised cnn for single view depth estimation: geometry to the rescue. In: Paper Presented at the European Conference on Computer Vision, Amsterdam, Netherlands, 10\u201316 October (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_45","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"1586_CR46","doi-asserted-by":"publisher","unstructured":"Tinghui, Z., Matthew, B., Noah, S., et al.: Unsupervised learning of depth and ego-motion from video. 
In: Paper Presented at the IEEE Conference on Computer Vision and Pattern Recognition, Honolulu, USA, 21\u201326 July (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.700","DOI":"10.1109\/CVPR.2017.700"},{"issue":"14","key":"1586_CR47","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.image.2023.116921","volume":"112","author":"W Zige","year":"2023","unstructured":"Zige, W., Zhen, C., Congxuan, Z., et al.: Lcif-net: local criss-cross attention based optical flow method using multi-scale image features and feature pyramid. Signal Process. Image Commun. 112(14), 1\u201313 (2023). https:\/\/doi.org\/10.1016\/j.image.2023.116921","journal-title":"Signal Process. Image Commun."},{"key":"1586_CR48","doi-asserted-by":"publisher","unstructured":"Dong, N., Rui, L., Ling, W., et al.: Pyramid architecture for multi-scale processing in point cloud segmentation. In: Paper Presented at the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, New Orleans, USA, 18\u201324 June (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01677","DOI":"10.1109\/CVPR52688.2022.01677"},{"issue":"4","key":"1586_CR49","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1778765.1778862","volume":"29","author":"S Kalyan","year":"2010","unstructured":"Kalyan, S., Johnson, M.K., Wojciech, M., et al.: Multi-scale image harmonization. ACM Trans. Graph. 29(4), 1\u201310 (2010). https:\/\/doi.org\/10.1145\/1778765.1778862","journal-title":"ACM Trans. Graph."},{"issue":"4","key":"1586_CR50","doi-asserted-by":"publisher","first-page":"10353","DOI":"10.48550\/arXiv.2207.14284","volume":"35","author":"R Yongming","year":"2022","unstructured":"Yongming, R., Wenliang, Z., Yansong, T., et al.: Hornet: efficient high-order spatial interactions with recursive gated convolutions. Adv. Neural Inf. Process. Syst. 35(4), 10353\u201310366 (2022). https:\/\/doi.org\/10.48550\/arXiv.2207.14284","journal-title":"Adv. Neural Inf. Process. 
Syst."},{"key":"1586_CR51","doi-asserted-by":"publisher","unstructured":"Sanghyun, W., Shoubhik, D., Ronghang, H., et al.: Convnext v2: co-designing and scaling convnets with masked autoencoders, pp. 1\u201316 (2023) arXiv:2301.00808. https:\/\/doi.org\/10.48550\/arXiv.2301.00808","DOI":"10.48550\/arXiv.2301.00808"},{"key":"1586_CR52","doi-asserted-by":"publisher","unstructured":"Ding, X., Zhang, X., Zhou, Y., et al.: Scaling up your kernels to 31$$\\times $$31: revisiting large kernel design in CNNs. In: Paper Presented at the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, New Orleans, USA, 18\u201324 June (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01166","DOI":"10.1109\/CVPR52688.2022.01166"},{"key":"1586_CR53","doi-asserted-by":"publisher","unstructured":"Cl\u00e9ment, G., Oisin, M.A., Michael, F., et al.: Digging into self-supervised monocular depth estimation. In: Paper Presented at the IEEE\/CVF International Conference on Computer Vision, Seoul, Korea, 27 October\u201302 November (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00393","DOI":"10.1109\/ICCV.2019.00393"},{"key":"1586_CR54","doi-asserted-by":"publisher","unstructured":"Wei, Y., Jianming, Z., Oliver, W., et al.: Learning to recover 3d scene shape from a single image. In: Paper Presented at the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Nashville, USA, 20\u201325 June (2021). https:\/\/doi.org\/10.1109\/CVPR46437.2021.00027","DOI":"10.1109\/CVPR46437.2021.00027"},{"key":"1586_CR55","doi-asserted-by":"publisher","unstructured":"John, L., Zhuang, L., Ozan, S., et al.: MSeg: a composite dataset for multi-domain semantic segmentation. In: Paper Presented at the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Seattle, USA, 13\u201319 June (2020). 
https:\/\/doi.org\/10.1109\/CVPR42600.2020.00295","DOI":"10.1109\/CVPR42600.2020.00295"}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01586-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-024-01586-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01586-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T04:06:44Z","timestamp":1726027604000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-024-01586-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,19]]},"references-count":55,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2024,9]]}},"alternative-id":["1586"],"URL":"https:\/\/doi.org\/10.1007\/s00138-024-01586-4","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"type":"print","value":"0932-8092"},{"type":"electronic","value":"1432-1769"}],"subject":[],"published":{"date-parts":[[2024,8,19]]},"assertion":[{"value":"21 May 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 March 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 July 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 August 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article 
History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"The research did not involve human participants or animals.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed consent"}}],"article-number":"115"}}