{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T14:32:53Z","timestamp":1763389973053,"version":"3.44.0"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2025,6,2]],"date-time":"2025-06-02T00:00:00Z","timestamp":1748822400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,6,2]],"date-time":"2025-06-02T00:00:00Z","timestamp":1748822400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s11263-025-02438-y","type":"journal-article","created":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T22:47:23Z","timestamp":1748818043000},"page":"5483-5504","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["RGB-D Visual Perception for Occluded Scenes via Event Camera"],"prefix":"10.1007","volume":"133","author":[{"given":"Siqi","family":"Li","sequence":"first","affiliation":[]},{"given":"Zongze","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Yipeng","family":"Li","sequence":"additional","affiliation":[]},{"given":"Zhou","family":"Xue","sequence":"additional","affiliation":[]},{"given":"Yu-Shen","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Yue","family":"Gao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,2]]},"reference":[{"issue":"1","key":"2438_CR1","first-page":"361","volume":"44","author":"H Akolkar","year":"2020","unstructured":"Akolkar, H., Ieng, S.-H., & Benosman, R. (2020). Real-time high speed motion prediction using fast aperture-robust event-driven visual flow. IEEE Transactions on Pattern Analysis and Machine Intelligence, 44(1), 361\u2013372.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2438_CR2","doi-asserted-by":"crossref","unstructured":"Bae, G., Budvytis, I., & Cipolla, R. (2022). Multi-view depth estimation by fusing single-view depth probability with multi-view geometry. In IEEE Conference on Computer Vision and Pattern Recognition (pp. 2842\u20132851).","DOI":"10.1109\/CVPR52688.2022.00286"},{"issue":"9","key":"2438_CR3","doi-asserted-by":"publisher","first-page":"2548","DOI":"10.1007\/s11263-021-01484-6","volume":"129","author":"J-W Bian","year":"2021","unstructured":"Bian, J.-W., Zhan, H., Wang, N., Li, Z., Zhang, L., Shen, C., Cheng, M.-M., & Reid, I. (2021). Unsupervised scale-consistent depth learning from video. International Journal of Computer Vision, 129(9), 2548\u20132564.","journal-title":"International Journal of Computer Vision"},{"issue":"10","key":"2438_CR4","doi-asserted-by":"publisher","first-page":"2333","DOI":"10.1109\/JSSC.2014.2342715","volume":"49","author":"C Brandli","year":"2014","unstructured":"Brandli, C., Berner, R., Yang, M., Liu, S.-C., & Delbruck, T. (2014). A 240$$\\times $$ 180 130 dB 3 $$\\mu $$s latency global shutter spatiotemporal vision sensor. IEEE Journal of Solid-State Circuits, 49(10), 2333\u20132341.","journal-title":"IEEE Journal of Solid-State Circuits"},{"key":"2438_CR5","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., & Fei-Fei, L. (2009). ImageNet: A large-scale hierarchical image database. In IEEE Conference on Computer Vision and Pattern Recognition (pp. 248\u2013255). IEEE.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"2438_CR6","unstructured":"Eigen, D., Puhrsch, C., & Fergus, R. (2014). Depth map prediction from a single image using a multi-scale deep network. Advances in Neural Information Processing Systems27."},{"issue":"3","key":"2438_CR7","doi-asserted-by":"publisher","first-page":"3181","DOI":"10.1109\/TPAMI.2022.3182052","volume":"45","author":"Y Gao","year":"2023","unstructured":"Gao, Y., Feng, Y., Ji, S., & Ji, R. (2023). $$\\text{ HGNN}^+$$: General hypergraph neural networks. IEEE Transactions on Pattern Analysis and Machine Intelligence, 45(3), 3181\u20133199.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"3","key":"2438_CR8","doi-asserted-by":"publisher","first-page":"601","DOI":"10.1007\/s11263-019-01209-w","volume":"128","author":"D Gehrig","year":"2020","unstructured":"Gehrig, D., Rebecq, H., Gallego, G., & Scaramuzza, D. (2020). EKLT: Asynchronous photometric feature tracking using events and frames. International Journal of Computer Vision, 128(3), 601\u2013618.","journal-title":"International Journal of Computer Vision"},{"issue":"2","key":"2438_CR9","doi-asserted-by":"publisher","first-page":"2822","DOI":"10.1109\/LRA.2021.3060707","volume":"6","author":"D Gehrig","year":"2021","unstructured":"Gehrig, D., R\u00fcegg, M., Gehrig, M., Hidalgo-Carri\u00f3, J., & Scaramuzza, D. (2021). Combining events and frames using recurrent asynchronous multimodal networks for monocular depth prediction. IEEE Robotics and Automation Letters, 6(2), 2822\u20132829.","journal-title":"IEEE Robotics and Automation Letters"},{"key":"2438_CR10","doi-asserted-by":"crossref","unstructured":"Godard, C., Mac\u00a0Aodha, O., & Brostow, G. J. (2017). Unsupervised monocular depth estimation with left-right consistency. In IEEE Conference on Computer Vision and Pattern Recognition (pp. 270\u2013279).","DOI":"10.1109\/CVPR.2017.699"},{"key":"2438_CR11","doi-asserted-by":"crossref","unstructured":"Hidalgo-Carri\u00f3, J., Gehrig, D., & Scaramuzza, D. (2020). Learning monocular dense depth from events. In International Conference 3D Vision (pp. 534\u2013542). IEEE.","DOI":"10.1109\/3DV50981.2020.00063"},{"key":"2438_CR12","doi-asserted-by":"crossref","unstructured":"Levin, A., Lischinski, D., & Weiss, Y. (2004). Colorization using optimization. In ACM SIGGRAPH (pp. 689\u2013694).","DOI":"10.1145\/1186562.1015780"},{"key":"2438_CR13","doi-asserted-by":"crossref","unstructured":"Li, S.-Q., Gao, Y., & Dai, Q.-H. (2022). Image De-occlusion via event-enhanced Multi-modal Fusion Hybrid Network. Machine Intelligence Research, 1\u201312.","DOI":"10.1007\/s11633-022-1350-3"},{"key":"2438_CR14","doi-asserted-by":"crossref","unstructured":"Liao, W., Zhang, X., Yu, L., Lin, S., Yang, W., & Qiao, N. (2022). Synthetic aperture imaging with events and frames. In: IEEE Conference on Computer Vision and Pattern Recognition (pp. 17735\u201317744).","DOI":"10.1109\/CVPR52688.2022.01721"},{"key":"2438_CR15","doi-asserted-by":"crossref","unstructured":"Liu, G., Reda, F. A., Shih, K. J., Wang, T.-C., Tao, A., & Catanzaro, B. (2018). Image inpainting for irregular holes using partial convolutions. In European Conference on Computer Vision (pp. 85\u2013100).","DOI":"10.1007\/978-3-030-01252-6_6"},{"issue":"10","key":"2438_CR16","doi-asserted-by":"publisher","first-page":"2024","DOI":"10.1109\/TPAMI.2015.2505283","volume":"38","author":"F Liu","year":"2015","unstructured":"Liu, F., Shen, C., Lin, G., & Reid, I. (2015). Learning depth from single monocular images using deep convolutional neural fields. IEEE Transactions on Pattern Analysis and Machine Intelligence, 38(10), 2024\u20132039.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2438_CR17","unstructured":"Loshchilov, I., & Hutter, F. (2017). SGDR: Stochastic gradient descent with warm restarts. In International Conference on Learning Representations."},{"key":"2438_CR18","unstructured":"Loshchilov, I., & Hutter, F. (2019). Decoupled weight decay regularization. IEEE Conference on Computer Vision and Pattern Recognition."},{"issue":"4","key":"2438_CR19","doi-asserted-by":"publisher","first-page":"900","DOI":"10.1007\/s11263-020-01410-2","volume":"129","author":"M Mostafavi","year":"2021","unstructured":"Mostafavi, M., Wang, L., & Yoon, K.-J. (2021). Learning to reconstruct HDR images from events, with applications to depth and flow prediction. International Journal of Computer Vision, 129(4), 900\u2013920.","journal-title":"International Journal of Computer Vision"},{"issue":"12","key":"2438_CR20","doi-asserted-by":"publisher","first-page":"1381","DOI":"10.1007\/s11263-018-1106-2","volume":"126","author":"G Munda","year":"2018","unstructured":"Munda, G., Reinbacher, C., & Pock, T. (2018). Real-time intensity-image reconstruction for event cameras using manifold regularisation. International Journal of Computer Vision, 126(12), 1381\u20131393.","journal-title":"International Journal of Computer Vision"},{"key":"2438_CR21","doi-asserted-by":"crossref","unstructured":"Patni, S., Agarwal, A., & Arora, C. (2024). ECoDepth: Effective conditioning of diffusion models for monocular depth estimation. In IEEE Conference on Computer Vision and Pattern Recognition (pp. 28285\u201328295).","DOI":"10.1109\/CVPR52733.2024.02672"},{"issue":"2","key":"2438_CR22","doi-asserted-by":"crossref","first-page":"566","DOI":"10.1109\/JSSC.2007.914337","volume":"43","author":"L Patrick","year":"2008","unstructured":"Patrick, L., Christoph, P., & Tobi, D. (2008). A 128$$\\times $$ 128 120 dB 15 $$\\mu $$s latency asynchronous temporal contrast vision sensor. IEEE Journal of Solid-State Circuits, 43(2), 566\u2013576.","journal-title":"IEEE Journal of Solid-State Circuits"},{"issue":"1","key":"2438_CR23","doi-asserted-by":"publisher","first-page":"174","DOI":"10.1016\/j.patcog.2012.06.014","volume":"46","author":"Z Pei","year":"2013","unstructured":"Pei, Z., Zhang, Y., Chen, X., & Yang, Y.-H. (2013). Synthetic aperture imaging using pixel labeling via energy minimization. Pattern Recognition, 46(1), 174\u2013187.","journal-title":"Pattern Recognition"},{"issue":"12","key":"2438_CR24","doi-asserted-by":"publisher","first-page":"1394","DOI":"10.1007\/s11263-017-1050-6","volume":"126","author":"H Rebecq","year":"2018","unstructured":"Rebecq, H., Gallego, G., Mueggler, E., & Scaramuzza, D. (2018). EMVS: Event-based multi-view stereo-3D reconstruction with an event camera in real-time. International Journal of Computer Vision, 126(12), 1394\u20131414.","journal-title":"International Journal of Computer Vision"},{"key":"2438_CR25","unstructured":"Rebecq, H., Ranftl, R., Koltun, V., & Scaramuzza, D. (2019). High speed and high dynamic range video with an event camera. IEEE Transactions on Pattern Analysis and Machine Intelligence."},{"key":"2438_CR26","doi-asserted-by":"crossref","unstructured":"Silberman, N., Hoiem, D., Kohli, P., & Fergus, R. (2012). Indoor segmentation and support inference from RGBD images. In The European Conference on Computer Vision (pp. 746\u2013760). Springer.","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"2438_CR27","unstructured":"Simonyan, K., & Zisserman, A. (2015). Very deep convolutional networks for large-scale image recognition. IEEE Conference on Computer Vision and Pattern Recognition."},{"key":"2438_CR28","unstructured":"Sobel, I., Feldman, G., et al. (1968). A 3$$\\times $$3 isotropic gradient operator for image processing. A Talk at the Stanford Artificial Project in (pp. 271\u2013272)."},{"key":"2438_CR29","doi-asserted-by":"crossref","unstructured":"Su, Z., Liu, W., Yu, Z., Hu, D., Liao, Q., Tian, Q., Pietik\u00e4inen, M., & Liu, L. (2021). Pixel difference networks for efficient edge detection. In International Conference on Computer Vision (pp. 5117\u20135127).","DOI":"10.1109\/ICCV48922.2021.00507"},{"key":"2438_CR30","unstructured":"Teed, Z., & Deng, J. (2020). DeepV2D: Video to depth with differentiable structure from motion. The International Conference on Learning Representations."},{"key":"2438_CR31","doi-asserted-by":"crossref","unstructured":"Vaish, V., Levoy, M., Szeliski, R., Zitnick, C. L., & Kang, S. B. (2006). Reconstructing occluded surfaces using synthetic apertures: Stereo, focus and robust measures. In IEEE Conference on Computer Vision and Pattern Recognition (Vol. 2, pp. 2331\u20132338). IEEE.","DOI":"10.1109\/CVPR.2006.244"},{"key":"2438_CR32","doi-asserted-by":"crossref","unstructured":"Vaish, V., Wilburn, B., Joshi, N., & Levoy, M. (2004). Using plane + parallax for calibrating dense camera arrays. In IEEE Conference on Computer Vision and Pattern Recognition (Vol. 1, p. IEEE)","DOI":"10.1109\/CVPR.2004.1315006"},{"key":"2438_CR33","doi-asserted-by":"crossref","unstructured":"Wang, P., Shen, X., Lin, Z., Cohen, S., Price, B., & Yuille, A. L. (2015). Towards unified depth and semantic prediction from a single image. In IEEE Conference on Computer Vision and Pattern Recognition (pp. 2800\u20132809).","DOI":"10.1109\/CVPR.2015.7298897"},{"key":"2438_CR34","doi-asserted-by":"crossref","unstructured":"Wang, Y., Pan, Z., Li, X., Cao, Z., Xian, K., & Zhang, J. (2022). Less is more: Consistent video depth estimation with masked frames modeling. In ACM International Conference on Multimedia (pp. 6347\u20136358).","DOI":"10.1145\/3503161.3547978"},{"key":"2438_CR35","doi-asserted-by":"crossref","unstructured":"Wang, Y., Wu, T., Yang, J., Wang, L., An, W., & Guo, Y. (2020). DeOccNet: Learning to see through foreground occlusions in light fields. In IEEE Winter Conference on Applications of Computer Vision (pp. 118\u2013127).","DOI":"10.1109\/WACV45572.2020.9093448"},{"key":"2438_CR36","doi-asserted-by":"crossref","unstructured":"Wei, X., Zhang, Y., Li, Z., Fu, Y., & Xue, X. (2020). DeepSfM: Structure from motion via deep bundle adjustment. In The European Conference on Computer Vision (pp. 230\u2013247). Springer.","DOI":"10.1007\/978-3-030-58452-8_14"},{"key":"2438_CR37","doi-asserted-by":"crossref","unstructured":"Xiao, Y., Li, L., Li, X., & Yao, J. (2022). DeepMLE: A robust deep maximum likelihood estimator for two-view structure from motion. In IEEE Conference on Intelligent Robots and Systems (pp. 10643\u201310650).","DOI":"10.1109\/IROS47612.2022.9981975"},{"key":"2438_CR38","doi-asserted-by":"crossref","unstructured":"Xue, Y., Li, H., Leutenegger, S., & St\u00fcckler, J. (2024). Event-based non-rigid reconstruction of low-rank parametrized deformations from contours. International Journal of Computer Vision 1\u201319.","DOI":"10.1007\/s11263-024-02011-z"},{"key":"2438_CR39","doi-asserted-by":"crossref","unstructured":"Yu, L., Zhang, X., Liao, W., Yang, W., & Xia, G.-S. (2023). Learning to see through with events. IEEE Transactions on Pattern Analysis and Machine Intelligence, 45(7), 8660\u20138678.","DOI":"10.1109\/TPAMI.2022.3227448"},{"key":"2438_CR40","doi-asserted-by":"crossref","unstructured":"Yuan, W., Gu, X., Dai, Z., Zhu, S., & Tan, P. (2022). NeWCRFs: Neural window fully-connected CRFs for monocular depth estimation. In IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR52688.2022.00389"},{"key":"2438_CR41","doi-asserted-by":"crossref","unstructured":"Zamir, S. W., Arora, A., Khan, S., Hayat, M., Khan, F. S., & Yang, M.-H. (2022). Restormer: Efficient transformer for high-resolution image restoration. In IEEE Conference on Computer Vision and Pattern Recognition (pp. 5728\u20135739).","DOI":"10.1109\/CVPR52688.2022.00564"},{"key":"2438_CR42","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A. A., Shechtman, E., & Wang, O. (2018). The unreasonable effectiveness of deep features as a perceptual metric. In IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2018.00068"},{"issue":"5","key":"2438_CR43","doi-asserted-by":"publisher","first-page":"1284","DOI":"10.1007\/s11263-023-01754-5","volume":"131","author":"C Zhou","year":"2023","unstructured":"Zhou, C., Teng, M., Han, J., Liang, J., Xu, C., Cao, G., & Shi, B. (2023). Deblurring low-light images with events. International Journal of Computer Vision, 131(5), 1284\u20131298.","journal-title":"International Journal of Computer Vision"},{"key":"2438_CR44","doi-asserted-by":"crossref","unstructured":"Zhu, S., & Liu, X. (2023). LightedDepth: Video depth estimation in light of limited inference view angles. In IEEE Conference on Computer Vision and Pattern Recognition (pp. 5003\u20135012).","DOI":"10.1109\/CVPR52729.2023.00484"},{"key":"2438_CR45","doi-asserted-by":"crossref","unstructured":"Zihao\u00a0Zhu, A., Yuan, L., Chaney, K., & Daniilidis, K. (2018). Unsupervised event-based optical flow using motion compensation. In European Conference on Computer Vision Workshops.","DOI":"10.1007\/978-3-030-11024-6_54"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02438-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02438-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02438-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T16:40:14Z","timestamp":1757176814000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02438-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,2]]},"references-count":45,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["2438"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02438-y","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"type":"print","value":"0920-5691"},{"type":"electronic","value":"1573-1405"}],"subject":[],"published":{"date-parts":[[2025,6,2]]},"assertion":[{"value":"23 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 March 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 June 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}