{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:46:05Z","timestamp":1765547165269,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Shanghai Municipal Science and Technology Major Project","award":["2021SHZDZX0102"],"award-info":[{"award-number":["2021SHZDZX0102"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62173283, 61977046"],"award-info":[{"award-number":["62173283, 61977046"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Key R&D Program of China","award":["2021ZD0112600"],"award-info":[{"award-number":["2021ZD0112600"]}]},{"name":"Shanghai Science and Technology Program","award":["21JC1400600"],"award-info":[{"award-number":["21JC1400600"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612042","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:12Z","timestamp":1698391632000},"page":"2553-2563","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Multi-Frame Self-Supervised Depth Estimation with Multi-Scale Feature Fusion in Dynamic Scenes"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8558-4388","authenticated-orcid":false,"given":"Jiquan","family":"Zhong","sequence":"first","affiliation":[{"name":"Xiamen University, Xiamen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4285-6520","authenticated-orcid":false,"given":"Xiaolin","family":"Huang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University &amp; Key Laboratory of System Control and Information Processing, Ministry of Education of China, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2564-645X","authenticated-orcid":false,"given":"Xiao","family":"Yu","sequence":"additional","affiliation":[{"name":"Xiamen University &amp; Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Bian Jiawang","year":"2019","unstructured":"Jiawang Bian, Zhichao Li, Naiyan Wang, Huangying Zhan, Chunhua Shen, Ming-Ming Cheng, and Ian Reid. 2019. Unsupervised scale-consistent depth and ego-motion learning from monocular video. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00716"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.350"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.304"},{"key":"e_1_3_2_1_5_1","volume-title":"Tel Aviv","author":"Feng Ziyue","year":"2022","unstructured":"Ziyue Feng, Liang Yang, Longlong Jing, Haiyan Wang, YingLi Tian, and Bing Li. 2022. Disentangling Object Motion and Occlusion for Unsupervised Multi-frame Monocular Depth. In Computer Vision--ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23-27, 2022, Proceedings, Part XXXII. Springer, 228--244."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00393"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00907"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00256"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00026"},{"key":"e_1_3_2_1_12_1","volume-title":"Semantically-Guided Representation Learning for Self-Supervised Monocular Depth. In International Conference on Learning Representations.","author":"Guizilini Vitor","year":"2019","unstructured":"Vitor Guizilini, Rui Hou, Jie Li, Rares Ambrus, and Adrien Gaidon. 2019. Semantically-Guided Representation Learning for Self-Supervised Monocular Depth. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01261"},{"volume-title":"Multiple view geometry in computer vision","author":"Hartley Richard","key":"e_1_3_2_1_14_1","unstructured":"Richard Hartley and Andrew Zisserman. 2003. Multiple view geometry in computer vision. Cambridge University Press."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19812-0_33"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2007.1166"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00172"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341074"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00481"},{"key":"e_1_3_2_1_21_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16281"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00482"},{"key":"e_1_3_2_1_24_1","volume-title":"Proceedings of the Conference on Robot Learning. PMLR","author":"Li Hanhan","year":"2021","unstructured":"Hanhan Li, Ariel Gordon, Hang Zhao, Vincent Casser, and Anelia Angelova. 2021. Unsupervised monocular depth learning in dynamic scenes. In Proceedings of the Conference on Robot Learning. PMLR, 1908--1917."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01445-z"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126513"},{"key":"e_1_3_2_1_27_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.3017478"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01252"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00092"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2017.00012"},{"key":"e_1_3_2_1_32_1","volume-title":"Sfm-net: Learning of structure and motion from video. arXiv preprint arXiv:1704.07804","author":"Vijayanarasimhan Sudheendra","year":"2017","unstructured":"Sudheendra Vijayanarasimhan, Susanna Ricco, Cordelia Schmid, Rahul Sukthankar, and Katerina Fragkiadaki. 2017. Sfm-net: Learning of structure and motion from video. arXiv preprint arXiv:1704.07804 (2017)."},{"key":"e_1_3_2_1_33_1","volume-title":"3D Hierarchical Refinement and Augmentation for Unsupervised Learning of Depth and Pose from Monocular Video","author":"Wang Guangming","year":"2022","unstructured":"Guangming Wang, Jiquan Zhong, Shijie Zhao, Wenhua Wu, Zhe Liu, and Hesheng Wang. 2022. 3D Hierarchical Refinement and Augmentation for Unsupervised Learning of Depth and Pose from Monocular Video. IEEE Transactions on Circuits and Systems for Video Technology, Vol. DOI (2022), 10.1109\/TCSVT.2022.3215587."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2983686"},{"key":"e_1_3_2_1_35_1","volume-title":"Self-supervised joint learning framework of depth estimation via implicit cues. arXiv preprint arXiv:2006.09876","author":"Wang Jianrong","year":"2020","unstructured":"Jianrong Wang, Ge Zhang, Zhenyu Wu, XueWei Li, and Li Liu. 2020b. Self-supervised joint learning framework of depth estimation via implicit cues. arXiv preprint arXiv:2006.09876 (2020)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00570"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2003.819861"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00122"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548394"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9340802"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00136"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00212"},{"key":"e_1_3_2_1_43_1","volume-title":"Self-Supervised Monocular Depth Estimation with Internal Feature Fusion. In British Machine Vision Conference (BMVC).","author":"Zhou Hang","year":"2021","unstructured":"Hang Zhou, David Greenwood, and Sarah Taylor. 2021. Self-Supervised Monocular Depth Estimation with Internal Feature Fusion. In British Machine Vision Conference (BMVC)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.700"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548381"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Ottawa ON Canada","acronym":"MM '23"},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612042","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612042","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:08:56Z","timestamp":1755821336000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612042"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":45,"alternative-id":["10.1145\/3581783.3612042","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612042","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}