{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,18]],"date-time":"2026-02-18T01:51:20Z","timestamp":1771379480663,"version":"3.50.1"},"publisher-location":"Cham","reference-count":67,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031726484","type":"print"},{"value":"9783031726491","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72649-1_18","type":"book-chapter","created":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T07:01:50Z","timestamp":1727593310000},"page":"312-330","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Masked Video and\u00a0Body-Worn IMU Autoencoder for\u00a0Egocentric Action Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1792-6654","authenticated-orcid":false,"given":"Mingfang","family":"Zhang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8067-6227","authenticated-orcid":false,"given":"Yifei","family":"Huang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8460-8763","authenticated-orcid":false,"given":"Ruicong","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0097-4537","authenticated-orcid":false,"given":"Yoichi","family":"Sato","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,30]]},"reference":[{"issue":"1","key":"18_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3448083","volume":"5","author":"A Abedin","year":"2021","unstructured":"Abedin, A., Ehsanpour, M., Shi, Q., Rezatofighi, H., Ranasinghe, D.C.: Attend and discriminate: beyond the state-of-the-art for human activity recognition using wearable sensors. Proc. ACM Interact. Mobile Wearab. Ubiquit. Technol. 5(1), 1\u201322 (2021)","journal-title":"Proc. ACM Interact. Mobile Wearab. Ubiquit. Technol."},{"key":"18_CR2","doi-asserted-by":"crossref","unstructured":"Arnab, A., Dehghani, M., Heigold, G., Sun, C., Lu\u010di\u0107, M., Schmid, C.: Vivit: a video vision transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6836\u20136846 (2021)","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"18_CR3","doi-asserted-by":"publisher","unstructured":"Bansal, S., Arora, C., Jawahar, C.V.: My view is the best view: procedure learning from\u00a0egocentric videos. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022, Part XIII, pp. 657\u2013675. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19778-9_38","DOI":"10.1007\/978-3-031-19778-9_38"},{"key":"18_CR4","unstructured":"Bertasius, G., Wang, H., Torresani, L.: Is space-time attention all you need for video understanding? 
In: International Conference on Machine Learning, p.\u00a04 (2021)"},{"key":"18_CR5","doi-asserted-by":"crossref","unstructured":"Bock, M., H\u00f6lzemann, A., Moeller, M., Van\u00a0Laerhoven, K.: Improving deep learning for HAR with shallow LSTMs. In: Proceedings of the 2021 ACM International Symposium on Wearable Computers, pp. 7\u201312 (2021)","DOI":"10.1145\/3460421.3480419"},{"key":"18_CR6","doi-asserted-by":"crossref","unstructured":"Bock, M., Kuehne, H., Van\u00a0Laerhoven, K., Moeller, M.: Wear: an outdoor sports dataset for wearable and egocentric activity recognition. arXiv preprint arXiv:2304.05088 (2023)","DOI":"10.1145\/3699776"},{"key":"18_CR7","doi-asserted-by":"crossref","unstructured":"Brooks, T., Mildenhall, B., Xue, T., Chen, J., Sharlet, D., Barron, J.T.: Unprocessing images for learned raw denoising. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11036\u201311045 (2019)","DOI":"10.1109\/CVPR.2019.01129"},{"key":"18_CR8","doi-asserted-by":"crossref","unstructured":"Chen, R., Chen, J., Liang, Z., Gao, H., Lin, S.: Darklight networks for action recognition in the dark. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 846\u2013852 (2021)","DOI":"10.1109\/CVPRW53098.2021.00094"},{"key":"18_CR9","unstructured":"Chung, J., Gulcehre, C., Cho, K., Bengio, Y.: Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555 (2014)"},{"key":"18_CR10","doi-asserted-by":"crossref","unstructured":"Damen, D., et\u00a0al.: Rescaling egocentric vision: collection, pipeline and challenges for epic-kitchens-100. Int. J. Comput. Vis. 1\u201323 (2022)","DOI":"10.1007\/s11263-021-01531-2"},{"key":"18_CR11","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"18_CR12","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: ICLR (2021)"},{"key":"18_CR13","doi-asserted-by":"crossref","unstructured":"Du, Y., Wang, W., Wang, L.: Hierarchical recurrent neural network for skeleton based action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1110\u20131118 (2015)","DOI":"10.1109\/CVPR.2015.7298714"},{"key":"18_CR14","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: Slowfast networks for video recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6202\u20136211 (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"key":"18_CR15","first-page":"35946","volume":"35","author":"C Feichtenhofer","year":"2022","unstructured":"Feichtenhofer, C., Li, Y., He, K., et al.: Masked autoencoders as spatiotemporal learners. Adv. Neural. Inf. Process. Syst. 35, 35946\u201335958 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"18_CR16","unstructured":"Geng, X., Liu, H., Lee, L., Schuurmans, D., Levine, S., Abbeel, P.: Multimodal masked autoencoders learn transferable representations. arXiv preprint arXiv:2205.14204 (2022)"},{"key":"18_CR17","doi-asserted-by":"crossref","unstructured":"Georgescu, M.I., Fonseca, E., Ionescu, R.T., Lucic, M., Schmid, C., Arnab, A.: Audiovisual masked autoencoders. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 
16144\u201316154 (2023)","DOI":"10.1109\/ICCV51070.2023.01479"},{"key":"18_CR18","doi-asserted-by":"crossref","unstructured":"Gong, X., et al.: Mmg-ego4d: multimodal generalization in egocentric action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6481\u20136491 (2023)","DOI":"10.1109\/CVPR52729.2023.00627"},{"key":"18_CR19","unstructured":"Gong, Y., et al.: Contrastive audio-visual masked autoencoder. arXiv preprint arXiv:2210.07839 (2022)"},{"key":"18_CR20","unstructured":"Grauman, K., et\u00a0al.: Ego4d: around the world in 3,000 hours of egocentric video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18995\u201319012 (2022)"},{"key":"18_CR21","unstructured":"Grauman, K., et\u00a0al.: Ego-exo4d: understanding skilled human activity from first- and third-person perspectives. arXiv preprint arXiv:2311.18259 (2023)"},{"key":"18_CR22","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., Girshick, R.: Masked autoencoders are scalable vision learners. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16000\u201316009 (2022)","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"18_CR23","doi-asserted-by":"crossref","unstructured":"Hou, Z., et al.: Graphmae: self-supervised masked graph autoencoders. In: Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, pp. 594\u2013604 (2022)","DOI":"10.1145\/3534678.3539321"},{"key":"18_CR24","unstructured":"Huang, P.Y., et al.: Masked autoencoders that listen. Adv. Neural. Inf. Process. Syst. 35, 28708\u201328720 (2022)"},{"key":"18_CR25","doi-asserted-by":"publisher","first-page":"7795","DOI":"10.1109\/TIP.2020.3007841","volume":"29","author":"Y Huang","year":"2020","unstructured":"Huang, Y., Cai, M., Li, Z., Lu, F., Sato, Y.: Mutual context network for jointly estimating egocentric gaze and action. IEEE Trans. Image Process. 29, 7795\u20137806 (2020)","journal-title":"IEEE Trans. Image Process."},{"key":"18_CR26","doi-asserted-by":"crossref","unstructured":"Huang, Y., Cai, M., Li, Z., Sato, Y.: Predicting gaze in egocentric video by learning task-dependent attention transition. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 754\u2013769 (2018)","DOI":"10.1007\/978-3-030-01225-0_46"},{"key":"18_CR27","doi-asserted-by":"crossref","unstructured":"Huang, Y., et\u00a0al.: Egoexolearn: a dataset for bridging asynchronous ego- and exo-centric view of procedural activities in real world. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22072\u201322086 (2024)","DOI":"10.1109\/CVPR52733.2024.02084"},{"key":"18_CR28","doi-asserted-by":"crossref","unstructured":"Huang, Y., Sugano, Y., Sato, Y.: Improving action segmentation via graph-based temporal reasoning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14024\u201314034 (2020)","DOI":"10.1109\/CVPR42600.2020.01404"},{"key":"18_CR29","doi-asserted-by":"crossref","unstructured":"Kazakos, E., Nagrani, A., Zisserman, A., Damen, D.: Epic-fusion: audio-visual temporal binding for egocentric action recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 
5492\u20135501 (2019)","DOI":"10.1109\/ICCV.2019.00559"},{"key":"18_CR30","unstructured":"Kwon, G., Cai, Z., Ravichandran, A., Bas, E., Bhotika, R., Soatto, S.: Masked vision and language modeling for multi-modal representation learning. arXiv preprint arXiv:2208.02131 (2022)"},{"key":"18_CR31","doi-asserted-by":"crossref","unstructured":"Liu, R., Ohkawa, T., Zhang, M., Sato, Y.: Single-to-dual-view adaptation for egocentric 3d hand pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 677\u2013686 (2024)","DOI":"10.1109\/CVPR52733.2024.00071"},{"issue":"1","key":"18_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3380999","volume":"4","author":"S Liu","year":"2020","unstructured":"Liu, S., Yao, S., Li, J., Liu, D., Wang, T., Shao, H., Abdelzaher, T.: Globalfusion: a global attentional deep learning framework for multisensor information fusion. Proc. ACM Interact. Mobile Wearab. Ubiquit. Technol. 4(1), 1\u201327 (2020)","journal-title":"Proc. ACM Interact. Mobile Wearab. Ubiquit. Technol."},{"key":"18_CR33","doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: Hoi4d: a 4d egocentric dataset for category-level human-object interaction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21013\u201321022 (2022)","DOI":"10.1109\/CVPR52688.2022.02034"},{"key":"18_CR34","doi-asserted-by":"crossref","unstructured":"Liu, Z., Ning, J., Cao, Y., Wei, Y., Zhang, Z., Lin, S., Hu, H.: Video swin transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3202\u20133211 (2022)","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"18_CR35","doi-asserted-by":"crossref","unstructured":"Liu, Z., Shen, Y., Lakshminarasimhan, V.B., Liang, P.P., Zadeh, A., Morency, L.P.: Efficient low-rank multimodal fusion with modality-specific factors. arXiv preprint arXiv:1806.00064 (2018)","DOI":"10.18653\/v1\/P18-1209"},{"key":"18_CR36","doi-asserted-by":"crossref","unstructured":"Ma, H., Li, W., Zhang, X., Gao, S., Lu, S.: Attnsense: multi-level attention mechanism for multimodal human activity recognition. In: IJCAI, pp. 3109\u20133115 (2019)","DOI":"10.24963\/ijcai.2019\/431"},{"key":"18_CR37","doi-asserted-by":"crossref","unstructured":"Murahari, V.S., Pl\u00f6tz, T.: On attention models for human activity recognition. In: Proceedings of the 2018 ACM International Symposium on Wearable Computers, pp. 100\u2013103 (2018)","DOI":"10.1145\/3267242.3267287"},{"key":"18_CR38","doi-asserted-by":"crossref","unstructured":"Nakamura, K., Ohashi, H., Okada, M.: Sensor-augmented egocentric-video captioning with dynamic modal attention. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 4220\u20134229 (2021)","DOI":"10.1145\/3474085.3475557"},{"key":"18_CR39","doi-asserted-by":"crossref","unstructured":"Ord\u00f3\u00f1ez, F.J., Roggen, D.: Deep convolutional and LSTM recurrent neural networks for multimodal wearable activity recognition. Sensors 16(1), 115 (2016)","DOI":"10.3390\/s16010115"},{"key":"18_CR40","doi-asserted-by":"crossref","unstructured":"Pang, Y., Wang, W., Tay, F.E., Liu, W., Tian, Y., Yuan, L.: Masked autoencoders for point cloud self-supervised learning. In: European Conference on Computer Vision, pp. 604\u2013621. 
Springer (2022)","DOI":"10.1007\/978-3-031-20086-1_35"},{"key":"18_CR41","doi-asserted-by":"crossref","unstructured":"Poria, S., Chaturvedi, I., Cambria, E., Hussain, A.: Convolutional MKL based multimodal emotion recognition and sentiment analysis. In: 2016 IEEE 16th International Conference on Data Mining (ICDM), pp. 439\u2013448. IEEE (2016)","DOI":"10.1109\/ICDM.2016.0055"},{"key":"18_CR42","doi-asserted-by":"crossref","unstructured":"Sener, F., et al.: Assembly101: a large-scale multi-view video dataset for understanding procedural activities. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21096\u201321106 (2022)","DOI":"10.1109\/CVPR52688.2022.02042"},{"key":"18_CR43","unstructured":"Somasundaram, K., et al.: Project aria: a new tool for egocentric multi-modal AI research. arXiv preprint arXiv:2308.13561 (2023)"},{"key":"18_CR44","doi-asserted-by":"crossref","unstructured":"Sudhakaran, S., Escalera, S., Lanz, O.: LSTA: long short-term attention for egocentric action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9954\u20139963 (2019)","DOI":"10.1109\/CVPR.2019.01019"},{"key":"18_CR45","unstructured":"Tang, C.I., Perez-Pozuelo, I., Spathis, D., Mascolo, C.: Exploring contrastive learning in human activity recognition for healthcare. arXiv preprint arXiv:2011.11542 (2020)"},{"key":"18_CR46","unstructured":"Tateno, M., Yagi, T., Furuta, R., Sato, Y.: Learning object states from actions via large language models. arXiv preprint arXiv:2405.01090 (2024)"},{"key":"18_CR47","unstructured":"Tong, Z., Song, Y., Wang, J., Wang, L.: Videomae: masked autoencoders are data-efficient learners for self-supervised video pre-training. Adv. Neural. Inf. Process. Syst. 35, 10078\u201310093 (2022)"},{"key":"18_CR48","unstructured":"De\u00a0la Torre, F., et al.: Guide to the Carnegie Mellon University Multimodal Activity (CMU-MMAC) Database (2009)"},{"key":"18_CR49","unstructured":"Tsutsui, S., Desai, R., Ridgeway, K.: How you move your head tells what you do: self-supervised video representation learning with egocentric cameras and IMU sensors. arXiv preprint arXiv:2110.01680 (2021)"},{"key":"18_CR50","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"18_CR51","doi-asserted-by":"crossref","unstructured":"Vincent, P., Larochelle, H., Bengio, Y., Manzagol, P.A.: Extracting and composing robust features with denoising autoencoders. In: International Conference on Machine Learning, pp. 1096\u20131103 (2008)","DOI":"10.1145\/1390156.1390294"},{"key":"18_CR52","doi-asserted-by":"crossref","unstructured":"Wang, H., Singh, M.K., Torresani, L.: Ego-only: egocentric action detection without exocentric transferring. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5250\u20135261 (2023)","DOI":"10.1109\/ICCV51070.2023.00484"},{"key":"18_CR53","doi-asserted-by":"crossref","unstructured":"Wang, L., Li, W., Li, W., Van\u00a0Gool, L.: Appearance-and-relation networks for video classification. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1430\u20131439 (2018)","DOI":"10.1109\/CVPR.2018.00155"},{"key":"18_CR54","doi-asserted-by":"crossref","unstructured":"Wei, K., Fu, Y., Yang, J., Huang, H.: A physics-based noise formation model for extreme low-light raw denoising. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
2758\u20132767 (2020)","DOI":"10.1109\/CVPR42600.2020.00283"},{"key":"18_CR55","unstructured":"Xiao, F., Lee, Y.J., Grauman, K., Malik, J., Feichtenhofer, C.: Audiovisual slowfast networks for video recognition. arXiv preprint arXiv:2001.08740 (2020)"},{"key":"18_CR56","doi-asserted-by":"crossref","unstructured":"Xu, C., Chai, D., He, J., Zhang, X., Duan, S.: Innohar: a deep neural network for complex human activity recognition. IEEE Access 7, 9893\u20139902 (2019)","DOI":"10.1109\/ACCESS.2018.2890675"},{"key":"18_CR57","doi-asserted-by":"crossref","unstructured":"Xu, H., Zhou, P., Tan, R., Li, M., Shen, G.: Limu-bert: unleashing the potential of unlabeled data for IMU sensing applications. In: Proceedings of the 19th ACM Conference on Embedded Networked Sensor Systems, pp. 220\u2013233 (2021)","DOI":"10.1145\/3485730.3485937"},{"key":"18_CR58","doi-asserted-by":"crossref","unstructured":"Xu, J., et al.: Retrieval-augmented egocentric video captioning. arXiv preprint arXiv:2401.00789 (2024)","DOI":"10.1109\/CVPR52733.2024.01284"},{"key":"18_CR59","unstructured":"Xu, K., Hu, W., Leskovec, J., Jegelka, S.: How powerful are graph neural networks? arXiv preprint arXiv:1810.00826 (2018)"},{"key":"18_CR60","doi-asserted-by":"crossref","unstructured":"Yan, S., Xiong, Y., Lin, D.: Spatial temporal graph convolutional networks for skeleton-based action recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"18_CR61","doi-asserted-by":"crossref","unstructured":"Yang, L., Huang, Y., Sugano, Y., Sato, Y.: Interact before align: leveraging cross-modal knowledge for domain adaptive action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14722\u201314732 (2022)","DOI":"10.1109\/CVPR52688.2022.01431"},{"key":"18_CR62","doi-asserted-by":"crossref","unstructured":"Yao, S., Hu, S., Zhao, Y., Zhang, A., Abdelzaher, T.: Deepsense: a unified deep learning framework for time-series mobile sensing data processing. In: Proceedings of the 26th International Conference on World Wide Web, pp. 351\u2013360 (2017)","DOI":"10.1145\/3038912.3052577"},{"key":"18_CR63","doi-asserted-by":"crossref","unstructured":"Yao, S., et al.: Sadeepsense: self-attention deep learning framework for heterogeneous on-device sensors in internet of things applications. In: IEEE INFOCOM 2019-IEEE Conference on Computer Communications, pp. 1243\u20131251. IEEE (2019)","DOI":"10.1109\/INFOCOM.2019.8737500"},{"key":"18_CR64","unstructured":"Yuan, H., Chan, S., Creagh, A.P., Tong, C., Clifton, D.A., Doherty, A.: Self-supervised learning for human activity recognition using 700,000 person-days of wearable data. arXiv preprint arXiv:2206.02909 (2022)"},{"issue":"12","key":"18_CR65","doi-asserted-by":"publisher","first-page":"9464","DOI":"10.1109\/TPAMI.2021.3130302","volume":"44","author":"M Zhang","year":"2021","unstructured":"Zhang, M., Zheng, Y., Lu, F.: Optical flow in the dark. IEEE Trans. Pattern Anal. Mach. Intell. 44(12), 9464\u20139476 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"18_CR66","doi-asserted-by":"publisher","unstructured":"Zhang, S., et al.: EgoBody: human body shape and\u00a0motion of\u00a0interacting people from\u00a0head-mounted devices. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022, Part VI, pp. 180\u2013200. Springer, Cham (2022). 
https:\/\/doi.org\/10.1007\/978-3-031-20068-7_11","DOI":"10.1007\/978-3-031-20068-7_11"},{"key":"18_CR67","doi-asserted-by":"crossref","unstructured":"Zheng, Y., Zhang, M., Lu, F.: Optical flow in the dark. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6749\u20136757 (2020)","DOI":"10.1109\/CVPR42600.2020.00678"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72649-1_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T21:19:08Z","timestamp":1732828748000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72649-1_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,30]]},"ISBN":["9783031726484","9783031726491"],"references-count":67,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72649-1_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,30]]},"assertion":[{"value":"30 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}