{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:20:47Z","timestamp":1765340447421,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62302041"],"award-info":[{"award-number":["62302041"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"China National Postdoctoral Program","award":["BX20230469"],"award-info":[{"award-number":["BX20230469"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754912","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:47:18Z","timestamp":1761374838000},"page":"7490-7499","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Revealing Latent Information: A Physics-inspired Self-supervised Pre-training Framework for Noisy and Sparse Events"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6487-0441","authenticated-orcid":false,"given":"Lin","family":"Zhu","sequence":"first","affiliation":[{"name":"Beijing Institute of Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-4697-1910","authenticated-orcid":false,"given":"Ruonan","family":"Liu","sequence":"additional","affiliation":[{"name":"Beijing Institute of Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6117-6745","authenticated-orcid":false,"given":"Xiao","family":"Wang","sequence":"additional","affiliation":[{"name":"Anhui University, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1953-3339","authenticated-orcid":false,"given":"Lizhi","family":"Wang","sequence":"additional","affiliation":[{"name":"Beijing Normal University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2587-1702","authenticated-orcid":false,"given":"Hua","family":"Huang","sequence":"additional","affiliation":[{"name":"Beijing Normal University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00205"},{"key":"e_1_3_2_1_2_1","volume-title":"International Conference on Machine Learning. PMLR, 1298-1312","author":"Baevski Alexei","year":"2022","unstructured":"Alexei Baevski, Wei-Ning Hsu, Qiantong Xu, Arun Babu, Jiatao Gu, and Michael Auli. 2022. Data2vec: A general framework for self-supervised learning in speech, vision and language. In International Conference on Machine Learning. PMLR, 1298-1312."},{"key":"e_1_3_2_1_3_1","volume-title":"Beit: Bert pre-training of image transformers. arXiv preprint arXiv:2106.08254","author":"Bao Hangbo","year":"2021","unstructured":"Hangbo Bao, Li Dong, Songhao Piao, and Furu Wei. 2021. Beit: Bert pre-training of image transformers. arXiv preprint arXiv:2106.08254 (2021)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3023597"},{"key":"e_1_3_2_1_5_1","volume-title":"DDD17: End-to-end DAVIS driving dataset. arXiv preprint arXiv:1711.01458","author":"Binas Jonathan","year":"2017","unstructured":"Jonathan Binas, Daniel Neil, Shih-Chii Liu, and Tobi Delbruck. 2017. DDD17: End-to-end DAVIS driving dataset. arXiv preprint arXiv:1711.01458 (2017)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2014.2342715"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00209"},{"key":"e_1_3_2_1_8_1","volume-title":"Unsupervised learning of visual features by contrasting cluster assignments. Advances in neural information processing systems","author":"Caron Mathilde","year":"2020","unstructured":"Mathilde Caron, Ishan Misra, Julien Mairal, Priya Goyal, Piotr Bojanowski, and Armand Joulin. 2020. Unsupervised learning of visual features by contrasting cluster assignments. Advances in neural information processing systems, Vol. 33 (2020), 9912-9924."},{"key":"e_1_3_2_1_9_1","volume-title":"International conference on machine learning. PMLR, 1597-1607","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey Hinton. 2020. A simple framework for contrastive learning of visual representations. In International conference on machine learning. PMLR, 1597-1607."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00950"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS.2010.5537149"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00124"},{"key":"e_1_3_2_1_14_1","volume-title":"A Dynamic Graph CNN with Cross-Representation Distillation for Event-Based Recognition. arXiv preprint arXiv:2302.04177","author":"Deng Yongjian","year":"2023","unstructured":"Yongjian Deng, Hao Chen, Bochen Xie, Hai Liu, and Youfu Li. 2023. A Dynamic Graph CNN with Cross-Representation Distillation for Event-Based Recognition. arXiv preprint arXiv:2302.04177 (2023)."},{"key":"e_1_3_2_1_15_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Guillermo Gallego Tobi Delbr\u00fcck Garrick Orchard Chiara Bartolozzi Brian Taba Andrea Censi Stefan Leutenegger Andrew J Davison J\u00f6rg Conradt Kostas Daniilidis et al. 2020. Event-based vision: A survey. IEEE transactions on pattern analysis and machine intelligence Vol. 44 1 (2020) 154-180.","DOI":"10.1109\/TPAMI.2020.3008413"},{"key":"e_1_3_2_1_17_1","volume-title":"Convmae: Masked convolution meets masked autoencoders. arXiv preprint arXiv:2205.03892","author":"Gao Peng","year":"2022","unstructured":"Peng Gao, Teli Ma, Hongsheng Li, Ziyi Lin, Jifeng Dai, and Yu Qiao. 2022. Convmae: Masked convolution meets masked autoencoders. arXiv preprint arXiv:2205.03892 (2022)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00573"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3068942"},{"key":"e_1_3_2_1_20_1","volume-title":"Zhaohan Guo, Mohammad Gheshlaghi Azar, et al.","author":"Grill Jean-Bastien","year":"2020","unstructured":"Jean-Bastien Grill, Florian Strub, Florent Altch\u00e9, Corentin Tallec, Pierre Richemond, Elena Buchatskaya, Carl Doersch, Bernardo Avila Pires, Zhaohan Guo, Mohammad Gheshlaghi Azar, et al., 2020. Bootstrap your own latent-a new approach to self-supervised learning. Advances in neural information processing systems, Vol. 33 (2020), 21271-21284."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02190"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"e_1_3_2_1_24_1","volume-title":"Milan: Masked image pretraining on language assisted representation. arXiv preprint arXiv:2208.06049","author":"Hou Zejiang","year":"2022","unstructured":"Zejiang Hou, Fei Sun, Yen-Kuang Chen, Yuan Xie, and Sun-Yuan Kung. 2022. Milan: Masked image pretraining on language assisted representation. arXiv preprint arXiv:2208.06049 (2022)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00144"},{"key":"e_1_3_2_1_26_1","first-page":"19997","article-title":"Green hierarchical vision transformer for masked image modeling","volume":"35","author":"Huang Lang","year":"2022","unstructured":"Lang Huang, Shan You, Mingkai Zheng, Fei Wang, Chen Qian, and Toshihiko Yamasaki. 2022. Green hierarchical vision transformer for masked image modeling. Advances in Neural Information Processing Systems, Vol. 35 (2022), 19997-20010.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_27_1","volume-title":"Contrastive masked autoencoders are stronger vision learners","author":"Huang Zhicheng","year":"2023","unstructured":"Zhicheng Huang, Xiaojie Jin, Chengze Lu, Qibin Hou, Ming-Ming Cheng, Dongmei Fu, Xiaohui Shen, and Jiashi Feng. 2023. Contrastive masked autoencoders are stronger vision learners. IEEE Transactions on Pattern Analysis and Machine Intelligence (2023)."},{"key":"e_1_3_2_1_28_1","volume-title":"Data-efficient Event Camera Pre-training via Disentangled Masked Modeling. arXiv preprint arXiv:2403.00416","author":"Huang Zhenpeng","year":"2024","unstructured":"Zhenpeng Huang, Chao Li, Hao Chen, Yongjian Deng, Yifeng Geng, and Limin Wang. 2024. Data-efficient Event Camera Pre-training via Disentangled Masked Modeling. arXiv preprint arXiv:2403.00416 (2024)."},{"key":"e_1_3_2_1_29_1","volume-title":"Layer grafted pre-training: Bridging contrastive learning and masked image modeling for label-efficient representations. arXiv preprint arXiv:2302.14138","author":"Jiang Ziyu","year":"2023","unstructured":"Ziyu Jiang, Yinpeng Chen, Mengchen Liu, Dongdong Chen, Xiyang Dai, Lu Yuan, Zicheng Liu, and Zhangyang Wang. 2023. Layer grafted pre-training: Bridging contrastive learning and masked image modeling for label-efficient representations. arXiv preprint arXiv:2302.14138 (2023)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00215"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00237"},{"key":"e_1_3_2_1_32_1","volume-title":"Cifar10-dvs: an event-stream dataset for object classification. Frontiers in neuroscience","author":"Li Hongmin","year":"2017","unstructured":"Hongmin Li, Hanchao Liu, Xiangyang Ji, Guoqi Li, and Luping Shi. 2017. Cifar10-dvs: an event-stream dataset for object classification. Frontiers in neuroscience, Vol. 11 (2017), 309."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00097"},{"key":"e_1_3_2_1_34_1","volume-title":"Es-imagenet: A million event-stream classification dataset for spiking neural networks. Frontiers in neuroscience","author":"Lin Yihan","year":"2021","unstructured":"Yihan Lin, Wei Ding, Shaohua Qiang, Lei Deng, and Guoqi Li. 2021. Es-imagenet: A million event-stream classification dataset for spiking neural networks. Frontiers in neuroscience, Vol. 15 (2021), 726582."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00888"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58598-3_25"},{"key":"e_1_3_2_1_38_1","volume-title":"Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748","author":"van den Oord Aaron","year":"2018","unstructured":"Aaron van den Oord, Yazhe Li, and Oriol Vinyals. 2018. Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748 (2018)."},{"key":"e_1_3_2_1_39_1","volume-title":"Converting static image datasets to spiking neuromorphic datasets using saccades. Frontiers in neuroscience","author":"Orchard Garrick","year":"2015","unstructured":"Garrick Orchard, Ajinkya Jayawant, Gregory K Cohen, and Nitish Thakor. 2015. Converting static image datasets to spiking neuromorphic datasets using saccades. Frontiers in neuroscience, Vol. 9 (2015), 437."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00555"},{"key":"e_1_3_2_1_41_1","volume-title":"International conference on machine learning. PMLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748-8763."},{"key":"e_1_3_2_1_42_1","volume-title":"International conference on machine learning. Pmlr, 8821-8831","author":"Ramesh Aditya","year":"2021","unstructured":"Aditya Ramesh, Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford, Mark Chen, and Ilya Sutskever. 2021. Zero-shot text-to-image generation. In International conference on machine learning. Pmlr, 8821-8831."},{"key":"e_1_3_2_1_43_1","volume-title":"Discrete variational autoencoders. arXiv preprint arXiv:1609.02200","author":"Rolfe Jason Tyler","year":"2016","unstructured":"Jason Tyler Rolfe. 2016. Discrete variational autoencoders. arXiv preprint arXiv:1609.02200 (2016)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01205"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00186"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19830-4_20"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00212"},{"key":"e_1_3_2_1_48_1","volume-title":"Videomae: Masked autoencoders are data-efficient learners for self-supervised video pre-training. Advances in neural information processing systems","author":"Tong Zhan","year":"2022","unstructured":"Zhan Tong, Yibing Song, Jue Wang, and Limin Wang. 2022. Videomae: Masked autoencoders are data-efficient learners for self-supervised video pre-training. Advances in neural information processing systems, Vol. 35 (2022), 10078-10093."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3220938"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20056-4_20"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3073016"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01228-1_26"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3140819"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00943"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00982"},{"key":"e_1_3_2_1_56_1","volume-title":"European Conference on Computer Vision. Springer, 292-310","author":"Yang Yan","year":"2024","unstructured":"Yan Yang, Liyuan Pan, and Liu Liu. 2024. Event camera data dense pre-training. In European Conference on Computer Vision. Springer, 292-310."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01006"},{"key":"e_1_3_2_1_58_1","volume-title":"Masked image modeling with denoising contrast. arXiv preprint arXiv:2205.09616","author":"Yi Kun","year":"2022","unstructured":"Kun Yi, Yixiao Ge, Xiaotong Li, Shusheng Yang, Dian Li, Jianping Wu, Ying Shan, and Xiaohu Qie. 2022. Masked image modeling with denoising contrast. arXiv preprint arXiv:2205.09616 (2022)."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00044"},{"key":"e_1_3_2_1_60_1","volume-title":"ibot: Image bert pre-training with online tokenizer. arXiv preprint arXiv:2111.07832","author":"Zhou Jinghao","year":"2021","unstructured":"Jinghao Zhou, Chen Wei, Huiyu Wang, Wei Shen, Cihang Xie, Alan Yuille, and Tao Kong. 2021. ibot: Image bert pre-training with online tokenizer. arXiv preprint arXiv:2111.07832 (2021)."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754912","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:17:59Z","timestamp":1765340279000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754912"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":60,"alternative-id":["10.1145\/3746027.3754912","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754912","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}