{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:14:21Z","timestamp":1765340061971,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":80,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755404","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:38:54Z","timestamp":1761377934000},"page":"8263-8272","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["EventLip: Enhancing Event-Based Lip Reading via Frequency-Aware Spatiotemporal Hypergraph Modeling"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-2394-3518","authenticated-orcid":false,"given":"Xueyi","family":"Zhang","sequence":"first","affiliation":[{"name":"National University of Defense Technology, changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0218-7829","authenticated-orcid":false,"given":"Jialu","family":"Sun","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Shenzhen, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3375-2458","authenticated-orcid":false,"given":"Chengwei","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of the Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3527-6034","authenticated-orcid":false,"given":"Xianghu","family":"Yue","sequence":"additional","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3303-1036","authenticated-orcid":false,"given":"Tianfang","family":"Xiao","sequence":"additional","affiliation":[{"name":"Sun Yat-sen University, Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4386-0472","authenticated-orcid":false,"given":"Siqi","family":"Cai","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8413-7220","authenticated-orcid":false,"given":"Mingrui","family":"Lao","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9158-9401","authenticated-orcid":false,"given":"Haizhou","family":"Li","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Shenzhen, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"doi-asserted-by":"publisher","key":"e_1_3_2_1_1_1","DOI":"10.1007\/s11042-024-18237-5"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_2_1","DOI":"10.1109\/TII.2022.3195063"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_3_1","DOI":"10.1109\/CVPRW59228.2023.00431"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_4_1","DOI":"10.1109\/CVPR.2017.502"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_5_1","DOI":"10.1109\/TASE.2020.3045880"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_6_1","DOI":"10.1109\/CVPRW63382.2024.00219"},{"unstructured":"Yongjian Deng Hao Chen Hai Liu and Youfu Li. [n.d.]. A Voxel Graph CNN for Object Classification with Event Cameras-Supplementary Material. ([n.d.]).","key":"e_1_3_2_1_7_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_8_1","DOI":"10.1109\/COMITCon.2019.8862454"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_9_1","DOI":"10.1109\/ICASSP.2010.5495652"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_10_1","DOI":"10.1145\/3539618.3591765"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_11_1","DOI":"10.1109\/ACCESS.2021.3107946"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_12_1","DOI":"10.1016\/j.jbi.2015.09.011"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_13_1","DOI":"10.1109\/TPAMI.2022.3182052"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_14_1","DOI":"10.1109\/ICCV.2019.00573"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_15_1","DOI":"10.1007\/s11263-019-01209-w"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_16_1","DOI":"10.1109\/TSMCB.2008.927269"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_17_1","DOI":"10.1109\/ISCSLP57327.2022.10037993"},{"key":"e_1_3_2_1_18_1","volume-title":"Vision gnn: An image is worth graph of nodes. Advances in neural information processing systems","author":"Han Kai","year":"2022","unstructured":"Kai Han, Yunhe Wang, Jianyuan Guo, Yehui Tang, and Enhua Wu. 2022. Vision gnn: An image is worth graph of nodes. Advances in neural information processing systems, Vol. 35 (2022), 8291-8303."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_19_1","DOI":"10.1109\/ICCV51070.2023.01820"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_20_1","DOI":"10.1145\/1027933.1027972"},{"key":"e_1_3_2_1_21_1","series-title":"Journal of Physics: Conference Series","volume-title":"Dual-flow spatio-temporal separation network for lip reading","author":"Huang An","year":"2028","unstructured":"An Huang and Xueyi Zhang. 2022. Dual-flow spatio-temporal separation network for lip reading. In Journal of Physics: Conference Series, Vol. 2400. IOP Publishing, 012028."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_22_1","DOI":"10.1109\/ICNC57223.2023.10074096"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_23_1","DOI":"10.1109\/TNNLS.2025.3542176"},{"key":"e_1_3_2_1_24_1","volume-title":"EvCSLR: Event-guided Continuous Sign Language Recognition and Benchmark","author":"Jiang Yu","year":"2024","unstructured":"Yu Jiang, Yuehang Wang, Siqi Li, Yongji Zhang, Qianren Guo, Qi Chu, and Yue Gao. 2024. EvCSLR: Event-guided Continuous Sign Language Recognition and Benchmark. IEEE Transactions on Multimedia (2024)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_25_1","DOI":"10.1109\/CVPR46437.2021.00768"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_26_1","DOI":"10.3389\/frai.2022.1070964"},{"key":"e_1_3_2_1_27_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and Max Welling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_28_1","DOI":"10.1145\/3726302.3729906"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_29_1","DOI":"10.1109\/TKDE.2022.3193725"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_30_1","DOI":"10.1109\/TCSVT.2025.3558939"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_31_1","DOI":"10.1109\/TPDS.2023.3281598"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_32_1","DOI":"10.1145\/3357384.3357951"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_33_1","DOI":"10.1016\/j.ins.2024.120660"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_34_1","DOI":"10.1109\/ICCV48922.2021.01345"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_35_1","DOI":"10.1109\/TPWRD.2023.3311643"},{"key":"e_1_3_2_1_36_1","volume-title":"Building robust and trustworthy HGNN models: A learnable threshold approach for Node classification. ACM Transactions on Knowledge Discovery from Data","author":"Ma Li","year":"2025","unstructured":"Li Ma, Yongchao Liu, Xiaofeng Gao, Peng Zhang, and Chuntao Hong. 2025. Building robust and trustworthy HGNN models: A learnable threshold approach for Node classification. ACM Transactions on Knowledge Discovery from Data, Vol. 19, 2 (2025), 1-17."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_37_1","DOI":"10.1109\/TIP.2024.3391913"},{"volume-title":"International Conference on Soft Computing in Data Science","author":"Mahboob Khalid","unstructured":"Khalid Mahboob, Hafsa Nizami, Fayyaz Ali, and Farrukh Alvi. 2021. Sentences Prediction Based on Automatic Lip-Reading Detection with Deep Learning Convolutional Neural Networks Using Video-Based Features. In International Conference on Soft Computing in Data Science. Springer, 42-53.","key":"e_1_3_2_1_38_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_39_1","DOI":"10.1007\/978-3-030-58598-3_25"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_40_1","DOI":"10.1007\/s11042-021-11817-9"},{"key":"e_1_3_2_1_41_1","volume-title":"When deep learning deciphers silent video: a survey on automatic deep lip reading. Multimedia Tools and Applications","author":"Oghbaie Marzieh","year":"2025","unstructured":"Marzieh Oghbaie, Arian Sabaghi, Kooshan Hashemifard, and Mohammad Akbari. 2025. When deep learning deciphers silent video: a survey on automatic deep lip reading. Multimedia Tools and Applications (2025), 1-43."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_42_1","DOI":"10.1109\/CVPR46437.2021.00345"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_43_1","DOI":"10.1007\/978-3-031-44067-0_27"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_44_1","DOI":"10.1016\/j.cviu.2023.103738"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_45_1","DOI":"10.1007\/978-3-030-21074-8_35"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_46_1","DOI":"10.1109\/CVPRW56347.2022.00301"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_47_1","DOI":"10.32604\/cmc.2021.016509"},{"key":"e_1_3_2_1_48_1","volume-title":"Deep learning for visual speech analysis: A survey. arXiv","author":"Sheng C","year":"2022","unstructured":"C Sheng, G Kuang, L Bai, C Hou, Y Guo, X Xu, M Pietik\u00e4inen, and L Liu. [n.d.]. Deep learning for visual speech analysis: A survey. arXiv 2022. arXiv preprint arXiv:2205.10839 ([n.d.])."},{"key":"e_1_3_2_1_49_1","volume-title":"International Conference on Learning Representations.","author":"Shim Kyuhong","year":"2021","unstructured":"Kyuhong Shim, Jungwook Choi, and Wonyong Sung. 2021. Understanding the role of self attention for efficient speech recognition. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_50_1","volume-title":"Tackling Event-Based Lip-Reading by Exploring Multigrained Spatiotemporal Clues","author":"Tan Ganchao","year":"2024","unstructured":"Ganchao Tan, Zengyu Wan, Yang Wang, Yang Cao, and Zheng-Jun Zha. 2024. Tackling Event-Based Lip-Reading by Exploring Multigrained Spatiotemporal Clues. IEEE Transactions on Neural Networks and Learning Systems (2024)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_51_1","DOI":"10.1109\/CVPR52688.2022.01946"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_52_1","DOI":"10.1145\/3287069"},{"key":"e_1_3_2_1_53_1","volume-title":"Enhanced local texture feature sets for face recognition under difficult lighting conditions","author":"Tan Xiaoyang","year":"2010","unstructured":"Xiaoyang Tan and Bill Triggs. 2010. Enhanced local texture feature sets for face recognition under difficult lighting conditions. IEEE transactions on image processing, Vol. 19, 6 (2010), 1635-1650."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_54_1","DOI":"10.1007\/s00371-024-03515-y"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_55_1","DOI":"10.1109\/WACV.2019.00199"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_56_1","DOI":"10.1109\/ICME55011.2023.00367"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_57_1","DOI":"10.1007\/978-3-030-01228-1_25"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_58_1","DOI":"10.1109\/CVPR.2019.00652"},{"key":"e_1_3_2_1_59_1","first-page":"3436","article-title":"Event-stream representation for human gaits identification using deep neural networks","volume":"44","author":"Wang Yanxiang","year":"2021","unstructured":"Yanxiang Wang, Xian Zhang, Yiran Shen, Bowen Du, Guangrong Zhao, Lizhen Cui, and Hongkai Wen. 2021b. Event-stream representation for human gaits identification using deep neural networks. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 44, 7 (2021), 3436-3449.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_60_1","DOI":"10.1109\/CVPR46437.2021.01301"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_61_1","DOI":"10.1145\/1027527.1027648"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_62_1","DOI":"10.1109\/TCYB.2022.3220040"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_63_1","DOI":"10.1609\/aaai.v35i12.17244"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_64_1","DOI":"10.1145\/3477495.3532058"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_65_1","DOI":"10.1016\/j.ins.2022.06.010"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_66_1","DOI":"10.1109\/COMST.2023.3323091"},{"key":"e_1_3_2_1_67_1","volume-title":"SiHGNN: Leveraging Properties of Semantic Graphs for Efficient HGNN Acceleration","author":"Xue Runzhen","year":"2025","unstructured":"Runzhen Xue, Mingyu Yan, Dengke Han, Ziheng Xiao, Zhimin Tang, Xiaochun Ye, and Dongrui Fan. 2025. SiHGNN: Leveraging Properties of Semantic Graphs for Efficient HGNN Acceleration. IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems (2025)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_68_1","DOI":"10.1002\/aisy.202400265"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_69_1","DOI":"10.1145\/3664647.3681607"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_70_1","DOI":"10.1109\/TIP.2024.3374074"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_71_1","DOI":"10.1609\/aaai.v39i10.33104"},{"key":"e_1_3_2_1_72_1","first-page":"30727","article-title":"Language without borders: A dataset and benchmark for code-switching lip reading","volume":"37","author":"Zhang Xueyi","year":"2024","unstructured":"Xueyi Zhang, Mingrui Lao, Peng Zhao, Jun Tang, Yanming Guo, Siqi Cai, Xianghu Yue, and Haizhou Li. 2024a. Language without borders: A dataset and benchmark for code-switching lip reading. Advances in Neural Information Processing Systems, Vol. 37 (2024), 30727-30739.","journal-title":"Advances in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_73_1","DOI":"10.1109\/BigDIA53151.2021.9619626"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_74_1","DOI":"10.1109\/ICME52920.2022.9859810"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_75_1","DOI":"10.1145\/3581783.3612460"},{"key":"e_1_3_2_1_76_1","volume-title":"Spectrum-guided Spatial Feature Enhancement Network for event-based lip-reading. Neurocomputing","author":"Zhang Yi","year":"2025","unstructured":"Yi Zhang, Xiuping Liu, Hongchen Tan, and Xin Li. 2025a. Spectrum-guided Spatial Feature Enhancement Network for event-based lip-reading. Neurocomputing (2025), 129974."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_77_1","DOI":"10.1016\/j.neucom.2023.126388"},{"key":"e_1_3_2_1_78_1","volume-title":"Ensemble Deep Learning Models for EEG-Based Auditory Attention Decoding. In 2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP). IEEE, 339-343","author":"Zhao Peng","year":"2024","unstructured":"Peng Zhao, Ruicong Wang, Zijie Lin, Zexu Pan, Haizhou Li, and Xueyi Zhang. 2024a. Ensemble Deep Learning Models for EEG-Based Auditory Attention Decoding. In 2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP). IEEE, 339-343."},{"key":"e_1_3_2_1_79_1","volume-title":"Binary-Temporal Convolutional Neural Network for Multi-Class Auditory Spatial Attention Detection. In 2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP). IEEE, 1-5.","author":"Zhao Peng","year":"2024","unstructured":"Peng Zhao, Ruicong Wang, Xueyi Zhang, Mingrui Lao, and Siqi Cai. 2024b. Binary-Temporal Convolutional Neural Network for Multi-Class Auditory Spatial Attention Detection. In 2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP). IEEE, 1-5."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_80_1","DOI":"10.1109\/ICCV48922.2021.00014"}],"event":{"sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"acronym":"MM '25","name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755404","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:09:39Z","timestamp":1765339779000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755404"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":80,"alternative-id":["10.1145\/3746027.3755404","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755404","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}