{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,12]],"date-time":"2026-05-12T16:50:15Z","timestamp":1778604615790,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754795","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:26:55Z","timestamp":1761377215000},"page":"5326-5335","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Towards Hazardous Activity Recognition for A Novel Real-World Dataset"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-6327-8141","authenticated-orcid":false,"given":"Shehzad","family":"Ali","sequence":"first","affiliation":[{"name":"Department of Applied Artificial Intelligence, Sungkyunkwan University, Seoul, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-9405-5684","authenticated-orcid":false,"given":"Md Tanvir","family":"Islam","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Sungkyunkwan University, Suwon, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0605-7572","authenticated-orcid":false,"given":"Ik Hyun","family":"Lee","sequence":"additional","affiliation":[{"name":"Department of Mechatronics Engineering, Tech University and IKLAB Inc., Seoul, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0487-0356","authenticated-orcid":false,"given":"Mingfu","family":"Xiong","sequence":"additional","affiliation":[{"name":"School of Computer Science and Artificial Intelligence, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3044-8175","authenticated-orcid":false,"given":"Minh-Son","family":"Dao","sequence":"additional","affiliation":[{"name":"Big Data Integration Research Center-NICT, Nukui-kiyamachi, Koganei, Tokyo, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0692-8411","authenticated-orcid":false,"given":"Saeed","family":"Anwar","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Software Engineering, University of Western Australia, Perth, Western Australia, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6107-114X","authenticated-orcid":false,"given":"Sambit","family":"Bakshi","sequence":"additional","affiliation":[{"name":"National Institute of Technology, Rourkela, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5302-1150","authenticated-orcid":false,"given":"Khan","family":"Muhammad","sequence":"additional","affiliation":[{"name":"Department of Applied Artificial Intelligence, Sungkyunkwan University, Seoul, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ESCI56872.2023.10100221"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1186\/s12984-018-0456-x"},{"key":"e_1_3_2_1_3_1","unstructured":"AIHW. 2024. Autralian Institute of Health and Welfare. https:\/\/www.aihw.gov.au\/news-media\/media-releases\/2024\/april\/injuries-in-children-and-adolescents-a-significant-contributor-to-hospitalisations-in-australia\/. Accessed: 2024-08-12."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.5220\/0010839200003124"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/EICT54103.2021.9733601"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the International Conference on Machine Learning (ICML).","author":"Bertasius Gedas","year":"2021","unstructured":"Gedas Bertasius, Heng Wang, and Lorenzo Torresani. 2021. Is Space-Time Attention All You Need for Video Understanding?. In Proceedings of the International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_8_1","volume-title":"A spatio-temporal attention-based model for infant movement assessment from videos","author":"Binh Nguyen-Thai","year":"2021","unstructured":"Nguyen-Thai Binh, Vuong Le, Catherine Morgan, Nadia Badawi, Truyen Tran, and Svetha Venkatesh. 2021. A spatio-temporal attention-based model for infant movement assessment from videos. IEEE journal of biomedical and health informatics, Vol. 25, 10 (2021), 3911-3920."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-022-09521-1"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/AIIoT52608.2021.9454221"},{"key":"e_1_3_2_1_13_1","volume-title":"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. ICLR","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. ICLR (2021)."},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision. 21-30","author":"Elaheh Hatamimajoumerd","year":"2024","unstructured":"Hatamimajoumerd Elaheh, Pooria Daneshvar Kakhaki, Xiaofei Huang, Lingfei Luan, Somaieh Amraee, and Sarah Ostadabbas. 2024. Challenges in video-based infant action recognition: A critical examination of the state of the art. In Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision. 21-30."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00630"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458723"},{"key":"e_1_3_2_1_18_1","volume-title":"Benchmarking Micro-action Recognition: Dataset, Method, and Application","author":"Guo Dan","year":"2024","unstructured":"Dan Guo, Kun Li, Bin Hu, Yan Zhang, and Meng Wang. 2024. Benchmarking Micro-action Recognition: Dataset, Method, and Application. IEEE Transactions on Circuits and Systems for Video Technology (2024)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Kai Han Yunhe Wang Hanting Chen Xinghao Chen Jianyuan Guo Zhenhua Liu Yehui Tang An Xiao Chunjing Xu Yixing Xu et al. 2022. A survey on vision transformer. IEEE transactions on pattern analysis and machine intelligence Vol. 45 1 (2022) 87-110.","DOI":"10.1109\/TPAMI.2022.3152247"},{"key":"e_1_3_2_1_20_1","volume-title":"The global landscape of AI ethics guidelines. Nature machine intelligence","author":"Jobin Anna","year":"2019","unstructured":"Anna Jobin, Marcello Ienca, and Effy Vayena. 2019. The global landscape of AI ethics guidelines. Nature machine intelligence, Vol. 1, 9 (2019), 389-399."},{"key":"e_1_3_2_1_21_1","unstructured":"Glenn Jocher Ayush Chaurasia and Jing Qiu. 2023. Ultralytics YOLOv8. https:\/\/github.com\/ultralytics\/ultralytics"},{"key":"e_1_3_2_1_22_1","unstructured":"Glenn Jocher and Jing Qiu. 2024. Ultralytics YOLO11. https:\/\/github.com\/ultralytics\/ultralytics. Accessed 01-01-2025."},{"key":"e_1_3_2_1_23_1","unstructured":"Will Kay Joao Carreira Karen Simonyan Brian Zhang Chloe Hillier Sudheendra Vijayanarasimhan Fabio Viola Tim Green Trevor Back Paul Natsev et al. 2017. The kinetics human action video dataset. arXiv preprint arXiv:1705.06950 (2017)."},{"key":"e_1_3_2_1_24_1","volume-title":"Leveraging Temporal Contextualization for Video Action Recognition. European Conference on Computer Vision (ECCV)","author":"Kim Minji","year":"2024","unstructured":"Minji Kim, Dongyoon Han, Taekyung Kim, and Bohyung Han. 2024. Leveraging Temporal Contextualization for Video Action Recognition. European Conference on Computer Vision (ECCV) (2024)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1186\/s12984-020-0647-0"},{"key":"e_1_3_2_1_26_1","unstructured":"Kunchang Li Xinhao Li Yi Wang Yinan He Yali Wang Limin Wang and Yu Qiao. 2024. VideoMamba: State Space Model for Efficient Video Understanding. arXiv:2403.06977 [cs.CV]"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3282631"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00546"},{"key":"e_1_3_2_1_30_1","volume-title":"International Conference on Learning Representations.","author":"Mehta Sachin","year":"2022","unstructured":"Sachin Mehta and Mohammad Rastegari. 2022. MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TENCON55691.2022.9977799"},{"key":"e_1_3_2_1_32_1","volume-title":"Tom Yan, Lisa Brown, Quanfu Fan, Dan Gutfreund, Carl Vondrick, et al.","author":"Monfort Mathew","year":"2019","unstructured":"Mathew Monfort, Alex Andonian, Bolei Zhou, Kandan Ramakrishnan, Sarah Adel Bargal, Tom Yan, Lisa Brown, Quanfu Fan, Dan Gutfreund, Carl Vondrick, et al., 2019. Moments in time dataset: one million videos for event understanding. IEEE transactions on pattern analysis and machine intelligence, Vol. 42, 2 (2019), 502-508."},{"key":"e_1_3_2_1_33_1","volume-title":"Min Je Kim, and Sung Wook Baik","author":"Munsif Muhammad","year":"2024","unstructured":"Muhammad Munsif, Noman Khan, Altaf Hussain, Min Je Kim, and Sung Wook Baik. 2024. Darkness-adaptive action recognition: Leveraging efficient tubelet slow-fast network for industrial applications. IEEE Transactions on Industrial Informatics (2024)."},{"key":"e_1_3_2_1_34_1","unstructured":"FE Olalere. 2021. Video-based Activity Recognition for Child Behaviour Understanding. Master's thesis."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00051"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.590"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.438"},{"key":"e_1_3_2_1_38_1","unstructured":"Gabriele Rob. 2024. National Electronic Injury Surveillance System. https:\/\/www.safehome.org\/family-safety\/home-childproofing-report\/. Accessed: 2014-08-12."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData47090.2019.9005997"},{"key":"e_1_3_2_1_40_1","unstructured":"SMCH. 2024. Furniture Falls Hurt Kids. Stanford Medicine Children's Health. https:\/\/www.stanfordchildrens.org\/en\/topic\/default?id=furniture-falls-hurt-kids-197-26315. Accessed: 2024-08-12."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3183112"},{"key":"e_1_3_2_1_42_1","volume-title":"Child Activity Recognition using Deep Learning. IJEAT","author":"Suthar Binjal","year":"2020","unstructured":"Binjal Suthar and Bijal Gadhiya. 2020. Child Activity Recognition using Deep Learning. IJEAT (2020)."},{"key":"e_1_3_2_1_43_1","volume-title":"Enhancing Child Safety: Computer Vision-Based Accident Detection for Infants and Toddlers. In 2024 3rd International Conference on Digital Transformation and Applications (ICDXA). IEEE, 1-5.","author":"Tan Jia He","year":"2024","unstructured":"Jia He Tan and Ching Pang Goh. 2024. Enhancing Child Safety: Computer Vision-Based Accident Detection for Infants and Toddlers. In 2024 3rd International Conference on Digital Transformation and Applications (ICDXA). IEEE, 1-5."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1136\/ip-2023-045014"},{"key":"e_1_3_2_1_45_1","volume-title":"Videomae: Masked autoencoders are data-efficient learners for self-supervised video pre-training. Advances in neural information processing systems","author":"Tong Zhan","year":"2022","unstructured":"Zhan Tong, Yibing Song, Jue Wang, and Limin Wang. 2022. Videomae: Masked autoencoders are data-efficient learners for self-supervised video pre-training. Advances in neural information processing systems, Vol. 35 (2022), 10078-10093."},{"key":"e_1_3_2_1_46_1","unstructured":"Ultralytics. 2021. YOLOv5: A state-of-the-art real-time object detection system. https:\/\/docs.ultralytics.com."},{"key":"e_1_3_2_1_47_1","volume-title":"YOLOv10: Real-Time End-to-End Object Detection. arXiv preprint arXiv:2405.14458","author":"Wang Ao","year":"2024","unstructured":"Ao Wang, Hui Chen, Lihao Liu, Kai Chen, Zijia Lin, Jungong Han, and Guiguang Ding. 2024a. YOLOv10: Real-Time End-to-End Object Detection. arXiv preprint arXiv:2405.14458 (2024)."},{"key":"e_1_3_2_1_48_1","volume-title":"YOLOv9: Learning what you want to learn using programmable gradient information. arXiv","author":"Wang CY","year":"2024","unstructured":"CY Wang, IH Yeh, and HYM Liao. 2024b. YOLOv9: Learning what you want to learn using programmable gradient information. arXiv 2024. arXiv preprint arXiv:2402.13616 (2024)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"e_1_3_2_1_50_1","volume-title":"Taylor Videos for Action Recognition. In Forty-first International Conference on Machine Learning (ICML).","author":"Wang Lei","year":"2024","unstructured":"Lei Wang, Xiuyuan Yuan, Tom Gedeon, and Liang Zheng. 2024c. Taylor Videos for Action Recognition. In Forty-first International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.heliyon.2023.e16763"},{"key":"e_1_3_2_1_52_1","unstructured":"WHO. 2024. Preventing Child Injuries. https:\/\/www.who.int\/europe\/activities\/preventing-child-injuries. Accessed: 2024-08-14."},{"key":"e_1_3_2_1_53_1","volume-title":"International Conference on Learning Representations.","author":"Zhang Hongyi","year":"2018","unstructured":"Hongyi Zhang, Moustapha Cisse, Yann N Dauphin, and David Lopez-Paz. 2018. mixup: Beyond Empirical Risk Minimization. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/EMBC48229.2022.9871230"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3379885"},{"key":"e_1_3_2_1_56_1","volume-title":"Spikformer: When Spiking Neural Network Meets Transformer. In The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=frE4fUwz_h","author":"Zhou Zhaokun","year":"2023","unstructured":"Zhaokun Zhou, Yuesheng Zhu, Chao He, Yaowei Wang, Shuicheng YAN, Yonghong Tian, and Li Yuan. 2023. Spikformer: When Spiking Neural Network Meets Transformer. In The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=frE4fUwz_h"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754795","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T05:05:21Z","timestamp":1765343121000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754795"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":56,"alternative-id":["10.1145\/3746027.3754795","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754795","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}