{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T12:53:01Z","timestamp":1780923181284,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":88,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"the National Key RD Program","award":["2022YFB4701400"],"award-info":[{"award-number":["2022YFB4701400"]}]},{"name":"SFB 1574 Circular Factory for the Perpetual Product","award":["471687386"],"award-info":[{"award-number":["471687386"]}]},{"name":"SmartAge funded by the Carl Zeiss Stiftung","award":["P2019-01-003; 2021-2026"],"award-info":[{"award-number":["P2019-01-003; 2021-2026"]}]},{"name":"the state of BadenW\u00fcrttemberg through bwHPC and the German Research Foundation (DFG)","award":["INST 35\/1597-1 FUGG"],"award-info":[{"award-number":["INST 35\/1597-1 FUGG"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3680630","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:27Z","timestamp":1729925967000},"page":"4495-4504","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Towards Video-based Activated Muscle Group Estimation in the Wild"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5419-9292","authenticated-orcid":false,"given":"Kunyu","family":"Peng","sequence":"first","affiliation":[{"name":"Karlsruhe Institute of Technology, Karlsruhe, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3272-2337","authenticated-orcid":false,"given":"David","family":"Schneider","sequence":"additional","affiliation":[{"name":"Karlsruhe Institute of Technology, Karlsruhe, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4724-9164","authenticated-orcid":false,"given":"Alina","family":"Roitberg","sequence":"additional","affiliation":[{"name":"Stuttgart University, Stuttgart, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1090-667X","authenticated-orcid":false,"given":"Kailun","family":"Yang","sequence":"additional","affiliation":[{"name":"Hunan University, Changsha, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3471-328X","authenticated-orcid":false,"given":"Jiaming","family":"Zhang","sequence":"additional","affiliation":[{"name":"Karlsruhe Institute of Technology, Karlsruhe, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0541-2069","authenticated-orcid":false,"given":"Chen","family":"Deng","sequence":"additional","affiliation":[{"name":"Beijing Sport University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4187-341X","authenticated-orcid":false,"given":"Kaiyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Beijing Sport University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1271-0005","authenticated-orcid":false,"given":"M. Saquib","family":"Sarfraz","sequence":"additional","affiliation":[{"name":"Mercedes-Benz Tech Innovation, Stuttgart, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8046-4945","authenticated-orcid":false,"given":"Rainer","family":"Stiefelhagen","sequence":"additional","affiliation":[{"name":"Karlsruhe Institute of Technology, Karlsruhe, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Graph convolutional neural network for human action recognition: A comprehensive survey. TAI","author":"Ahmad Tasweer","year":"2021","unstructured":"Tasweer Ahmad, Lianwen Jin, Xin Zhang, Songxuan Lai, Guozhi Tang, and Luojun Lin. 2021. Graph convolutional neural network for human action recognition: A comprehensive survey. TAI (2021)."},{"key":"e_1_3_2_2_2_1","unstructured":"Jean-Baptiste Alayrac et al. 2020. Self-supervised multimodal versatile networks. In NeurIPS."},{"key":"e_1_3_2_2_3_1","volume-title":"A muscle synergy-inspired control design to coordinate functional electrical stimulation and a powered exoskeleton: Artificial generation of synergies to reduce input dimensionality. CSM","author":"Alibeji Naji A","year":"2018","unstructured":"Naji A Alibeji, Vahidreza Molazadeh, Frank Moore-Clingenpeel, and Nitin Sharma. 2018. A muscle synergy-inspired control design to coordinate functional electrical stimulation and a powered exoskeleton: Artificial generation of synergies to reduce input dimensionality. CSM (2018)."},{"key":"e_1_3_2_2_4_1","volume-title":"Font Size and Viewing Distance of Handheld Smart Phones. Optometry and Vision Science","author":"Bababekova Yuliya","year":"2011","unstructured":"Yuliya Bababekova, Mark Rosenfield, Jennifer E Hue, and Rae R Huang. 2011. Font Size and Viewing Distance of Handheld Smart Phones. Optometry and Vision Science (2011)."},{"key":"e_1_3_2_2_5_1","volume-title":"Rehabilitation exercises for dysfunction of the scapula: exploration of muscle activity using fine-wire EMG. The American Journal of Sports Medicine","author":"Berckmans Kelly R","year":"2021","unstructured":"Kelly R Berckmans, Birgit Castelein, Dorien Borms, Thierry Parlevliet, and Ann Cools. 2021. Rehabilitation exercises for dysfunction of the scapula: exploration of muscle activity using fine-wire EMG. The American Journal of Sports Medicine (2021)."},{"key":"e_1_3_2_2_6_1","unstructured":"Lars Buitinck et al. 2013. API design for machine learning software: experiences from the scikit-learn project. In ECMLW."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"crossref","unstructured":"Fabian Caba Heilbron Victor Escorcia Bernard Ghanem and Juan Carlos Niebles. 2015. ActivityNet: A large-scale video benchmark for human activity understanding. In CVPR.","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"crossref","unstructured":"Joao Carreira and Andrew Zisserman. 2017. Quo vadis action recognition? A new model and the kinetics dataset. In CVPR.","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_2_9_1","unstructured":"Pengguang Chen Shu Liu Hengshuang Zhao and Jiaya Jia. 2021. Distilling knowledge via knowledge review. In CVPR."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"crossref","unstructured":"Yuxin Chen Ziqi Zhang Chunfeng Yuan Bing Li Ying Deng and Weiming Hu. 2021. Channel-wise Topology Refinement Graph Convolution for Skeleton-Based Action Recognition. In ICCV.","DOI":"10.1109\/ICCV48922.2021.01311"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"crossref","unstructured":"Zhao-Min Chen Xiu-Shen Wei Peng Wang and Yanwen Guo. 2019. Multi-label image recognition with graph convolutional networks. In CVPR.","DOI":"10.1109\/CVPR.2019.00532"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"crossref","unstructured":"Mia Chiquier and Carl Vondrick. 2023. Muscles in Action. In ICCV.","DOI":"10.1109\/ICCV51070.2023.02019"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"crossref","unstructured":"Vasileios Choutas Philippe Weinzaepfel J\u00e9r\u00f4me Revaud and Cordelia Schmid. 2018. PoTion: Pose motion representation for action recognition. In CVPR.","DOI":"10.1109\/CVPR.2018.00734"},{"key":"e_1_3_2_2_14_1","volume-title":"The effectivity of a passive arm support exoskeleton in reducing muscle activation and perceived exertion during plastering activities. Ergonomics","author":"de Vries Aijse Willem","year":"2021","unstructured":"Aijse Willem de Vries, Frank Krause, and Michiel Pieter de Looze. 2021. The effectivity of a passive arm support exoskeleton in reducing muscle activation and perceived exertion during plastering activities. Ergonomics (2021)."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"crossref","unstructured":"Jia Deng Wei Dong Richard Socher Li-Jia Li Kai Li and Li Fei-Fei. 2009. ImageNet: A large-scale hierarchical image database. In CVPR.","DOI":"10.1109\/CVPRW.2009.5206848"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"crossref","unstructured":"Jeffrey Donahue et al. 2015. Long-term recurrent convolutional networks for visual recognition and description. In CVPR.","DOI":"10.21236\/ADA623249"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"crossref","unstructured":"Haodong Duan Yue Zhao Kai Chen Dahua Lin and Bo Dai. 2022. Revisiting skeleton-based action recognition. In CVPR.","DOI":"10.1109\/CVPR52688.2022.00298"},{"key":"e_1_3_2_2_18_1","volume-title":"Proc","author":"Estember Rene D","unstructured":"Rene D Estember and Chih-Jung Huang. 2019. Essential occupational risk and health interventions for Taiwan's bus drivers. In Proc. ICIEA."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"crossref","unstructured":"Haoqi Fan et al. 2021. Multiscale vision transformers. In ICCV.","DOI":"10.1109\/ICCV48922.2021.00675"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"crossref","unstructured":"Hao-Shu Fang et al. 2023. AlphaPose: Whole-body Regional Multi-Person Pose Estimation and Tracking in Real-Time. TPAMI (2023).","DOI":"10.1109\/TPAMI.2022.3222784"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"crossref","unstructured":"Zhiyuan Fang Jianfeng Wang Xiaowei Hu Lijuan Wang Yezhou Yang and Zicheng Liu. 2021. Compressing visual-linguistic model via knowledge distillation. In ICCV.","DOI":"10.1109\/ICCV48922.2021.00146"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"crossref","unstructured":"Christoph Feichtenhofer Haoqi Fan Jitendra Malik and Kaiming He. 2019. SlowFast networks for video recognition. In ICCV.","DOI":"10.1109\/ICCV.2019.00630"},{"key":"e_1_3_2_2_23_1","volume-title":"Wildes","author":"Feichtenhofer Christoph","year":"2017","unstructured":"Christoph Feichtenhofer, Axel Pinz, and Richard P. Wildes. 2017. Spatiotemporal multiplier networks for video action recognition. In CVPR."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"crossref","unstructured":"Christoph Feichtenhofer Axel Pinz and Andrew Zisserman. 2016. Convolutional two-stream network fusion for video action recognition. In CVPR.","DOI":"10.1109\/CVPR.2016.213"},{"key":"e_1_3_2_2_25_1","volume-title":"Hand gesture recognition using multimodal data fusion and multiscale parallel convolutional neural network for human--robot interaction. Expert Systems","author":"Gao Qing","year":"2021","unstructured":"Qing Gao, Jinguo Liu, and Zhaojie Ju. 2021. Hand gesture recognition using multimodal data fusion and multiscale parallel convolutional neural network for human--robot interaction. Expert Systems (2021)."},{"key":"e_1_3_2_2_26_1","volume-title":"Knowledge distillation: A survey. IJCV","author":"Gou Jianping","year":"2021","unstructured":"Jianping Gou, Baosheng Yu, Stephen J. Maybank, and Dacheng Tao. 2021. Knowledge distillation: A survey. IJCV (2021)."},{"key":"e_1_3_2_2_27_1","volume-title":"Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531","author":"Hinton Geoffrey E.","year":"2015","unstructured":"Geoffrey E. Hinton, Oriol Vinyals, and Jeffrey Dean. 2015. Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)."},{"key":"e_1_3_2_2_28_1","volume-title":"Privacy-Preserving Human Activity Recognition System for Assisted Living Environments. TAI","author":"Jain Ankit","year":"2023","unstructured":"Ankit Jain, Rajendra Akerkar, and Abhishek Srivastava. 2023. Privacy-Preserving Human Activity Recognition System for Assisted Living Environments. TAI (2023)."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"crossref","unstructured":"Xiao Jin et al. 2019. Knowledge distillation via route constrained optimization. In ICCV.","DOI":"10.1109\/ICCV.2019.00143"},{"key":"e_1_3_2_2_30_1","volume-title":"Wildes","author":"Kang Soo-Min","year":"2016","unstructured":"Soo-Min Kang and Richard P. Wildes. 2016. Review of action recognition and detection methods. arXiv preprint arXiv:1610.06906 (2016)."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"crossref","unstructured":"Evangelos Kazakos Arsha Nagrani Andrew Zisserman and Dima Damen. 2019. EPIC-Fusion: Audio-visual temporal binding for egocentric action recognition. In ICCV.","DOI":"10.1109\/ICCV.2019.00559"},{"key":"e_1_3_2_2_32_1","volume-title":"HMDB: A large video database for human motion recognition. In ICCV.","author":"Kuehne Hildegard","year":"2011","unstructured":"Hildegard Kuehne, Hueihan Jhuang, Est\u00edbaliz Garrote, Tomaso Poggio, and Thomas Serre. 2011. HMDB: A large video database for human motion recognition. In ICCV."},{"key":"e_1_3_2_2_33_1","volume-title":"Proc. EMBAC.","author":"Kuiken Todd A","year":"2006","unstructured":"Todd A Kuiken, Laura A Miller, Robert D Lipschutz, Kathy A Stubblefield, and Gregory A Dumanian. 2006. Prosthetic command signals following targeted hyper-reinnervation nerve transfer surgery. In Proc. EMBAC."},{"key":"e_1_3_2_2_34_1","volume-title":"Hierarchically Decomposed Graph Convolutional Networks for Skeleton-Based Action Recognition. arXiv preprint arXiv:2208.10741","author":"Lee Jungho","year":"2022","unstructured":"Jungho Lee, Minhyeok Lee, Dogyoon Lee, and Sangyoon Lee. 2022. Hierarchically Decomposed Graph Convolutional Networks for Skeleton-Based Action Recognition. arXiv preprint arXiv:2208.10741 (2022)."},{"key":"e_1_3_2_2_35_1","volume-title":"FitHuBERT: Going Thinner and Deeper for Knowledge Distillation of Speech Self-Supervised Learning. arXiv preprint arXiv:2207.00555","author":"Lee Yeonghyeon","year":"2022","unstructured":"Yeonghyeon Lee, Kangwook Jang, Jahyun Goo, Youngmoon Jung, and Hoirin Kim. 2022. FitHuBERT: Going Thinner and Deeper for Knowledge Distillation of Speech Self-Supervised Learning. arXiv preprint arXiv:2207.00555 (2022)."},{"key":"e_1_3_2_2_36_1","volume-title":"Alexander Vostrikov, and Andrew Zisserman.","author":"Li Ang","year":"2020","unstructured":"Ang Li, Meghana Thotakuri, David A. Ross, Jo ao Carreira, Alexander Vostrikov, and Andrew Zisserman. 2020. The AVA-Kinetics localized human actions video dataset. arXiv preprint arXiv:2005.00214 (2020)."},{"key":"e_1_3_2_2_37_1","unstructured":"Yanghao Li et al. 2022. MViTv2: Improved Multiscale Vision Transformers for Classification and Detection. In CVPR."},{"key":"e_1_3_2_2_38_1","volume":"201","author":"Liang Ting","unstructured":"Ting Liang and Yong J Yuan. 2016. Wearable medical monitoring systems based on wireless networks: A review. IEEE Sensors Journal (2016).","journal-title":"Yong J Yuan."},{"key":"e_1_3_2_2_39_1","unstructured":"Ruiping Liu et al. 2022. TransKD: Transformer Knowledge Distillation for Efficient Semantic Segmentation. arXiv preprint arXiv:2202.13393 (2022)."},{"key":"e_1_3_2_2_40_1","volume-title":"Query2Label: A simple transformer way to multi-label classification. arXiv preprint arXiv:2107.10834","author":"Liu Shilong","year":"2021","unstructured":"Shilong Liu, Lei Zhang, Xiao Yang, Hang Su, and Jun Zhu. 2021. Query2Label: A simple transformer way to multi-label classification. arXiv preprint arXiv:2107.10834 (2021)."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"crossref","unstructured":"Ze Liu et al. 2022. Swin transformer V2: Scaling up capacity and resolution. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"crossref","unstructured":"Ze Liu Jia Ning Yue Cao Yixuan Wei Zheng Zhang Stephen Lin and Han Hu. 2022. Video swin transformer. In CVPR.","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"e_1_3_2_2_43_1","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. Decoupled weight decay regularization. In ICLR."},{"key":"e_1_3_2_2_44_1","volume-title":"Effects of exercise on muscle fitness in dialysis patients: A systematic review and meta-analysis. American Journal of Nephrology","author":"Lu Yue","year":"2019","unstructured":"Yue Lu, Yujie Wang, and Qian Lu. 2019. Effects of exercise on muscle fitness in dialysis patients: A systematic review and meta-analysis. American Journal of Nephrology (2019)."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"crossref","unstructured":"Alessio Monti Angelo Porrello Simone Calderara Pasquale Coscia Lamberto Ballan and Rita Cucchiara. 2022. How many Observations are Enough? Knowledge Distillation for Trajectory Forecasting. In CVPR.","DOI":"10.1109\/CVPR52688.2022.00644"},{"key":"e_1_3_2_2_46_1","unstructured":"Arsha Nagrani Shan Yang Anurag Arnab Aren Jansen Cordelia Schmid and Chen Sun. 2021. Attention bottlenecks for multimodal fusion. In NeurIPS."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"crossref","unstructured":"Rameswar Panda et al. 2021. AdaMML: Adaptive multi-modal learning for efficient video recognition. In ICCV.","DOI":"10.1109\/ICCV48922.2021.00748"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"crossref","unstructured":"Paritosh Parmar and Brendan Tran Morris. 2019. What and how well you performed? A multitask learning approach to action quality assessment. In CVPR.","DOI":"10.1109\/CVPR.2019.00039"},{"key":"e_1_3_2_2_49_1","volume-title":"Ruth Fong, Jo ao F. Henriques, Geoffrey Zweig, and Andrea Vedaldi.","author":"Patrick Mandela","year":"2020","unstructured":"Mandela Patrick, Yuki Markus Asano, Ruth Fong, Jo ao F. Henriques, Geoffrey Zweig, and Andrea Vedaldi. 2020. Multi-modal self-supervision from generalized data transformations. arXiv preprint arXiv:2003.04298 (2020)."},{"key":"e_1_3_2_2_50_1","unstructured":"Kunyu Peng Jia Fu Kailun Yang Di Wen Yufan Chen Ruiping Liu Junwei Zheng Jiaming Zhang M Saquib Sarfraz Rainer Stiefelhagen et al. 2024. Referring Atomic Video Action Recognition. arXiv preprint arXiv:2407.01872 (2024)."},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"crossref","unstructured":"Kunyu Peng Alina Roitberg Kailun Yang Jiaming Zhang and Rainer Stiefelhagen. 2022. Should I take a walk? Estimating Energy Expenditure from Video Data. In CVPRW.","DOI":"10.1109\/CVPRW56347.2022.00225"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"crossref","unstructured":"Kunyu Peng Alina Roitberg Kailun Yang Jiaming Zhang and Rainer Stiefelhagen. 2022. TransDARC: Transformer-based Driver Activity Recognition with Latent Space Feature Calibration. In IROS.","DOI":"10.1109\/IROS47612.2022.9981445"},{"key":"e_1_3_2_2_53_1","volume-title":"Delving Deep into One-Shot Skeleton-based Action Recognition with Diverse Occlusions. TMM","author":"Peng Kunyu","year":"2023","unstructured":"Kunyu Peng, Alina Roitberg, Kailun Yang, Jiaming Zhang, and Rainer Stiefelhagen. 2023. Delving Deep into One-Shot Skeleton-based Action Recognition with Diverse Occlusions. TMM (2023)."},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"crossref","unstructured":"Kunyu Peng Cheng Yin Junwei Zheng Ruiping Liu David Schneider Jiaming Zhang Kailun Yang M Saquib Sarfraz Rainer Stiefelhagen and Alina Roitberg. 2024. Navigating open set scenarios for skeleton-based action recognition. In AAAI.","DOI":"10.1609\/aaai.v38i5.28247"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-021-11058-w"},{"key":"e_1_3_2_2_56_1","volume-title":"Ryoo","author":"Piergiovanni A. J.","year":"2020","unstructured":"A. J. Piergiovanni, Anelia Angelova, and Michael S. Ryoo. 2020. Evolving losses for unsupervised video representation learning. In CVPR."},{"key":"e_1_3_2_2_57_1","unstructured":"Firstyani Imannisa Rahma Rizki Mawan Harianto Harianto and Kusrini. 2020. Nutrition and Lifestyle Recommendations for Patients Recovering from Covid-19 in Nusa Tenggara Barat Province. In ICORIS."},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"crossref","unstructured":"Tal Ridnik et al. 2021. Asymmetric loss for multi-label classification. In ICCV.","DOI":"10.1109\/ICCV48922.2021.00015"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"crossref","unstructured":"Alina Roitberg Kunyu Peng Zdravko Marinov Constantin Seibold David Schneider and Rainer Stiefelhagen. 2022. A Comparative Analysis of Decision-Level Fusion for Multimodal Driver Behaviour Understanding. In IV.","DOI":"10.1109\/IV51971.2022.9827426"},{"key":"e_1_3_2_2_60_1","volume-title":"Hiera: A hierarchical vision transformer without the bells-and-whistles. In ICML.","author":"Ryali Chaitanya","year":"2023","unstructured":"Chaitanya Ryali, Yuan-Ting Hu, Daniel Bolya, Chen Wei, Haoqi Fan, Po-Yao Huang, Vaibhav Aggarwal, Arkabandhu Chowdhury, Omid Poursaeed, Judy Hoffman, et al. 2023. Hiera: A hierarchical vision transformer without the bells-and-whistles. In ICML."},{"key":"e_1_3_2_2_61_1","volume-title":"Prehabilitation in thoracic surgery. Journal of Thoracic Disease","author":"Sanchez-Lorente David","year":"2018","unstructured":"David Sanchez-Lorente, Ricard Navarro-Ripoll, Rudith Guzman, Jorge Moises, Elena Gimeno, Marc Boada, and Laureano Molins. 2018. Prehabilitation in thoracic surgery. Journal of Thoracic Disease (2018)."},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"crossref","unstructured":"David Schneider Saquib Sarfraz Alina Roitberg and Rainer Stiefelhagen. 2022. Pose-Based Contrastive Learning for Domain Agnostic Activity Representations. In CVPRW.","DOI":"10.1109\/CVPRW56347.2022.00387"},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"crossref","unstructured":"Ramprasaath R Selvaraju Michael Cogswell Abhishek Das Ramakrishna Vedantam Devi Parikh and Dhruv Batra. 2017. Grad-CAM: Visual explanations from deep networks via gradient-based localization. In ICCV.","DOI":"10.1109\/ICCV.2017.74"},{"key":"e_1_3_2_2_64_1","doi-asserted-by":"crossref","unstructured":"Suranga Seneviratne et al. 2017. A survey of wearable devices and challenges. IEEE Communications Surveys & Tutorials (2017).","DOI":"10.1109\/COMST.2017.2731979"},{"key":"e_1_3_2_2_65_1","doi-asserted-by":"crossref","unstructured":"Dian Shao Yue Zhao Bo Dai and Dahua Lin. 2020. FineGym: A hierarchical video dataset for fine-grained action understanding. In CVPR.","DOI":"10.1109\/CVPR42600.2020.00269"},{"key":"e_1_3_2_2_66_1","volume-title":"Skeleton-based action recognition with multi-stream adaptive graph convolutional networks. TIP","author":"Shi Lei","year":"2020","unstructured":"Lei Shi, Yifan Zhang, Jian Cheng, and Hanqing Lu. 2020. Skeleton-based action recognition with multi-stream adaptive graph convolutional networks. TIP (2020)."},{"key":"e_1_3_2_2_67_1","volume-title":"Amir Roshan Zamir, and Mubarak Shah","author":"Soomro Khurram","year":"2012","unstructured":"Khurram Soomro, Amir Roshan Zamir, and Mubarak Shah. 2012. UCF101: A dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012)."},{"key":"e_1_3_2_2_68_1","volume-title":"Combining Metric Learning and Attention Heads For Accurate and Efficient Multilabel Image Classification. arXiv preprint arXiv:2209.06585","author":"Sovrasov Vladislav","year":"2022","unstructured":"Vladislav Sovrasov. 2022. Combining Metric Learning and Attention Heads For Accurate and Efficient Multilabel Image Classification. arXiv preprint arXiv:2209.06585 (2022)."},{"key":"e_1_3_2_2_69_1","volume-title":"View Invariant Spatio-Temporal Descriptor for Action Recognition From Skeleton Sequences. TAI","author":"Nirmala Venkata","year":"2023","unstructured":"Venkata Subbareddy K and Nirmala Devi L. 2023. View Invariant Spatio-Temporal Descriptor for Action Recognition From Skeleton Sequences. TAI (2023)."},{"key":"e_1_3_2_2_70_1","doi-asserted-by":"crossref","unstructured":"Yansong Tang et al. 2020. Uncertainty-aware score distribution learning for action quality assessment. In CVPR.","DOI":"10.1109\/CVPR42600.2020.00986"},{"key":"e_1_3_2_2_71_1","volume-title":"Juliano Costa Machado, and Alexandre Balbinot","author":"Tosin Maur\u00edcio Cagliari","year":"2022","unstructured":"Maur\u00edcio Cagliari Tosin, Juliano Costa Machado, and Alexandre Balbinot. 2022. sEMG-based Upper Limb Movement Classifier: Current Scenario and Upcoming Challenges. JAIR (2022)."},{"key":"e_1_3_2_2_72_1","unstructured":"Hugo Touvron Matthieu Cord Matthijs Douze Francisco Massa Alexandre Sablayrolles and Herv\u00e9 J\u00e9gou. 2021. Training data-efficient image transformers & distillation through attention. In ICML."},{"key":"e_1_3_2_2_73_1","unstructured":"Ashish Vaswani et al. 2017. Attention is all you need. In NeurIPS."},{"key":"e_1_3_2_2_74_1","doi-asserted-by":"crossref","unstructured":"Limin Wang Bingkun Huang Zhiyu Zhao Zhan Tong Yinan He Yi Wang Yali Wang and Yu Qiao. 2023. VideoMAE V2: Scaling Video Masked Autoencoders With Dual Masking. In CVPR.","DOI":"10.1109\/CVPR52729.2023.01398"},{"key":"e_1_3_2_2_75_1","unstructured":"Shiguang Wang Zhizhong Li Yue Zhao Yuanjun Xiong Limin Wang and Dahua Lin. 2020. Denseflow. https:\/\/github.com\/open-mmlab\/denseflow."},{"key":"e_1_3_2_2_76_1","unstructured":"Ping-Cheng Wei Kunyu Peng Alina Roitberg Kailun Yang Jiaming Zhang and Rainer Stiefelhagen. 2022. Multi-modal Depression Estimation based on Sub-attentional Fusion. In ECCV."},{"key":"e_1_3_2_2_77_1","unstructured":"Yiping Wei Kunyu Peng Alina Roitberg Jiaming Zhang Junwei Zheng Ruiping Liu Yufan Chen Kailun Yang and Rainer Stiefelhagen. 2024. Elevating Skeleton-Based Action Recognition with Efficient Multi-Modality Self-Supervision. In ICASSP."},{"key":"e_1_3_2_2_78_1","volume":"201","author":"Williams Felicity R.","unstructured":"Felicity R. Williams, Annalisa Berzigotti, Janet M. Lord, Jennifer C. Lai, and Matthew J. Armstrong. 2019. impact of exercise on physical frailty in patients with chronic liver disease. Alimentary Pharmacology & Therapeutics (2019).","journal-title":"Matthew J. Armstrong."},{"key":"e_1_3_2_2_79_1","unstructured":"Jinglin Xu Yongming Rao Xumin Yu Guangyi Chen Jie Zhou and Jiwen Lu. 2022. FineDiving: A Fine-grained Dataset for Procedure-aware Action Quality Assessment. In CVPR."},{"key":"e_1_3_2_2_80_1","volume-title":"A Dual Modality Approach For (Zero-Shot) Multi-Label Classification. arXiv preprint arXiv:2208.09562","author":"Xu Shichao","year":"2022","unstructured":"Shichao Xu, Yikang Li, Jenhao Hsiao, Chiuman Ho, and Zhu Qi. 2022. A Dual Modality Approach For (Zero-Shot) Multi-Label Classification. arXiv preprint arXiv:2208.09562 (2022)."},{"key":"e_1_3_2_2_81_1","volume-title":"Skeleton-Based Human Action Recognition with Noisy Labels. arXiv preprint arXiv:2403.09975","author":"Xu Yi","year":"2024","unstructured":"Yi Xu, Kunyu Peng, Di Wen, Ruiping Liu, Junwei Zheng, Yufan Chen, Jiaming Zhang, Alina Roitberg, Kailun Yang, and Rainer Stiefelhagen. 2024. Skeleton-Based Human Action Recognition with Noisy Labels. arXiv preprint arXiv:2403.09975 (2024)."},{"key":"e_1_3_2_2_82_1","doi-asserted-by":"crossref","unstructured":"Sijie Yan Yuanjun Xiong and Dahua Lin. 2018. Spatial temporal graph convolutional networks for skeleton-based action recognition. In AAAI.","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"e_1_3_2_2_83_1","doi-asserted-by":"crossref","unstructured":"Zhendong Yang Zhe Li Mingqi Shao Dachuan Shi Zehuan Yuan and Chun Yuan. 2022. Masked Generative Distillation. In ECCV.","DOI":"10.1007\/978-3-031-20083-0_4"},{"key":"e_1_3_2_2_84_1","doi-asserted-by":"crossref","unstructured":"Jin Ye Junjun He Xiaojiang Peng Wenhao Wu and Yu Qiao. 2020. Attention-driven dynamic graph convolutional network for multi-label image recognition. In ECCV.","DOI":"10.1007\/978-3-030-58589-1_39"},{"key":"e_1_3_2_2_85_1","unstructured":"Renchun You Zhiyao Guo Lei Cui Xiang Long Yingze Bao and Shilei Wen. 2020. Cross-modality attention with semantic graph embedding for multi-label classification. In AAAI."},{"key":"e_1_3_2_2_86_1","doi-asserted-by":"crossref","unstructured":"Zhengbo Zhang Chunluan Zhou and Zhigang Tu. 2022. Distilling Inter-Class Distance for Semantic Segmentation. In IJCAI.","DOI":"10.24963\/ijcai.2022\/235"},{"key":"e_1_3_2_2_87_1","doi-asserted-by":"crossref","unstructured":"Borui Zhao Quan Cui Renjie Song Yiyu Qiu and Jiajun Liang. 2022. Decoupled Knowledge Distillation. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01165"},{"key":"e_1_3_2_2_88_1","volume-title":"HACS: Human Action Clips and Segments Dataset for Recognition and Temporal Localization. In ICCV.","author":"Zhao Hang","year":"2019","unstructured":"Hang Zhao, Zhicheng Yan, Lorenzo Torresani, and Antonio Torralba. 2019. HACS: Human Action Clips and Segments Dataset for Recognition and Temporal Localization. In ICCV."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680630","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3680630","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:57Z","timestamp":1750295877000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680630"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":88,"alternative-id":["10.1145\/3664647.3680630","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3680630","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}