{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T23:46:00Z","timestamp":1769989560007,"version":"3.49.0"},"publisher-location":"Cham","reference-count":87,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031727603","type":"print"},{"value":"9783031727610","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72761-0_24","type":"book-chapter","created":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T07:01:50Z","timestamp":1727593310000},"page":"421-440","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["E3V-K5: An Authentic Benchmark for\u00a0Redefining Video-Based Energy Expenditure Estimation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8827-9012","authenticated-orcid":false,"given":"Shengxuming","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lei","family":"Jin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yifan","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinyu","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xu","family":"Wen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8640-8434","authenticated-orcid":false,"given":"Zunlei","family":"Feng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2621-6048","authenticated-orcid":false,"given":"Mingli","family":"Song","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,30]]},"reference":[{"key":"24_CR1","doi-asserted-by":"publisher","first-page":"683","DOI":"10.2165\/00007256-200333090-00004","volume":"33","author":"PN Ainslie","year":"2003","unstructured":"Ainslie, P.N., Reilly, T., Westerterp, K.R.: Estimating human energy expenditure. Sports Med. 33, 683\u2013698 (2003)","journal-title":"Sports Med."},{"issue":"5","key":"24_CR2","doi-asserted-by":"publisher","first-page":"1577","DOI":"10.1109\/JBHI.2015.2418256","volume":"19","author":"M Altini","year":"2015","unstructured":"Altini, M., Casale, P., Penders, J.F., Amft, O.: Personalization of energy expenditure estimation in free living using topic models. IEEE J. Biomed. Health Inform. 19(5), 1577\u20131586 (2015)","journal-title":"IEEE J. Biomed. Health Inform."},{"key":"24_CR3","doi-asserted-by":"publisher","first-page":"575","DOI":"10.4330\/wjc.v8.i10.575","volume":"8","author":"AJ Alves","year":"2016","unstructured":"Alves, A.J., et al.: Physical activity in primary and secondary prevention of cardiovascular disease: overview updated. World J. Cardiol. 8, 575\u2013583 (2016)","journal-title":"World J. Cardiol."},{"issue":"1","key":"24_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/2193-1801-2-229","volume":"2","author":"OM Blake","year":"2013","unstructured":"Blake, O.M., Wakeling, J.M.: Estimating changes in metabolic power from EMG. Springerplus 2(1), 1\u20137 (2013)","journal-title":"Springerplus"},{"key":"24_CR5","doi-asserted-by":"crossref","unstructured":"Cao, Z., Simon, T., Wei, S.E., Sheikh, Y.: Realtime multi-person 2D pose estimation using part affinity fields. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.143"},{"key":"24_CR6","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"24_CR7","unstructured":"Chen, X., et\u00a0al.: Symbolic discovery of optimization algorithms. arXiv preprint arXiv:2302.06675 (2023)"},{"key":"24_CR8","doi-asserted-by":"crossref","unstructured":"Chen, Y., Zhang, Z., Yuan, C., Li, B., Deng, Y., Hu, W.: Channel-wise topology refinement graph convolution for skeleton-based action recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 13359\u201313368 (2021)","DOI":"10.1109\/ICCV48922.2021.01311"},{"key":"24_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"536","DOI":"10.1007\/978-3-030-58586-0_32","volume-title":"Computer Vision \u2013 ECCV 2020","author":"K Cheng","year":"2020","unstructured":"Cheng, K., Zhang, Y., Cao, C., Shi, L., Cheng, J., Lu, H.: Decoupling GCN with DropGraph module for skeleton-based action recognition. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12369, pp. 536\u2013553. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58586-0_32"},{"key":"24_CR10","doi-asserted-by":"publisher","first-page":"439","DOI":"10.1007\/s40279-016-0585-y","volume":"47","author":"CCT Clark","year":"2016","unstructured":"Clark, C.C.T., Barnes, C.M., Stratton, G., McNarry, M.A., Mackintosh, K.A., Summers, H.D.: A review of emerging analytical techniques for objective physical activity measurement in humans. Sports Med. 47, 439\u2013447 (2016)","journal-title":"Sports Med."},{"issue":"4","key":"24_CR11","doi-asserted-by":"publisher","first-page":"1324","DOI":"10.1152\/japplphysiol.00818.2005","volume":"100","author":"SE Crouter","year":"2006","unstructured":"Crouter, S.E., Clowers, K.G., Bassett, D.: A novel method for using accelerometer data to predict energy expenditure. J. Appl. Physiol. 100(4), 1324\u20131331 (2006)","journal-title":"J. Appl. Physiol."},{"key":"24_CR12","doi-asserted-by":"crossref","unstructured":"Crouter, S.E., LaMunion, S.R., Hibbing, P.R., Kaplan, A., Bassett, D.: Accuracy of the Cosmed K5 portable calorimeter. PLoS ONE 14 (2019)","DOI":"10.1371\/journal.pone.0226290"},{"key":"24_CR13","doi-asserted-by":"crossref","unstructured":"Donahue, J., et al.: Long-term recurrent convolutional networks for visual recognition and description. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2015)","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"24_CR14","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth $$16\\times 16$$ words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"24_CR15","doi-asserted-by":"crossref","unstructured":"Duan, H., Wang, J., Chen, K., Lin, D.: DG-STGCN: dynamic spatial-temporal modeling for skeleton-based action recognition. arXiv preprint arXiv:2210.05895 (2022)","DOI":"10.1109\/CVPR52688.2022.00298"},{"key":"24_CR16","doi-asserted-by":"crossref","unstructured":"Duan, H., Wang, J., Chen, K., Lin, D.: PYSKL: towards good practices for skeleton action recognition. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 7351\u20137354 (2022)","DOI":"10.1145\/3503161.3548546"},{"key":"24_CR17","doi-asserted-by":"crossref","unstructured":"Duan, H., Zhao, Y., Chen, K., Lin, D., Dai, B.: Revisiting skeleton-based action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2969\u20132978 (2022)","DOI":"10.1109\/CVPR52688.2022.00298"},{"issue":"6","key":"24_CR18","doi-asserted-by":"publisher","first-page":"385","DOI":"10.1097\/00008483-200211000-00002","volume":"22","author":"JL Durstine","year":"2002","unstructured":"Durstine, J.L., Grandjean, P.W., Cox, C.A., Thompson, P.D.: Lipids, lipoproteins, and exercise. J. Cardiopulm. Rehabil. Prev. 22(6), 385\u2013398 (2002)","journal-title":"J. Cardiopulm. Rehabil. Prev."},{"key":"24_CR19","doi-asserted-by":"crossref","unstructured":"Edgcomb, A., Vahid, F.: Estimating daily energy expenditure from video for assistive monitoring. In: 2013 IEEE International Conference on Healthcare Informatics, pp. 184\u2013191. IEEE (2013)","DOI":"10.1109\/ICHI.2013.28"},{"key":"24_CR20","doi-asserted-by":"publisher","first-page":"7157","DOI":"10.1109\/TPAMI.2022.3222784","volume":"45","author":"HS Fang","year":"2022","unstructured":"Fang, H.S., et al.: AlphaPose: whole-body regional multi-person pose estimation and tracking in real-time. IEEE Trans. Pattern Anal. Mach. Intell. 45, 7157\u20137173 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"24_CR21","doi-asserted-by":"crossref","unstructured":"Fang, Y., et al.: EVA: exploring the limits of masked visual representation learning at scale. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19358\u201319369 (2023)","DOI":"10.1109\/CVPR52729.2023.01855"},{"key":"24_CR22","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C.: X3D: expanding architectures for efficient video recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"24_CR23","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: SlowFast networks for video recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"key":"24_CR24","unstructured":"Feichtenhofer, C., Li, Y., He, K., et al.: Masked autoencoders as spatiotemporal learners. In: Advances in Neural Information Processing Systems, vol. 35, pp. 35946\u201335958 (2022)"},{"key":"24_CR25","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Wildes, R.P.: Spatiotemporal multiplier networks for video action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.787"},{"key":"24_CR26","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Zisserman, A.: Convolutional two-stream network fusion for video action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2016)","DOI":"10.1109\/CVPR.2016.213"},{"key":"24_CR27","unstructured":"Ferguson, B.: ACSM\u2019s guidelines for exercise testing and prescription 9th ed. 2014. J. Can. Chiropractic Assoc. 58, 328 (2014)"},{"key":"24_CR28","doi-asserted-by":"crossref","unstructured":"Guo, D., Li, K., Hu, B., Zhang, Y., Wang, M.: Benchmarking micro-action recognition: dataset, method, and application. IEEE Trans. Circuits Syst. Video Technol. (2024)","DOI":"10.1109\/TCSVT.2024.3358415"},{"key":"24_CR29","doi-asserted-by":"crossref","unstructured":"Guo, S., et al.: Cross-architecture self-supervised video representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19270\u201319279 (2022)","DOI":"10.1109\/CVPR52688.2022.01867"},{"issue":"12","key":"24_CR30","doi-asserted-by":"publisher","first-page":"243","DOI":"10.4239\/wjd.v7.i12.243","volume":"7","author":"H Hamasaki","year":"2016","unstructured":"Hamasaki, H.: Daily physical activity and type 2 diabetes: a review. World J. Diabetes 7(12), 243\u2013251 (2016)","journal-title":"World J. Diabetes"},{"key":"24_CR31","unstructured":"Hand, G.A., et al.: The effect of exercise training on total daily energy expenditure and body composition in weight-stable adults: a randomized, controlled trial. Journal of physical activity & health, pp.\u00a01\u20138 (2020)"},{"issue":"3","key":"24_CR32","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1097\/00003677-200107000-00008","volume":"29","author":"RL Hughson","year":"2001","unstructured":"Hughson, R.L., Tschakovsky, M.E., Houston, M.E.: Regulation of oxygen consumption at the onset of exercise. Exerc. Sport Sci. Rev. 29(3), 129\u2013133 (2001)","journal-title":"Exerc. Sport Sci. Rev."},{"issue":"1","key":"24_CR33","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2012","unstructured":"Ji, S., Xu, W., Yang, M., Yu, K.: 3D convolutional neural networks for human action recognition. IEEE Trans. Pattern Anal. Mach. Intell. 35(1), 221\u2013231 (2012)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"24_CR34","doi-asserted-by":"crossref","unstructured":"Kaiyala, K.J., Ramsay, D.S.: Direct animal calorimetry, the underused gold standard for quantifying the fire of life. Comp. Biochem. Physiol. Part A Mol. Integrative Physiol. 158 3, 252\u2013264 (2011)","DOI":"10.1016\/j.cbpa.2010.04.013"},{"issue":"1","key":"24_CR35","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1093\/ajcn\/49.1.37","volume":"49","author":"HJ Kalkwarf","year":"1989","unstructured":"Kalkwarf, H.J., Haas, J.D., Belko, A.Z., Roach, R.C., Roe, D.A.: Accuracy of heart-rate monitoring and activity diaries for estimating energy expenditure. Am. J. Clin. Nutr. 49(1), 37\u201343 (1989)","journal-title":"Am. J. Clin. Nutr."},{"key":"24_CR36","doi-asserted-by":"crossref","unstructured":"Karpathy, A., Toderici, G., Shetty, S., Leung, T., Sukthankar, R., Fei-Fei, L.: Large-scale video classification with convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2014)","DOI":"10.1109\/CVPR.2014.223"},{"key":"24_CR37","doi-asserted-by":"crossref","unstructured":"Kirkham, A.A., Davis, M.K.: Exercise prevention of cardiovascular disease in breast cancer survivors. J. Oncol. 2015 (2015)","DOI":"10.1155\/2015\/917606"},{"key":"24_CR38","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"761","DOI":"10.1007\/978-3-030-58565-5_45","volume-title":"Computer Vision \u2013 ECCV 2020","author":"M Korban","year":"2020","unstructured":"Korban, M., Li, X.: DDGCN: a dynamic directed graph convolutional network for action recognition. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12365, pp. 761\u2013776. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58565-5_45"},{"key":"24_CR39","doi-asserted-by":"crossref","unstructured":"Li, B., Li, X., Zhang, Z., Wu, F.: Spatio-temporal graph routing for skeleton-based action recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a033, pp. 8561\u20138568 (2019)","DOI":"10.1609\/aaai.v33i01.33018561"},{"key":"24_CR40","doi-asserted-by":"crossref","unstructured":"Li, K., et al.: Unmasked teacher: towards training-efficient video foundation models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 19948\u201319960 (2023)","DOI":"10.1109\/ICCV51070.2023.01826"},{"key":"24_CR41","doi-asserted-by":"crossref","unstructured":"Lin, J., Gan, C., Han, S.: TSM: temporal shift module for efficient video understanding. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2019)","DOI":"10.1109\/ICCV.2019.00718"},{"key":"24_CR42","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"24_CR43","doi-asserted-by":"crossref","unstructured":"Liu, Z., Zhang, H., Chen, Z., Wang, Z., Ouyang, W.: Disentangling and unifying graph convolutions for skeleton-based action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 143\u2013152 (2020)","DOI":"10.1109\/CVPR42600.2020.00022"},{"key":"24_CR44","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1007\/s00421-010-1639-8","volume":"111","author":"K Lyden","year":"2011","unstructured":"Lyden, K., Kozey, S.L., Staudenmeyer, J.W., Freedson, P.S.: A comprehensive evaluation of commonly used accelerometer energy expenditure and met prediction equations. Eur. J. Appl. Physiol. 111, 187\u2013201 (2011)","journal-title":"Eur. J. Appl. Physiol."},{"key":"24_CR45","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1177\/1466424006064001","volume":"126","author":"T Macdonald","year":"2006","unstructured":"Macdonald, T.: Preventing chronic diseases: a vital investment. J. R. Soc. Promotion Health 126, 95 (2006)","journal-title":"J. R. Soc. Promotion Health"},{"key":"24_CR46","unstructured":"Masullo, A., Burghardt, T., Damen, D., Hannuna, S., Ponce-L\u00f3pez, V., Mirmehdi, M.: CaloriNet: from silhouettes to calorie estimation in private environments. arXiv preprint arXiv:1806.08152 (2018)"},{"key":"24_CR47","doi-asserted-by":"publisher","first-page":"1631","DOI":"10.1136\/bmj.318.7198.1631","volume":"318","author":"DR Murdoch","year":"1999","unstructured":"Murdoch, D.R.: High life: a history of high altitude physiology and medicine. BMJ 318, 1631 (1999)","journal-title":"BMJ"},{"key":"24_CR48","doi-asserted-by":"crossref","unstructured":"Nakamura, K., Yeung, S., Alahi, A., Fei-Fei, L.: Jointly learning energy expenditures and activities using egocentric multimodal signals. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition, pp. 6817\u20136826 (2017)","DOI":"10.1109\/CVPR.2017.721"},{"key":"24_CR49","doi-asserted-by":"publisher","first-page":"395","DOI":"10.2165\/00007256-199826060-00003","volume":"26","author":"FG Neely","year":"1998","unstructured":"Neely, F.G.: Biomechanical risk factors for exercise-related lower limb injuries. Sports Med. 26, 395\u2013413 (1998)","journal-title":"Sports Med."},{"key":"24_CR50","doi-asserted-by":"crossref","unstructured":"Pang, J., et al.: Quasi-dense similarity learning for multiple object tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 164\u2013173 (2021)","DOI":"10.1109\/CVPR46437.2021.00023"},{"key":"24_CR51","doi-asserted-by":"crossref","unstructured":"Peng, K., Roitberg, A., Yang, K., Zhang, J., Stiefelhagen, R.: Should I take a walk? Estimating energy expenditure from video data. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, pp. 2074\u20132084 (2022)","DOI":"10.1109\/CVPRW56347.2022.00225"},{"issue":"9","key":"24_CR52","doi-asserted-by":"publisher","DOI":"10.2196\/33606","volume":"6","author":"T Perrett","year":"2022","unstructured":"Perrett, T., Masullo, A., Damen, D., Burghardt, T., Craddock, I., Mirmehdi, M., et al.: Personalized energy expenditure estimation: visual sensing approach with deep learning. JMIR Formative Res. 6(9), e33606 (2022)","journal-title":"JMIR Formative Res."},{"key":"24_CR53","unstructured":"Pham, H.H., Khoudour, L., Crouzil, A., Zegers, P., Velastin, S.A.: Video-based human action recognition using deep learning: a review. arXiv preprint arXiv:2208.03775 (2022)"},{"key":"24_CR54","unstructured":"Qing, Z., et al.: MAR: masked autoencoders for efficient action recognition. IEEE Trans. Multimed. (2023)"},{"key":"24_CR55","unstructured":"Ryali, C., et\u00a0al.: Hiera: a hierarchical vision transformer without the bells-and-whistles. arXiv preprint arXiv:2306.00989 (2023)"},{"key":"24_CR56","unstructured":"Shao, S., et al.: CrowdHuman: a benchmark for detecting human in a crowd. arXiv preprint arXiv:1805.00123 (2018)"},{"key":"24_CR57","doi-asserted-by":"crossref","unstructured":"Shcherbina, A., et al.: Accuracy in wrist-worn, sensor-based measurements of heart rate and energy expenditure in a diverse cohort. J. Personalized Med. 7 (2016)","DOI":"10.1101\/094862"},{"key":"24_CR58","doi-asserted-by":"crossref","unstructured":"Shi, L., Zhang, Y., Cheng, J., Lu, H.: Two-stream adaptive graph convolutional networks for skeleton-based action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12026\u201312035 (2019)","DOI":"10.1109\/CVPR.2019.01230"},{"key":"24_CR59","doi-asserted-by":"publisher","first-page":"9532","DOI":"10.1109\/TIP.2020.3028207","volume":"29","author":"L Shi","year":"2020","unstructured":"Shi, L., Zhang, Y., Cheng, J., Lu, H.: Skeleton-based action recognition with multi-stream adaptive graph convolutional networks. IEEE Trans. Image Process. 29, 9532\u20139545 (2020)","journal-title":"IEEE Trans. Image Process."},{"key":"24_CR60","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. In: Advances in Neural Information Processing Systems, vol. 27 (2014)"},{"key":"24_CR61","unstructured":"American College of Sports Medicine: Guidelines for exercise testing and prescription. Williams & Wilkins (1991)"},{"key":"24_CR62","unstructured":"Tan, H., Lei, J., Wolf, T., Bansal, M.: VIMPAC: video pre-training via masked token prediction and contrastive learning. arXiv preprint arXiv:2106.11250 (2021)"},{"key":"24_CR63","doi-asserted-by":"crossref","unstructured":"Tao, L., et al.: Calorie counter: RGB-depth visual estimation of energy expenditure at home. In: 2016 Asian Conference on Computer Vision (2016)","DOI":"10.1007\/978-3-319-54407-6_16"},{"issue":"9","key":"24_CR64","doi-asserted-by":"publisher","first-page":"1831","DOI":"10.1249\/MSS.0000000000000298","volume":"46","author":"O Tikkanen","year":"2014","unstructured":"Tikkanen, O., K\u00e4rkk\u00e4inen, S., Haakana, P., Kallinen, M., Pullinen, T., Finni, T.: EMG, heart rate, and accelerometer as estimators of energy expenditure in locomotion. Med. Sci. Sports Exerc. 46(9), 1831\u20131839 (2014)","journal-title":"Med. Sci. Sports Exerc."},{"key":"24_CR65","unstructured":"Tong, Z., Song, Y., Wang, J., Wang, L.: VideoMAE: masked autoencoders are data-efficient learners for self-supervised video pre-training. In: Koyejo, S., Mohamed, S., Agarwal, A., Belgrave, D., Cho, K., Oh, A. (eds.) Advances in Neural Information Processing Systems, vol.\u00a035, pp. 10078\u201310093. Curran Associates, Inc. (2022)"},{"key":"24_CR66","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3D convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision (2015)","DOI":"10.1109\/ICCV.2015.510"},{"key":"24_CR67","doi-asserted-by":"crossref","unstructured":"Tran, D., Wang, H., Torresani, L., Feiszli, M.: Video classification with channel-separated convolutional networks. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2019)","DOI":"10.1109\/ICCV.2019.00565"},{"key":"24_CR68","doi-asserted-by":"crossref","unstructured":"Tran, D., Wang, H., Torresani, L., Ray, J., LeCun, Y., Paluri, M.: A closer look at spatiotemporal convolutions for action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2018)","DOI":"10.1109\/CVPR.2018.00675"},{"key":"24_CR69","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"issue":"10","key":"24_CR70","doi-asserted-by":"publisher","first-page":"1019","DOI":"10.1139\/apnm-2015-0140","volume":"40","author":"R Villar","year":"2015","unstructured":"Villar, R., Beltrame, T., Hughson, R.L.: Validation of the hexoskin wearable vest during lying, sitting, standing, and walking activities. Appl. Physiol. Nutr. Metab. 40(10), 1019\u20131024 (2015)","journal-title":"Appl. Physiol. Nutr. Metab."},{"key":"24_CR71","doi-asserted-by":"crossref","unstructured":"Wang, L., Koniusz, P.: 3MFormer: multi-order multi-mode transformer for skeletal action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5620\u20135631 (2023)","DOI":"10.1109\/CVPR52729.2023.00544"},{"key":"24_CR72","unstructured":"Wang, L., Xiong, Y., Wang, Z., Qiao, Y.: Towards good practices for very deep two-stream convnets. arXiv preprint arXiv:1507.02159 (2015)"},{"key":"24_CR73","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1007\/978-3-319-46484-8_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"L Wang","year":"2016","unstructured":"Wang, L., et al.: Temporal segment networks: towards good practices for deep action recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 20\u201336. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_2"},{"key":"24_CR74","doi-asserted-by":"crossref","unstructured":"Wang, R., et al.: BEVT: BERT pretraining of video transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14733\u201314743 (2022)","DOI":"10.1109\/CVPR52688.2022.01432"},{"key":"24_CR75","doi-asserted-by":"crossref","unstructured":"Wang, R., et al.: Masked video distillation: Rethinking masked feature modeling for self-supervised video representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6312\u20136322 (2023)","DOI":"10.1109\/CVPR52729.2023.00611"},{"key":"24_CR76","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2018)","DOI":"10.1109\/CVPR.2018.00813"},{"key":"24_CR77","unstructured":"Wang, Y., et\u00a0al.: InternVideo: general video foundation models via generative and discriminative learning. arXiv preprint arXiv:2212.03191 (2022)"},{"key":"24_CR78","doi-asserted-by":"crossref","unstructured":"Wang, Y., Long, M., Wang, J., Yu, P.S.: Spatiotemporal pyramid network for video action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.226"},{"key":"24_CR79","doi-asserted-by":"crossref","unstructured":"Wei, C., Fan, H., Xie, S., Wu, C.Y., Yuille, A., Feichtenhofer, C.: Masked feature prediction for self-supervised visual pre-training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14668\u201314678 (2022)","DOI":"10.1109\/CVPR52688.2022.01426"},{"key":"24_CR80","unstructured":"Welk, G.J.: Physical activity assessments for health-related research (2002)"},{"key":"24_CR81","doi-asserted-by":"publisher","first-page":"1277","DOI":"10.1007\/s00421-017-3641-x","volume":"117","author":"KR Westerterp","year":"2017","unstructured":"Westerterp, K.R.: Doubly labelled water assessment of energy expenditure: principle, practice, and promise. Eur. J. Appl. Physiol. 117, 1277\u20131285 (2017)","journal-title":"Eur. J. Appl. Physiol."},{"key":"24_CR82","doi-asserted-by":"crossref","unstructured":"White, L.E., DeBlois, J.P., Barreira, T.V.: Reliability analysis of the Cosmed K5 portable metabolic system. Med. Sci. Sports Exercise (2019)","DOI":"10.1249\/01.mss.0000560990.10036.c4"},{"key":"24_CR83","doi-asserted-by":"crossref","unstructured":"Williams, G.L., Li, S., Pathirana, P.N.: Preliminary investigation of energy comparation between gyroscope, electromyography and VO2 wearable sensors. In: 2016 38th Annual International Conference of the IEEE Engineering in Medicine and Biology Society, pp. 4963\u20134966 (2016)","DOI":"10.1109\/EMBC.2016.7591841"},{"key":"24_CR84","doi-asserted-by":"crossref","unstructured":"Yan, S., Xiong, Y., Lin, D.: Spatial temporal graph convolutional networks for skeleton-based action recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"24_CR85","doi-asserted-by":"crossref","unstructured":"Yue-Hei\u00a0Ng, J., Hausknecht, M., Vijayanarasimhan, S., Vinyals, O., Monga, R., Toderici, G.: Beyond short snippets: deep networks for video classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2015)","DOI":"10.1109\/CVPR.2015.7299101"},{"key":"24_CR86","doi-asserted-by":"crossref","unstructured":"Zhou, B., Andonian, A., Oliva, A., Torralba, A.: Temporal relational reasoning in videos. In: Proceedings of the European Conference on Computer Vision (2018)","DOI":"10.1007\/978-3-030-01246-5_49"},{"key":"24_CR87","doi-asserted-by":"crossref","unstructured":"Zhu, W., Ma, X., Liu, Z., Liu, L., Wu, W., Wang, Y.: MotionBERT: a unified perspective on learning human motion representations. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2023)","DOI":"10.1109\/ICCV51070.2023.01385"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72761-0_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T07:35:34Z","timestamp":1727595334000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72761-0_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,30]]},"ISBN":["9783031727603","9783031727610"],"references-count":87,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72761-0_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,30]]},"assertion":[{"value":"30 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}