{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T07:02:51Z","timestamp":1773212571603,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,10,23]],"date-time":"2017-10-23T00:00:00Z","timestamp":1508716800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"CCF-Tencent Open Research Fund"},{"name":"Microsoft Research Asia","award":["FY17-RESTHEME-013"],"award-info":[{"award-number":["FY17-RESTHEME-013"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2017,10,23]]},"DOI":"10.1145\/3132734.3132739","type":"proceedings-article","created":{"date-parts":[[2017,10,20]],"date-time":"2017-10-20T19:24:32Z","timestamp":1508527472000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":175,"title":["PKU-MMD"],"prefix":"10.1145","author":[{"given":"Chunhui","family":"Liu","sequence":"first","affiliation":[{"name":"Peking University, Beijing, China"}]},{"given":"Yueyu","family":"Hu","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"given":"Yanghao","family":"Li","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"given":"Sijie","family":"Song","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"given":"Jiaying","family":"Liu","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2017,10,23]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"Jake K. Aggarwal and Lu Xia. 2014. Human activity recognition from 3D data: A review. PRL (2014).  Jake K. Aggarwal and Lu Xia. 2014. Human activity recognition from 3D data: A review. PRL (2014).","DOI":"10.1016\/j.patrec.2014.04.011"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Victoria Bloom Dimitrios Makris and Vasileios Argyriou. 2012. G3D: A gaming action dataset and real time action recognition evaluation framework CVPR.  Victoria Bloom Dimitrios Makris and Vasileios Argyriou. 2012. G3D: A gaming action dataset and real time action recognition evaluation framework CVPR.","DOI":"10.1109\/CVPRW.2012.6239175"},{"key":"e_1_3_2_1_3_1","volume-title":"Activitynet: A large-scale video benchmark for human activity understanding CVPR.","author":"Heilbron Fabian Caba","year":"2015"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-016-3374-6"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2013.02.006"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33868-7_6"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Roeland De Geest Efstratios Gavves Amir Ghodrati Zhenyang Li Cees Snoek and Tinne Tuytelaars. 2016. Online Action Detection.  Roeland De Geest Efstratios Gavves Amir Ghodrati Zhenyang Li Cees Snoek and Tinne Tuytelaars. 2016. Online Action Detection.","DOI":"10.1007\/978-3-319-46454-1_17"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Jeffrey Donahue Lisa Anne Hendricks Sergio Guadarrama Marcus Rohrbach Subhashini Venugopalan Kate Saenko and Trevor Darrell. 2015. Long-term recurrent convolutional networks for visual recognition and description CVPR.  Jeffrey Donahue Lisa Anne Hendricks Sergio Guadarrama Marcus Rohrbach Subhashini Venugopalan Kate Saenko and Trevor Darrell. 2015. Long-term recurrent convolutional networks for visual recognition and description CVPR.","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Yong Du Wei Wang and Liang Wang. 2015. Hierarchical recurrent neural network for skeleton based action recognition CVPR.  Yong Du Wei Wang and Liang Wang. 2015. Hierarchical recurrent neural network for skeleton based action recognition CVPR.","DOI":"10.1109\/CVPR.2015.7298714"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2015.50"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-013-0683-3"},{"key":"e_1_3_2_1_12_1","unstructured":"Mohamed E. Hussein Marwan Torki Mohammad Abdelaziz Gowayyed and Motaz El-Saban. 2013. Human Action Recognition Using a Temporal Hierarchy of Covariance Descriptors on 3D Joint Locations.. In IJCAI.   Mohamed E. Hussein Marwan Torki Mohammad Abdelaziz Gowayyed and Motaz El-Saban. 2013. Human Action Recognition Using a Temporal Hierarchy of Covariance Descriptors on 3D Joint Locations.. In IJCAI."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Ivan Laptev Marcin Marszalek Cordelia Schmid and Benjamin Rozenfeld. 2008. Learning realistic human actions from movies. In CVPR.  Ivan Laptev Marcin Marszalek Cordelia Schmid and Benjamin Rozenfeld. 2008. Learning realistic human actions from movies. In CVPR.","DOI":"10.1109\/CVPR.2008.4587756"},{"key":"e_1_3_2_1_15_1","unstructured":"Wanqing Li Zhengyou Zhang and Zicheng Liu. 2010. Action recognition based on a bag of 3D points. CVPR.  Wanqing Li Zhengyou Zhang and Zicheng Liu. 2010. Action recognition based on a bag of 3D points. CVPR."},{"key":"e_1_3_2_1_16_1","unstructured":"Yanghao Li Cuiling Lan Junliang Xing Wenjun Zeng Chunfeng Yuan and Jiaying Liu. 2016. Online Human Action Detection using Joint Classification-Regression Recurrent Neural Networks. In ECCV.  Yanghao Li Cuiling Lan Junliang Xing Wenjun Zeng Chunfeng Yuan and Jiaying Liu. 2016. Online Human Action Detection using Joint Classification-Regression Recurrent Neural Networks. In ECCV."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.109"},{"key":"e_1_3_2_1_18_1","unstructured":"Bingbing Ni Gang Wang and Pierre Moulin. 2013. RGBD-hudaact: A color-depth video database for human daily activity recognition. Springer.  Bingbing Ni Gang Wang and Pierre Moulin. 2013. RGBD-hudaact: A color-depth video database for human daily activity recognition. Springer."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Ferda Ofli Rizwan Chaudhry Gregorij Kurillo Ren\u00e9 Vidal and Ruzena Bajcsy. 2013. Berkeley MHAD: A comprehensive multimodal human action database WACV.  Ferda Ofli Rizwan Chaudhry Gregorij Kurillo Ren\u00e9 Vidal and Ruzena Bajcsy. 2013. Berkeley MHAD: A comprehensive multimodal human action database WACV.","DOI":"10.1109\/WACV.2013.6474999"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Florent Perronnin Jorge S\u00e1nchez and Thomas Mensink. 2010. Improving the fisher kernel for large-scale image classification ECCV.   Florent Perronnin Jorge S\u00e1nchez and Thomas Mensink. 2010. Improving the fisher kernel for large-scale image classification ECCV.","DOI":"10.1007\/978-3-642-15561-1_11"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2533389"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Olga Russakovsky Jia Deng Hao Su Jonathan Krause Sanjeev Satheesh Sean Ma Zhiheng Huang Andrej Karpathy Aditya Khosla Michael S. Bernstein Alexander C. Berg and Fei-Fei Li. 2014. ImageNet Large Scale Visual Recognition Challenge. CoRR (2014).  Olga Russakovsky Jia Deng Hao Su Jonathan Krause Sanjeev Satheesh Sean Ma Zhiheng Huang Andrej Karpathy Aditya Khosla Michael S. Bernstein Alexander C. Berg and Fei-Fei Li. 2014. ImageNet Large Scale Visual Recognition Challenge. CoRR (2014).","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126349"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Amir Shahroudy Jun Liu Tian-Tsong Ng and Gang Wang. 2016. NTU RGB+D: A large scale dataset for 3D human activity analysis CVPR.  Amir Shahroudy Jun Liu Tian-Tsong Ng and Gang Wang. 2016. NTU RGB+D: A large scale dataset for 3D human activity analysis CVPR.","DOI":"10.1109\/CVPR.2016.115"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2505295"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2015.138"},{"key":"e_1_3_2_1_27_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Two-stream convolutional networks for action recognition in videos NIPS.   Karen Simonyan and Andrew Zisserman. 2014. Two-stream convolutional networks for action recognition in videos NIPS."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Sijie Song Cuiling Lan Junliang Xing Wenjun Zeng and Jiaying Liu. 2016. An End-to-End Spatio-Temporal Attention Model for Human Action Recognition from Skeleton Data. AAAI (2016).  Sijie Song Cuiling Lan Junliang Xing Wenjun Zeng and Jiaying Liu. 2016. An End-to-End Spatio-Temporal Attention Model for Human Action Recognition from Skeleton Data. AAAI (2016).","DOI":"10.1609\/aaai.v31i1.11212"},{"key":"e_1_3_2_1_29_1","unstructured":"Khurram Soomro Amir Roshan Zamir and Mubarak Shah. 2012. UCF101: A dataset of 101 human actions classes from videos in the wild. arXiv (2012).  Khurram Soomro Amir Roshan Zamir and Mubarak Shah. 2012. UCF101: A dataset of 101 human actions classes from videos in the wild. arXiv (2012)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Jaeyong Sung Colin Ponce Bart Selman and Ashutosh Saxena. 2011. Human Activity Detection from RGBD Images. AAAI (2011).   Jaeyong Sung Colin Ponce Bart Selman and Ashutosh Saxena. 2011. Human Activity Detection from RGBD Images. AAAI (2011).","DOI":"10.1109\/ICRA.2012.6224591"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Jaeyong Sung Colin Ponce Bart Selman and Ashutosh Saxena. 2012. Unstructured human activity detection from RGBD images ICRA.   Jaeyong Sung Colin Ponce Bart Selman and Ashutosh Saxena. 2012. Unstructured human activity detection from RGBD images ICRA.","DOI":"10.1109\/ICRA.2012.6224591"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Christian Szegedy Sergey Ioffe Vincent Vanhoucke and Alex Alemi. 2016. Inception-v4 Inception-resnet and the impact of residual connections on learning. arXiv (2016).  Christian Szegedy Sergey Ioffe Vincent Vanhoucke and Alex Alemi. 2016. Inception-v4 Inception-resnet and the impact of residual connections on learning. arXiv (2016).","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.341"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.82"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Raviteja Vemulapalli and Rama Chellapa. 2016. Rolling rotations for recognizing human actions from 3D skeletal data CVPR.  Raviteja Vemulapalli and Rama Chellapa. 2016. Rolling rotations for recognizing human actions from 3D skeletal data CVPR.","DOI":"10.1109\/CVPR.2016.484"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.441"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Jiang Wang Zicheng Liu Ying Wu and Junsong Yuan. 2012. Mining actionlet ensemble for action recognition with depth cameras CVPR.  Jiang Wang Zicheng Liu Ying Wu and Junsong Yuan. 2012. Mining actionlet ensemble for action recognition with depth cameras CVPR.","DOI":"10.1109\/CVPR.2012.6247813"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.339"},{"key":"e_1_3_2_1_39_1","unstructured":"Limin Wang Yu Qiao and Xiaoou Tang. 2014. Action recognition and detection by combining motion and appearance features. THUMOS (2014).  Limin Wang Yu Qiao and Xiaoou Tang. 2014. Action recognition and detection by combining motion and appearance features. THUMOS (2014)."},{"key":"e_1_3_2_1_40_1","unstructured":"Limin Wang Zhe Wang Yuanjun Xiong and Yu Qiao. 2015. CUHK&SIAT submission for THUMOS15 action recognition challenge. THUMOS (2015).  Limin Wang Zhe Wang Yuanjun Xiong and Yu Qiao. 2015. CUHK&SIAT submission for THUMOS15 action recognition challenge. THUMOS (2015)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"Limin Wang Yuanjun Xiong Zhe Wang Yu Qiao Dahua Lin Xiaoou Tang and Luc Van Gool. 2016. Temporal segment networks: towards good practices for deep action recognition ECCV.  Limin Wang Yuanjun Xiong Zhe Wang Yu Qiao Dahua Lin Xiaoou Tang and Luc Van Gool. 2016. Temporal segment networks: towards good practices for deep action recognition ECCV.","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.406"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.389"},{"key":"e_1_3_2_1_44_1","volume-title":"Watch-n-patch: Unsupervised understanding of actions and relations CVPR.","author":"Wu Chenxia","year":"2015"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806222"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Kiwon Yun Jean Honorio Debaleena Chattopadhyay Tamara L Berg and Dimitris Samaras. 2012. Two-person interaction detection using body-pose features and multiple instance learning CVPR.  Kiwon Yun Jean Honorio Debaleena Chattopadhyay Tamara L Berg and Dimitris Samaras. 2012. Two-person interaction detection using body-pose features and multiple instance learning CVPR.","DOI":"10.1109\/CVPRW.2012.6239234"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","unstructured":"Mihai Zanfir Marius Leordeanu and Cristian Sminchisescu. 2013. The moving pose: An efficient 3D kinematics descriptor for low-latency action recognition and detection. In CVPR.  Mihai Zanfir Marius Leordeanu and Cristian Sminchisescu. 2013. The moving pose: An efficient 3D kinematics descriptor for low-latency action recognition and detection. In CVPR.","DOI":"10.1109\/ICCV.2013.342"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2016.05.019"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"Wentao Zhu Cuiling Lan Junliang Xing Wenjun Zeng Yanghao Li Li Shen and Xiaohui Xie. 2016. Co-occurrence feature learning for skeleton based action recognition using regularized deep LSTM networks. AAAI (2016).   Wentao Zhu Cuiling Lan Junliang Xing Wenjun Zeng Yanghao Li Li Shen and Xiaohui Xie. 2016. Co-occurrence feature learning for skeleton based action recognition using regularized deep LSTM networks. AAAI (2016).","DOI":"10.1609\/aaai.v30i1.10451"}],"event":{"name":"MM '17: ACM Multimedia Conference","location":"Mountain View California USA","acronym":"MM '17","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the Workshop on Visual Analysis in Smart and Connected Communities"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3132734.3132739","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3132734.3132739","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T02:10:57Z","timestamp":1750212657000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3132734.3132739"}},"subtitle":["A Large Scale Benchmark for Skeleton-Based Human Action Understanding"],"short-title":[],"issued":{"date-parts":[[2017,10,23]]},"references-count":49,"alternative-id":["10.1145\/3132734.3132739","10.1145\/3132734"],"URL":"https:\/\/doi.org\/10.1145\/3132734.3132739","relation":{},"subject":[],"published":{"date-parts":[[2017,10,23]]},"assertion":[{"value":"2017-10-23","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}