{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T15:24:50Z","timestamp":1780673090731,"version":"3.54.1"},"publisher-location":"Cham","reference-count":34,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030012458","type":"print"},{"value":"9783030012465","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-01246-5_7","type":"book-chapter","created":{"date-parts":[[2018,10,5]],"date-time":"2018-10-05T20:14:56Z","timestamp":1538770496000},"page":"106-121","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":285,"title":["Skeleton-Based Action Recognition with Spatial Reasoning and Temporal Stack Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3354-1968","authenticated-orcid":false,"given":"Chenyang","family":"Si","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4179-8210","authenticated-orcid":false,"given":"Ya","family":"Jing","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5750-6980","authenticated-orcid":false,"given":"Wei","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5224-8647","authenticated-orcid":false,"given":"Liang","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4054-5649","authenticated-orcid":false,"given":"Tieniu","family":"Tan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2018,10,6]]},"reference":[{"issue":"3","key":"7_CR1","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1145\/1922649.1922653","volume":"43","author":"JK Aggarwal","year":"2011","unstructured":"Aggarwal, J.K., Ryoo, M.S.: Human activity analysis: a review. ACM Comput. Surv. 43(3), 16 (2011)","journal-title":"ACM Comput. Surv."},{"key":"7_CR2","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1016\/j.patrec.2014.04.011","volume":"48","author":"JK Aggarwal","year":"2014","unstructured":"Aggarwal, J.K., Xia, L.: Human activity recognition from 3d data: a review. Pattern Recognit. Lett. 48, 70\u201380 (2014)","journal-title":"Pattern Recognit. Lett."},{"key":"7_CR3","doi-asserted-by":"crossref","unstructured":"Cao, Z., Simon, T., Wei, S.E., Sheikh, Y.: Realtime multi-person 2d pose estimation using part affinity fields. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.143"},{"key":"7_CR4","unstructured":"Du, Y., Wang, W., Wang, L.: Hierarchical recurrent neural network for skeleton based action recognition. In: CVPR (2015)"},{"key":"7_CR5","unstructured":"Duvenaud, D.K., et al.: Convolutional networks on graphs for learning molecular fingerprints. In: NIPS (2015)"},{"key":"7_CR6","unstructured":"Henaff, M., Bruna, J., LeCun, Y.: Deep convolutional networks on graph-structured data. arXiv preprint arXiv:1506.05163 (2015)"},{"issue":"8","key":"7_CR7","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"7_CR8","doi-asserted-by":"crossref","unstructured":"Hu, J.F., Zheng, W.S., Lai, J., Zhang, J.: Jointly learning heterogeneous features for RGB-D activity recognition. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7299172"},{"key":"7_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"280","DOI":"10.1007\/978-3-319-46448-0_17","volume-title":"Computer Vision \u2013 ECCV 2016","author":"J-F Hu","year":"2016","unstructured":"Hu, J.-F., Zheng, W.-S., Ma, L., Wang, G., Lai, J.: Real-time RGB-D activity prediction by soft regression. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 280\u2013296. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_17"},{"key":"7_CR10","unstructured":"Hussein, M.E., Torki, M., Gowayyed, M.A., El-Saban, M.: Human action recognition using a temporal hierarchy of covariance descriptors on 3d joint locations. In: IJCAI (2013)"},{"issue":"2","key":"7_CR11","doi-asserted-by":"publisher","first-page":"201","DOI":"10.3758\/BF03212378","volume":"14","author":"G Johansson","year":"1973","unstructured":"Johansson, G.: Visual perception of biological motion and a model for its analysis. Percept. Psychophys. 14(2), 201\u2013211 (1973)","journal-title":"Percept. Psychophys."},{"key":"7_CR12","doi-asserted-by":"crossref","unstructured":"Ke, Q., Bennamoun, M., An, S., Sohel, F., Boussaid, F.: A new representation of skeleton sequences for 3d action recognition. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.486"},{"key":"7_CR13","doi-asserted-by":"crossref","unstructured":"Kim, T.S., Reiter, A.: Interpretable 3d human action analysis with temporal convolutional networks. In: CVPR Workshops (2017)","DOI":"10.1109\/CVPRW.2017.207"},{"key":"7_CR14","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: ICLR (2015)"},{"key":"7_CR15","unstructured":"Bruna, J., Zaremba, W., Szlam, A., LeCun, Y.: Spectral networks and locally connected networks on graphs. In: ICLR (2014)"},{"key":"7_CR16","doi-asserted-by":"crossref","unstructured":"Lee, I., Kim, D., Kang, S., Lee, S.: Ensemble deep learning for skeleton-based action recognition using temporal sliding LSTM networks. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.115"},{"key":"7_CR17","doi-asserted-by":"crossref","unstructured":"Li, R., Tapaswi, M., Liao, R., Jia, J., Urtasun, R., Fidler, S.: Situation recognition with graph neural networks. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.448"},{"key":"7_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"816","DOI":"10.1007\/978-3-319-46487-9_50","volume-title":"Computer Vision \u2013 ECCV 2016","author":"J Liu","year":"2016","unstructured":"Liu, J., Shahroudy, A., Xu, D., Wang, G.: Spatio-temporal LSTM with trust gates for 3D human action recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9907, pp. 816\u2013833. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46487-9_50"},{"key":"7_CR19","doi-asserted-by":"publisher","first-page":"346","DOI":"10.1016\/j.patcog.2017.02.030","volume":"68","author":"M Liu","year":"2017","unstructured":"Liu, M., Liu, H., Chen, C.: Enhanced skeleton visualization for view invariant human action recognition. Pattern Recognit. 68, 346\u2013362 (2017)","journal-title":"Pattern Recognit."},{"key":"7_CR20","unstructured":"Niepert, M., Ahmed, M., Kutzkov, K.: Learning convolutional neural networks for graphs. In: ICML (2016)"},{"issue":"6","key":"7_CR21","doi-asserted-by":"publisher","first-page":"976","DOI":"10.1016\/j.imavis.2009.11.014","volume":"28","author":"R Poppe","year":"2010","unstructured":"Poppe, R.: A survey on vision-based human action recognition. Image Vis. Comput. 28(6), 976\u2013990 (2010)","journal-title":"Image Vis. Comput."},{"key":"7_CR22","doi-asserted-by":"crossref","unstructured":"Qi, X., Liao, R., Jia, J., Fidler, S., Urtasun, R.: 3d graph neural networks for RGBD semantic segmentation. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.556"},{"issue":"1","key":"7_CR23","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1109\/TNN.2008.2005605","volume":"20","author":"F Scarselli","year":"2009","unstructured":"Scarselli, F., Gori, M., Tsoi, A.C., Hagenbuchner, M., Monfardini, G.: The graph neural network model. IEEE Trans. Neural Netw. 20(1), 61\u201380 (2009)","journal-title":"IEEE Trans. Neural Netw."},{"key":"7_CR24","doi-asserted-by":"crossref","unstructured":"Shahroudy, A., Liu, J., Ng, T.T., Wang, G.: NTU RGB+D: a large scale dataset for 3d human activity analysis. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.115"},{"key":"7_CR25","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. In: NIPS (2014)"},{"key":"7_CR26","doi-asserted-by":"crossref","unstructured":"Song, S., Lan, C., Xing, J., Zeng, W., Liu, J.: An end-to-end spatio-temporal attention model for human action recognition from skeleton data. In: AAAI (2017)","DOI":"10.1609\/aaai.v31i1.11212"},{"key":"7_CR27","doi-asserted-by":"crossref","unstructured":"Vemulapalli, R., Arrate, F., Chellappa, R.: Human action recognition by representing 3d skeletons as points in a lie group. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.82"},{"key":"7_CR28","doi-asserted-by":"crossref","unstructured":"Wang, H., Wang, L.: Modeling temporal dynamics and spatial configurations of actions using two-stream recurrent neural networks. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.387"},{"key":"7_CR29","unstructured":"Wang, J., Liu, Z., Wu, Y., Yuan, J.: Mining actionlet ensemble for action recognition with depth cameras. In: CVPR (2012)"},{"issue":"2","key":"7_CR30","doi-asserted-by":"publisher","first-page":"224","DOI":"10.1016\/j.cviu.2010.10.002","volume":"115","author":"D Weinland","year":"2011","unstructured":"Weinland, D., Ronfard, R., Boyer, E.: A survey of vision-based methods for action representation, segmentation and recognition. Comput. Vis. Image Underst. 115(2), 224\u2013241 (2011)","journal-title":"Comput. Vis. Image Underst."},{"key":"7_CR31","doi-asserted-by":"crossref","unstructured":"Yan, S., Xiong, Y., Lin, D., Tang, X.: Spatial temporal graph convolutional networks for skeleton-based action recognition. In: AAAI (2018)","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"7_CR32","doi-asserted-by":"crossref","unstructured":"Zhang, P., Lan, C., Xing, J., Zeng, W., Xue, J., Zheng, N.: View adaptive recurrent neural networks for high performance human action recognition from skeleton data. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.233"},{"issue":"2","key":"7_CR33","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1109\/MMUL.2012.24","volume":"19","author":"Z Zhang","year":"2012","unstructured":"Zhang, Z.: Microsoft kinect sensor and its effect. IEEE Multimedia 19(2), 4\u201310 (2012)","journal-title":"IEEE Multimedia"},{"key":"7_CR34","doi-asserted-by":"crossref","unstructured":"Zhu, W., et al.: Co-occurrence feature learning for skeleton based action recognition using regularized deep LSTM networks. In: AAAI (2016)","DOI":"10.1609\/aaai.v30i1.10451"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-01246-5_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,5]],"date-time":"2022-10-05T00:12:49Z","timestamp":1664928769000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-01246-5_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030012458","9783030012465"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-01246-5_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"6 October 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}