{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,16]],"date-time":"2024-10-16T04:28:08Z","timestamp":1729052888925},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2023,3]]},"DOI":"10.1007\/s00138-023-01377-3","type":"journal-article","created":{"date-parts":[[2023,3,7]],"date-time":"2023-03-07T12:02:57Z","timestamp":1678190577000},"update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Actions as points: a simple and efficient detector for skeleton-based temporal action detection"],"prefix":"10.1007","volume":"34","author":[{"given":"Jianhua","family":"Yang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ke","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruifeng","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,3,7]]},"reference":[{"key":"1377_CR1","doi-asserted-by":"crossref","unstructured":"Chao,Y.W., Vijayanarasimhan, S., Seybold, B., Ross, D.A., Deng, J., Sukthankar, R.: Rethinking the faster r-cnn architecture for temporal action localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1130\u20131139 (2018)","DOI":"10.1109\/CVPR.2018.00124"},{"key":"1377_CR2","doi-asserted-by":"crossref","unstructured":"Cheng, K., Zhang, Y., He, X., Chen, W., Cheng, J., Lu, H.: Skeleton-based action recognition with shift graph convolutional network. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 183\u2013192 (2020)","DOI":"10.1109\/CVPR42600.2020.00026"},{"issue":"5","key":"1377_CR3","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1049\/iet-cvi.2019.0751","volume":"14","author":"R Cui","year":"2020","unstructured":"Cui, R., Zhu, A., Jingran, W., Hua, G.: Skeleton-based attention-aware spatial-temporal model for action detection and recognition. IET Comput. Vis. 14(5), 177\u2013184 (2020)","journal-title":"IET Comput. Vis."},{"key":"1377_CR4","unstructured":"Du, Y., Wang, W., Wang, L.: Hierarchical recurrent neural network for skeleton based action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1110\u20131118 (2015)"},{"key":"1377_CR5","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"1377_CR6","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 580\u2013587 (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"1377_CR7","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1377_CR8","doi-asserted-by":"crossref","unstructured":"Hou, R., Chen, C., Shah, M.: An end-to-end 3d convolutional neural network for action detection and segmentation in videos. arXiv preprint arXiv:1712.01111 (2017)","DOI":"10.1109\/ICCV.2017.620"},{"key":"1377_CR9","doi-asserted-by":"publisher","first-page":"201","DOI":"10.3758\/BF03212378","volume":"14","author":"G Johansson","year":"1973","unstructured":"Johansson, G.: Visual perception of biological motion and a model for its analysis. Percept. Psychophys. 14, 201\u2013211 (1973)","journal-title":"Percept. Psychophys."},{"key":"1377_CR10","doi-asserted-by":"crossref","unstructured":"Law, H., Deng, J.: Cornernet: Detecting objects as paired keypoints. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 734\u2013750 (2018)","DOI":"10.1007\/978-3-030-01264-9_45"},{"key":"1377_CR11","unstructured":"Li, B., Chen, H., Chen, Y., Dai, Y., He, M.: Skeleton boxes: Solving skeleton based action detection with a single deep convolutional neural network. In: 2017 IEEE International Conference on Multimedia & Expo Workshops (ICMEW). IEEE, pp. 613\u2013616 (2017)"},{"key":"1377_CR12","doi-asserted-by":"crossref","unstructured":"Li, C., Zhong, Q., Xie, D., Pu, S.: Co-occurrence feature learning from skeleton data for action recognition and detection with hierarchical aggregation. arXiv preprint arXiv:1804.06055 (2018)","DOI":"10.24963\/ijcai.2018\/109"},{"key":"1377_CR13","doi-asserted-by":"crossref","unstructured":"Li, Y., Lan, C., Xing, J., Zeng, W., Yuan, C., Liu, J.: Online human action detection using joint classification-regression recurrent neural networks. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part VII 14, pp. 203\u2013220. Springer (2016)","DOI":"10.1007\/978-3-319-46478-7_13"},{"key":"1377_CR14","doi-asserted-by":"crossref","unstructured":"Li, Y., Wang, Z., Wang, L., Wu, G.: Actions as moving points. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XVI 16, pp. 68\u201384. Springer (2020)","DOI":"10.1007\/978-3-030-58517-4_5"},{"key":"1377_CR15","doi-asserted-by":"crossref","unstructured":"Lin, T., Zhao, X., Shou, Z.: Single shot temporal action detection. In: Proceedings of the 25th ACM International Conference on Multimedia, pp. 988\u2013996 (2017)","DOI":"10.1145\/3123266.3123343"},{"key":"1377_CR16","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"1377_CR17","doi-asserted-by":"crossref","unstructured":"Liu, C., Hu, Y., Li, Y., Song, S., Liu, J.: Pku-mmd: A large scale benchmark for continuous multi-modal human action understanding. arXiv preprint arXiv:1703.07475 (2017)","DOI":"10.1145\/3132734.3132739"},{"key":"1377_CR18","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.Y., Berg, A.C.: Ssd: Single shot multibox detector. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part I 14, pp. 21\u201337. Springer (2016)","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"1377_CR19","doi-asserted-by":"crossref","unstructured":"Redmon, J, Divvala, S., Girshick, R., Farhadi, A.: You only look once: Unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 779\u2013788 (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"1377_CR20","doi-asserted-by":"crossref","unstructured":"Redmon, J., Farhadi, A.: Yolo9000: better, faster, stronger. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7263\u20137271 (2017)","DOI":"10.1109\/CVPR.2017.690"},{"key":"1377_CR21","unstructured":"Redmon, J., Farhadi, A.: Yolov3: An incremental improvement. arXiv preprint arXiv:1804.02767 (2018)"},{"key":"1377_CR22","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: Towards real-time object detection with region proposal networks. Adv. Neural Inf. Processing Syst. 28 (2015)"},{"key":"1377_CR23","doi-asserted-by":"crossref","unstructured":"Rostami, Z., Afrasiabi, M., Khotanlou, H.: Skeleton-based action recognition using spatio-temporal features with convolutional neural networks. In: 2017 IEEE 4th International Conference on Knowledge-Based Engineering and Innovation (KBEI). IEEE, pp. 0583\u20130587 (2017)","DOI":"10.1109\/KBEI.2017.8324867"},{"key":"1377_CR24","doi-asserted-by":"crossref","unstructured":"Shahroudy, A., Liu, J., Ng, T.T., Wang, G.: Ntu rgb+ d: A large scale dataset for 3d human activity analysis. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pages 1010\u20131019 (2016)","DOI":"10.1109\/CVPR.2016.115"},{"key":"1377_CR25","doi-asserted-by":"crossref","unstructured":"Shou, Z., Chan, J., Zareian, A., Miyazawa, K., Chang, S.F.: Cdc: Convolutional-de-convolutional networks for precise temporal action localization in untrimmed videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5734\u20135743 (2017)","DOI":"10.1109\/CVPR.2017.155"},{"key":"1377_CR26","doi-asserted-by":"crossref","unstructured":"Shou, Z., Wang, D., Chang, S.F.: Temporal action localization in untrimmed videos via multi-stage cnns. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1049\u20131058 (2016)","DOI":"10.1109\/CVPR.2016.119"},{"key":"1377_CR27","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"1377_CR28","doi-asserted-by":"crossref","unstructured":"Song, S., Lan, C., Xing, J., Zeng, W., Liu, J.: An end-to-end spatio-temporal attention model for human action recognition from skeleton data. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 31 (2017)","DOI":"10.1609\/aaai.v31i1.11212"},{"issue":"7","key":"1377_CR29","doi-asserted-by":"publisher","first-page":"3459","DOI":"10.1109\/TIP.2018.2818328","volume":"27","author":"S Song","year":"2018","unstructured":"Song, S., Lan, C., Xing, J., Zeng, W., Liu, J.: Spatio-temporal attention-based LSTM networks for 3d action recognition and detection. IEEE Trans. Image Process. 27(7), 3459\u20133471 (2018)","journal-title":"IEEE Trans. Image Process."},{"key":"1377_CR30","doi-asserted-by":"crossref","unstructured":"Tian, Z., Shen, C., Chen, H., He, T.: Fcos: Fully convolutional one-stage object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9627\u20139636 (2019)","DOI":"10.1109\/ICCV.2019.00972"},{"key":"1377_CR31","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3d convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4489\u20134497 (2015)","DOI":"10.1109\/ICCV.2015.510"},{"key":"1377_CR32","doi-asserted-by":"crossref","unstructured":"Wang, H., Wang, L.: Learning robust representations using recurrent neural networks for skeleton based action classification and detection. In: 2017 IEEE International Conference on Multimedia & Expo Workshops (ICMEW). IEEE, pp. 591\u2013596 (2017)","DOI":"10.1109\/ICMEW.2017.8026278"},{"issue":"9","key":"1377_CR33","doi-asserted-by":"publisher","first-page":"4382","DOI":"10.1109\/TIP.2018.2837386","volume":"27","author":"H Wang","year":"2018","unstructured":"Wang, H., Wang, L.: Beyond joints: Learning representations from primitive geometries for skeleton-based action recognition and detection. IEEE Trans. Image Process. 27(9), 4382\u20134394 (2018)","journal-title":"IEEE Trans. Image Process."},{"key":"1377_CR34","doi-asserted-by":"crossref","unstructured":"Wu, J., Li, Y., Wang, L., Wang, K., Li, R., Zhou, T.: Skeleton based temporal action detection with yolo. In: Journal of Physics: Conference Series, vol. 1237, pp. 022087 (2019)","DOI":"10.1088\/1742-6596\/1237\/2\/022087"},{"key":"1377_CR35","doi-asserted-by":"crossref","unstructured":"Yan, S., Xiong, Y., Lin, D.: Spatial temporal graph convolutional networks for skeleton-based action recognition. In: Thirty-Second AAAI Conference on Artificial Intelligence, (2018)","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"1377_CR36","doi-asserted-by":"crossref","unstructured":"Yu, F., Wang, D., Shelhamer, E., Darrell, T.: Deep layer aggregation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2403\u20132412 (2018)","DOI":"10.1109\/CVPR.2018.00255"},{"key":"1377_CR37","doi-asserted-by":"crossref","unstructured":"Zhang, X., Xu, C., Tao, D.: Context aware graph convolution for skeleton-based action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14333\u201314342 (2020)","DOI":"10.1109\/CVPR42600.2020.01434"},{"key":"1377_CR38","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Xiong, Y., Wang, L., Wu, Z., Tang, X., Lin, D.: Temporal action detection with structured segment networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2914\u20132923 (2017)","DOI":"10.1109\/ICCV.2017.317"},{"key":"1377_CR39","unstructured":"Zhou, X., Wang, D., Kr\u00e4henb\u00fchl, P.: Objects as points. arXiv preprint arXiv:1904.07850 (2019)"}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-023-01377-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-023-01377-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-023-01377-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,15]],"date-time":"2024-10-15T22:41:51Z","timestamp":1729032111000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-023-01377-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3]]},"references-count":39,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,3]]}},"alternative-id":["1377"],"URL":"https:\/\/doi.org\/10.1007\/s00138-023-01377-3","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"type":"print","value":"0932-8092"},{"type":"electronic","value":"1432-1769"}],"subject":[],"published":{"date-parts":[[2023,3]]},"assertion":[{"value":"13 April 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 December 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 January 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 March 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"35"}}