{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T06:34:30Z","timestamp":1778049270681,"version":"3.51.4"},"publisher-location":"Singapore","reference-count":69,"publisher":"Springer Singapore","isbn-type":[{"value":"9789811501203","type":"print"},{"value":"9789811501210","type":"electronic"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-981-15-0121-0_1","type":"book-chapter","created":{"date-parts":[[2019,9,12]],"date-time":"2019-09-12T23:04:06Z","timestamp":1568329446000},"page":"3-21","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Survey on Deep Learning for Human Action Recognition"],"prefix":"10.1007","author":[{"given":"Zirui","family":"Qiu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jun","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingyue","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mantao","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dejun","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,9,13]]},"reference":[{"key":"1_CR1","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1007\/978-3-642-25446-8_4","volume-title":"HBU 2011","author":"M Baccouche","year":"2011","unstructured":"Baccouche, M., Mamalet, F., Wolf, C., Garcia, C., Baskurt, A.: Sequential deep learning for human action recognition. In: Salah, A.A., Lepri, B. (eds.) HBU 2011. LNCS, vol. 7065, pp. 29\u201339. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-25446-8_4"},{"key":"1_CR2","doi-asserted-by":"crossref","unstructured":"Caba Heilbron, F., Escorcia, V., Ghanem, B., Carlos Niebles, J.: ActivityNet: a large-scale video benchmark for human activity understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 961\u2013970 (2015)","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"1_CR3","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"1_CR4","doi-asserted-by":"crossref","unstructured":"Chao, Y.W., Vijayanarasimhan, S., Seybold, B., Ross, D.A., Deng, J., Sukthankar, R.: Rethinking the faster R-CNN architecture for temporal action localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1130\u20131139 (2018)","DOI":"10.1109\/CVPR.2018.00124"},{"key":"1_CR5","doi-asserted-by":"crossref","unstructured":"Ch\u00e9ron, G., Laptev, I., Schmid, C.: P-CNN: pose-based cnn features for action recognition. In: Proceedings of the IEEE international conference on computer vision. pp. 3218\u20133226 (2015)","DOI":"10.1109\/ICCV.2015.368"},{"key":"1_CR6","doi-asserted-by":"crossref","unstructured":"Dai, X., Singh, B., Zhang, G., Davis, L.S., Qiu Chen, Y.: Temporal context network for activity localization in videos. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5793\u20135802 (2017)","DOI":"10.1109\/ICCV.2017.610"},{"key":"1_CR7","doi-asserted-by":"crossref","unstructured":"Donahue, J., et al.: Long-term recurrent convolutional networks for visual recognition and description. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2625\u20132634 (2015)","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"1_CR8","doi-asserted-by":"crossref","unstructured":"Du, Y., Fu, Y., Wang, L.: Skeleton based action recognition with convolutional neural network. In: 2015 3rd IAPR Asian Conference on Pattern Recognition (ACPR), pp. 579\u2013583. IEEE (2015)","DOI":"10.1109\/ACPR.2015.7486569"},{"key":"1_CR9","unstructured":"Du, Y., Wang, W., Wang, L.: Hierarchical recurrent neural network for skeleton based action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1110\u20131118 (2015)"},{"key":"1_CR10","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"768","DOI":"10.1007\/978-3-319-46487-9_47","volume-title":"ECCV 2016","author":"V Escorcia","year":"2016","unstructured":"Escorcia, V., Caba, H.F., Niebles, J.C., Ghanem, B.: DAPs: deep action proposals for action understanding. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9907, pp. 768\u2013784. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46487-9_47"},{"key":"1_CR11","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Wildes, R.P.: Spatiotemporal multiplier networks for video action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4768\u20134777 (2017)","DOI":"10.1109\/CVPR.2017.787"},{"key":"1_CR12","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Zisserman, A.: Convolutional two-stream network fusion for video action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1933\u20131941 (2016)","DOI":"10.1109\/CVPR.2016.213"},{"key":"1_CR13","unstructured":"Gkioxari, G., Hariharan, B., Girshick, R., Malik, J.: R-CNNs for pose estimation and action detection. arXiv preprint arXiv:1406.5212 (2014)"},{"key":"1_CR14","doi-asserted-by":"crossref","unstructured":"Gkioxari, G., Malik, J.: Finding action tubes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 759\u2013768 (2015)","DOI":"10.1109\/CVPR.2015.7298676"},{"issue":"1","key":"1_CR15","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1016\/0166-2236(92)90344-8","volume":"15","author":"MA Goodale","year":"1992","unstructured":"Goodale, M.A., Milner, A.D.: Separate visual pathways for perception and action. Trends Neurosci. 15(1), 20\u201325 (1992)","journal-title":"Trends Neurosci."},{"key":"1_CR16","unstructured":"Gorban, A., et al.: Thumos challenge: action recognition with a large number of classes (2015)"},{"key":"1_CR17","doi-asserted-by":"crossref","unstructured":"Graves, A., Mohamed, A.R., Hinton, G.: Speech recognition with deep recurrent neural networks. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 6645\u20136649. IEEE (2013)","DOI":"10.1109\/ICASSP.2013.6638947"},{"issue":"8","key":"1_CR18","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"1_CR19","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. arXiv preprint arXiv:1502.03167 (2015)"},{"key":"1_CR20","doi-asserted-by":"crossref","unstructured":"Jhuang, H., Gall, J., Zuffi, S., Schmid, C., Black, M.J.: Towards understanding action recognition. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3192\u20133199 (2013)","DOI":"10.1109\/ICCV.2013.396"},{"issue":"1","key":"1_CR21","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2013","unstructured":"Ji, S., Xu, W., Yang, M., Yu, K.: 3D convolutional neural networks for human action recognition. IEEE Trans. Pattern Anal. Mach. Intell. 35(1), 221\u2013231 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1_CR22","doi-asserted-by":"crossref","unstructured":"Kalogeiton, V., Weinzaepfel, P., Ferrari, V., Schmid, C.: Action tubelet detector for spatio-temporal action localization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4405\u20134413 (2017)","DOI":"10.1109\/ICCV.2017.472"},{"key":"1_CR23","doi-asserted-by":"crossref","unstructured":"Karpathy, A., Toderici, G., Shetty, S., Leung, T., Sukthankar, R., Fei-Fei, L.: Large-scale video classification with convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1725\u20131732 (2014)","DOI":"10.1109\/CVPR.2014.223"},{"key":"1_CR24","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. In: Advances in Neural Information Processing Systems, pp. 1097\u20131105 (2012)"},{"key":"1_CR25","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"471","DOI":"10.1007\/978-3-319-46493-0_29","volume-title":"ECCV 2016","author":"T Kroeger","year":"2016","unstructured":"Kroeger, T., Timofte, R., Dai, D., Van Gool, L.: Fast optical flow using dense inverse search. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 471\u2013488. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_29"},{"key":"1_CR26","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Jhuang, H., Garrote, E., Poggio, T., Serre, T.: HMDB: a large video database for human motion recognition. In: 2011 International Conference on Computer Vision, pp. 2556\u20132563. IEEE (2011)","DOI":"10.1109\/ICCV.2011.6126543"},{"issue":"7553","key":"1_CR27","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436 (2015)","journal-title":"Nature"},{"key":"1_CR28","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"833","DOI":"10.1007\/978-3-319-46466-4_50","volume-title":"ECCV 2016","author":"G Lev","year":"2016","unstructured":"Lev, G., Sadeh, G., Klein, B., Wolf, L.: RNN fisher vectors for action recognition and image annotation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9910, pp. 833\u2013850. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46466-4_50"},{"key":"1_CR29","unstructured":"Li, B., Dai, Y., Cheng, X., Chen, H., Lin, Y., He, M.: Skeleton based action recognition using translation-scale invariant image mapping and multi-scale deep CNN. In: 2017 IEEE International Conference on Multimedia & Expo Workshops (ICMEW), pp. 601\u2013604. IEEE (2017)"},{"key":"1_CR30","unstructured":"Li, C., Chen, C., Zhang, B., Ye, Q., Han, J., Ji, R.: Deep spatio-temporal manifold network for action recognition. arXiv preprint arXiv:1705.03148 (2017)"},{"key":"1_CR31","doi-asserted-by":"crossref","unstructured":"Li, Q., Qiu, Z., Yao, T., Mei, T., Rui, Y., Luo, J.: Action recognition by learning deep multi-granular spatio-temporal video representation. In: Proceedings of the 2016 ACM on International Conference on Multimedia Retrieval, pp. 159\u2013166. ACM (2016)","DOI":"10.1145\/2911996.2912001"},{"key":"1_CR32","doi-asserted-by":"crossref","unstructured":"Lin, T., Zhao, X., Shou, Z.: Single shot temporal action detection. In: Proceedings of the 25th ACM International Conference on Multimedia, pp. 988\u2013996. ACM (2017)","DOI":"10.1145\/3123266.3123343"},{"key":"1_CR33","doi-asserted-by":"publisher","first-page":"816","DOI":"10.1007\/978-3-319-46487-9_50","volume-title":"ECCV 2016, LNCS","author":"J Liu","year":"2016","unstructured":"Liu, J., Shahroudy, A., Xu, D., Wang, G.: Spatio-temporal LSTM with trust gates for 3D human action recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016, LNCS, vol. 9907, pp. 816\u2013833. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46487-9_50"},{"key":"1_CR34","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/978-3-319-46448-0_2","volume-title":"ECCV 2016, LNCS","author":"W Liu","year":"2016","unstructured":"Liu, W., et al.: SSD: single shot multibox detector. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016, LNCS, vol. 9905, pp. 21\u201337. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_2"},{"key":"1_CR35","unstructured":"Ngiam, J., Chen, Z., Koh, P.W., Ng, A.Y.: Learning deep energy models. In: Proceedings of the 28th International Conference on Machine Learning, ICML 2011, pp. 1105\u20131112 (2011)"},{"key":"1_CR36","doi-asserted-by":"publisher","first-page":"744","DOI":"10.1007\/978-3-319-46493-0_45","volume-title":"ECCV 2016, LNCS","author":"X Peng","year":"2016","unstructured":"Peng, X., Schmid, C.: Multi-region two-stream R-CNN for action detection. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016, LNCS, vol. 9908, pp. 744\u2013759. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_45"},{"key":"1_CR37","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems, pp. 91\u201399 (2015)"},{"key":"1_CR38","doi-asserted-by":"crossref","unstructured":"Saha, S., Singh, G., Sapienza, M., Torr, P.H., Cuzzolin, F.: Deep learning for detecting multiple space-time action tubes in videos. arXiv preprint arXiv:1608.01529 (2016)","DOI":"10.5244\/C.30.58"},{"key":"1_CR39","unstructured":"Sharma, S., Kiros, R., Salakhutdinov, R.: Action recognition using visual attention. arXiv preprint arXiv:1511.04119 (2015)"},{"key":"1_CR40","doi-asserted-by":"crossref","unstructured":"Shi, Y., Tian, Y., Wang, Y., Zeng, W., Huang, T.: Learning long-term dependencies for action recognition with a biologically-inspired deep network. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 716\u2013725 (2017)","DOI":"10.1109\/ICCV.2017.84"},{"key":"1_CR41","doi-asserted-by":"crossref","unstructured":"Shou, Z., Chan, J., Zareian, A., Miyazawa, K., Chang, S.F.: CDC: convolutional-de-convolutional networks for precise temporal action localization in untrimmed videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5734\u20135743 (2017)","DOI":"10.1109\/CVPR.2017.155"},{"key":"1_CR42","doi-asserted-by":"crossref","unstructured":"Shou, Z., Wang, D., Chang, S.F.: Temporal action localization in untrimmed videos via multi-stage CNNs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1049\u20131058 (2016)","DOI":"10.1109\/CVPR.2016.119"},{"key":"1_CR43","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. In: Advances in Neural Information Processing Systems, pp. 568\u2013576 (2014)"},{"key":"1_CR44","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for largescale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"1_CR45","doi-asserted-by":"crossref","unstructured":"Singh, B., Marks, T.K., Jones, M., Tuzel, O., Shao, M.: A multi-stream bi-directional recurrent neural network for fine-grained action detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1961\u20131970 (2016)","DOI":"10.1109\/CVPR.2016.216"},{"key":"1_CR46","doi-asserted-by":"crossref","unstructured":"Singh, G., Saha, S., Sapienza, M., Torr, P.H., Cuzzolin, F.: Online real-time multiple spatiotemporal action localisation and prediction. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3637\u20133646 (2017)","DOI":"10.1109\/ICCV.2017.393"},{"key":"1_CR47","unstructured":"Soomro, K., Zamir, A.R., Shah, M.: UCF101: a dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012)"},{"key":"1_CR48","unstructured":"Srivastava, N., Mansimov, E., Salakhudinov, R.: Unsupervised learning of video representations using LSTMs. In: International Conference on Machine Learning, pp. 843\u2013852 (2015)"},{"key":"1_CR49","doi-asserted-by":"crossref","unstructured":"Sun, C., Shetty, S., Sukthankar, R., Nevatia, R.: Temporal localization of fine-grained actions in videos by domain transfer from web images. In: Proceedings of the 23rd ACM International Conference on Multimedia, pp. 371\u2013380. ACM (2015)","DOI":"10.1145\/2733373.2806226"},{"key":"1_CR50","doi-asserted-by":"crossref","unstructured":"Sun, L., Jia, K., Yeung, D.Y., Shi, B.E.: Human action recognition using factorized spatio-temporal convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4597\u20134605 (2015)","DOI":"10.1109\/ICCV.2015.522"},{"key":"1_CR51","doi-asserted-by":"crossref","unstructured":"Tang, Y., Tian, Y., Lu, J., Li, P., Zhou, J.: Deep progressive reinforcement learning for skeleton-based action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5323\u20135332 (2018)","DOI":"10.1109\/CVPR.2018.00558"},{"key":"1_CR52","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3D convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4489\u20134497 (2015)","DOI":"10.1109\/ICCV.2015.510"},{"key":"1_CR53","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1016\/j.patcog.2018.01.020","volume":"79","author":"Z Tu","year":"2018","unstructured":"Tu, Z., Xie, W., Qin, Q., Poppe, R., Veltkamp, R.C., Li, B., Yuan, J.: Multistream CNN: Learning representations based on human-related regions for action recognition. Pattern Recogn. 79, 32\u201343 (2018)","journal-title":"Pattern Recogn."},{"issue":"2","key":"1_CR54","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/s11263-013-0620-5","volume":"104","author":"JR Uijlings","year":"2013","unstructured":"Uijlings, J.R., Van De Sande, K.E., Gevers, T., Smeulders, A.W.: Selective search for object recognition. Int. J. Comput. Vis. 104(2), 154\u2013171 (2013)","journal-title":"Int. J. Comput. Vis."},{"key":"1_CR55","doi-asserted-by":"crossref","unstructured":"Van Gemert, J.C., Jain, M., Gati, E., Snoek, C.G., et al.: APT: action localization proposals from dense trajectories. In: BMVC, vol. 2, p. 4 (2015)","DOI":"10.5244\/C.29.177"},{"issue":"1","key":"1_CR56","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1007\/s11263-012-0594-8","volume":"103","author":"H Wang","year":"2013","unstructured":"Wang, H., Kl\u00e4ser, A., Schmid, C., Liu, C.L.: Dense trajectories and motion boundary descriptors for action recognition. Int. J. Comput. Vis. 103(1), 60\u201379 (2013)","journal-title":"Int. J. Comput. Vis."},{"key":"1_CR57","doi-asserted-by":"crossref","unstructured":"Wang, H., Schmid, C.: Action recognition with improved trajectories. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3551\u20133558 (2013)","DOI":"10.1109\/ICCV.2013.441"},{"key":"1_CR58","doi-asserted-by":"crossref","unstructured":"Wang, L., Qiao, Y., Tang, X.: Action recognition with trajectory-pooled deep-convolutional descriptors. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4305\u20134314 (2015)","DOI":"10.1109\/CVPR.2015.7299059"},{"key":"1_CR59","unstructured":"Wang, L., Xiong, Y., Wang, Z., Qiao, Y.: Towards good practices for very deep two-stream convnets. arXiv preprint arXiv:1507.02159 (2015)"},{"key":"1_CR60","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1007\/978-3-319-46484-8_2","volume-title":"ECCV 2016","author":"L Wang","year":"2016","unstructured":"Wang, L., et al.: Temporal segment networks: towards good practices for deep action recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 20\u201336. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_2"},{"key":"1_CR61","doi-asserted-by":"crossref","unstructured":"Weinzaepfel, P., Harchaoui, Z., Schmid, C.: Learning to track for spatiotemporal action localization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3164\u20133172 (2015)","DOI":"10.1109\/ICCV.2015.362"},{"key":"1_CR62","doi-asserted-by":"crossref","unstructured":"Yuan, J., Ni, B., Yang, X., Kassim, A.A.: Temporal action localization with pyramid of score distribution features. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3093\u20133102 (2016)","DOI":"10.1109\/CVPR.2016.337"},{"key":"1_CR63","doi-asserted-by":"crossref","unstructured":"Yuan, Z., Stroud, J.C., Lu, T., Deng, J.: Temporal action localization by structured maximal sums. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3684\u20133692 (2017)","DOI":"10.1109\/CVPR.2017.342"},{"issue":"3","key":"1_CR64","doi-asserted-by":"publisher","first-page":"261","DOI":"10.3233\/ICA-170544","volume":"24","author":"D Zhang","year":"2017","unstructured":"Zhang, D., He, F., Han, S., Zou, L., Wu, Y., Chen, Y.: An efficient approach to directly compute the exact Hausdorff distance for 3D point sets. Integr. Comput-Aided Eng. 24(3), 261\u2013277 (2017)","journal-title":"Integr. Comput-Aided Eng."},{"key":"1_CR65","doi-asserted-by":"publisher","first-page":"73750","DOI":"10.1109\/ACCESS.2018.2882878","volume":"6","author":"D Zhang","year":"2018","unstructured":"Zhang, D., Tian, L., Hong, M., Han, F., Ren, Y., Chen, Y.: Combining convolution neural network and bidirectional gated recurrent unit for sentence semantic classification. IEEE Access 6, 73750\u201373759 (2018)","journal-title":"IEEE Access"},{"key":"1_CR66","unstructured":"Zhang, D., et al.: Part-based visual tracking with spatially regularized correlation filters. Vis. Comput. 1\u201319 (2019)"},{"key":"1_CR67","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Xiong, Y., Wang, L., Wu, Z., Tang, X., Lin, D.: Temporal action detection with structured segment networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2914\u20132923 (2017)","DOI":"10.1109\/ICCV.2017.317"},{"key":"1_CR68","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1007\/978-3-319-10602-1_26","volume-title":"Computer Vision \u2013 ECCV 2014","author":"CL Zitnick","year":"2014","unstructured":"Zitnick, C.L., Doll\u00e1r, P.: Edge boxes: locating object proposals from edges. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 391\u2013405. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_26"},{"key":"1_CR69","doi-asserted-by":"crossref","unstructured":"Zolfaghari, M., Oliveira, G.L., Sedaghat, N., Brox, T.: Chained multi-stream networks exploiting pose, motion, and appearance for action classification and detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2904\u20132913 (2017)","DOI":"10.1109\/ICCV.2017.316"}],"container-title":["Communications in Computer and Information Science","Data Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-15-0121-0_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,7]],"date-time":"2024-03-07T14:49:50Z","timestamp":1709822990000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-15-0121-0_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9789811501203","9789811501210"],"references-count":69,"URL":"https:\/\/doi.org\/10.1007\/978-981-15-0121-0_1","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"value":"1865-0929","type":"print"},{"value":"1865-0937","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"13 September 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPCSEE","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference of Pioneering Computer Scientists, Engineers and Educators","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Guilin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 September 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpcsee2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2019.icpcsee.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}