{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,3]],"date-time":"2025-04-03T00:43:41Z","timestamp":1743641021068},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2018,12,1]],"date-time":"2018-12-01T00:00:00Z","timestamp":1543622400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100004739","name":"Youth Innovation Promotion Association of the Chinese Academy of Sciences","doi-asserted-by":"publisher","award":["Grant No. 2016336"],"award-info":[{"award-number":["Grant No. 2016336"]}],"id":[{"id":"10.13039\/501100004739","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2019,6]]},"DOI":"10.1007\/s10489-018-1347-3","type":"journal-article","created":{"date-parts":[[2018,12,1]],"date-time":"2018-12-01T21:00:57Z","timestamp":1543698057000},"page":"2017-2029","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":25,"title":["Learning multi-temporal-scale deep information for action recognition"],"prefix":"10.1007","volume":"49","author":[{"given":"Guangle","family":"Yao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tao","family":"Lei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiandan","family":"Zhong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ping","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,12,1]]},"reference":[{"issue":"11","key":"1347_CR1","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun Y, Bottou L, Bengio Y, Haffner P (1998) Gradient-based learning applied to document recognition. Proc IEEE 86(11):2278\u20132324","journal-title":"Proc IEEE"},{"key":"1347_CR2","unstructured":"Krizhevsky A, Sutskever I, Hinton G (2012) Imagenet classification with deep convolutional neural networks. In: Proceedings of the annual conference on neural information processing systems, pp 1097\u20131105"},{"key":"1347_CR3","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 580\u2013587","DOI":"10.1109\/CVPR.2014.81"},{"issue":"8","key":"1347_CR4","doi-asserted-by":"publisher","first-page":"1915","DOI":"10.1109\/TPAMI.2012.231","volume":"35","author":"C Farabet","year":"2013","unstructured":"Farabet C, Couprie C, LeCun Y (2013) Learning hierarchical features for scene labeling. IEEE Trans Pattern Anal Mach Intell 35(8):1915\u20131929","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1347_CR5","doi-asserted-by":"crossref","unstructured":"Long J, Shelhamer E, Darrell T (2015) Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3431\u20133440","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"1347_CR6","doi-asserted-by":"crossref","unstructured":"Karpathy A, Toderici G, Shetty S, Leung T, Sukthankar R, Li F (2014) Large-scale video classification with convolutional neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1725\u20131732","DOI":"10.1109\/CVPR.2014.223"},{"key":"1347_CR7","unstructured":"Simonyan K, Zisserman A (2014) Two-stream convolutional networks for action recognition in videos. In: Proceedings of the advances in neural information processing systems, pp 568\u2013576"},{"key":"1347_CR8","doi-asserted-by":"crossref","unstructured":"Tran D, Bourdev L, Fergus R, Torresani L, Paluri M (2015) Learning spatiotemporal features with 3D convolutional networks. In: Proceedings of the IEEE international conference on computer vision, pp 4489\u20134497","DOI":"10.1109\/ICCV.2015.510"},{"key":"1347_CR9","unstructured":"Tran D, Ray J, Shou Z, Chang SF, Paluri M (2017) ConvNet architecture search for spatiotemporal feature learning. arXiv:\n                    1708.05038"},{"key":"1347_CR10","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"issue":"9","key":"1347_CR11","doi-asserted-by":"publisher","first-page":"1984","DOI":"10.1109\/TIFS.2016.2569061","volume":"11","author":"M Haghighat","year":"2016","unstructured":"Haghighat M, Abdel-Mottaleb M, Alhalabi W (2016) Discriminant correlation analysis: real-time feature level fusion for multimodal biometric recognition. IEEE Trans Inf Foren Sec 11(9):1984\u20131966","journal-title":"IEEE Trans Inf Foren Sec"},{"key":"1347_CR12","unstructured":"Lin Z, Jiang Z, Davis L (2009) Recognizing actions by shape-motion prototype trees. In: Proceedings of the IEEE international conference on computer vision, pp 444\u2013451"},{"key":"1347_CR13","doi-asserted-by":"crossref","unstructured":"Efros A, Berg A, Mori G, Malik J (2003) Recognizing action at a distance. In: Proceedings of the IEEE international conference on computer vision, pp 726\u2013733","DOI":"10.1109\/ICCV.2003.1238420"},{"key":"1347_CR14","doi-asserted-by":"crossref","unstructured":"Wang H, Schmid C (2013) Action recognition with improved trajectories. In: Proceedings of the IEEE international conference on computer vision, pp 3551-3558","DOI":"10.1109\/ICCV.2013.441"},{"key":"1347_CR15","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1016\/j.cviu.2016.03.013","volume":"150","author":"X Peng","year":"2016","unstructured":"Peng X, Wang L, Wang X, Qiao Y (2016) Bag of visual words and fusion methods for action recognition. Comput Vis Image Underst 150:109\u2013125","journal-title":"Comput Vis Image Underst"},{"key":"1347_CR16","doi-asserted-by":"crossref","unstructured":"Dalal N, Triggs B (2005) Histograms of oriented gradients for human detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 886\u2013893","DOI":"10.1109\/CVPR.2005.177"},{"key":"1347_CR17","doi-asserted-by":"crossref","unstructured":"Laptev I, Marszalek M, Schmid C, Rozenfeld B (2008) Learning realistic human actions from movies. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1\u20138","DOI":"10.1109\/CVPR.2008.4587756"},{"key":"1347_CR18","doi-asserted-by":"crossref","unstructured":"Dalal N, Triggs B, Schmid C (2006) Human detection using oriented histograms of flow and appearance. In: Proceedings of the European conference on computer vision, pp 428\u2013441","DOI":"10.1007\/11744047_33"},{"key":"1347_CR19","doi-asserted-by":"crossref","unstructured":"Perronnin F, Dance C (2007) Fisher kernels on visual vocabularies for image categorization. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1-8","DOI":"10.1109\/CVPR.2007.383266"},{"key":"1347_CR20","unstructured":"Yang M, Ji S, Xu W, Wang J (2009) Detecting human actions in surveillance videos. In: Proceedings of the TREC video retrieval evaluation workshop"},{"key":"1347_CR21","unstructured":"Ji S, Xu W, Yang M, Yu K (2010) 3D convolutional neural networks for human action recognition. In: Proceedings of the International conference on machine learning, pp 495\u2013 502"},{"key":"1347_CR22","doi-asserted-by":"crossref","unstructured":"Baccouche M, Mamalet F, Wolf C, Carcia C, Baskurt A (2011) Sequential deep learning for human action recognition. In: Proceedings of the International conference on human behavior unterstanding, pp 29\u201339","DOI":"10.1007\/978-3-642-25446-8_4"},{"issue":"1","key":"1347_CR23","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2013","unstructured":"Ji S, Xu W, Yang M, Yu K (2013) 3D convolutional neural networks for human action recognition. IEEE Trans Pattern Anal Mach Intell 35(1):221\u2013231","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1347_CR24","doi-asserted-by":"crossref","unstructured":"Zhang B, Wang L, Wang Z, Qiao Y, Wang H (2016) Real-time action recognition with enhanced motion vector CNNs. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2718\u20132726","DOI":"10.1109\/CVPR.2016.297"},{"key":"1347_CR25","doi-asserted-by":"crossref","unstructured":"Gao Z, Hua G, Zhang D, Jojic N, Wang L (2017) ER3: A unified framework for event retrieval recognition and recounting. In: Proceedings of the IEEE conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2017.227"},{"issue":"6","key":"1347_CR26","doi-asserted-by":"publisher","first-page":"1510","DOI":"10.1109\/TPAMI.2017.2712608","volume":"40","author":"G Varol","year":"2018","unstructured":"Varol G, Laptev I, Schmid C (2018) Long-term temporal convolutions for action recognition. IEEE Trans Pattern Anal Mach Intell 40(6):1510\u20131517","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1347_CR27","unstructured":"Wang L, Xiong Y, Wang Z, Qiao Y (2015) Towards good practices for very deep two-stream convnets. arXiv:\n                    1507.02159.2015"},{"issue":"3","key":"1347_CR28","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky O, Deng J, Su H, Krause J, Satheesh S, Ma S, Huang Z, Karpathy A, Khosla A, Bernstein M, Berg A, Li F (2015) Imagenet large scale visual recognition challenge. Int J Comput Vis 115(3):211\u2013252","journal-title":"Int J Comput Vis"},{"key":"1347_CR29","doi-asserted-by":"crossref","unstructured":"Zeiler M, Fergus R (2014) Visualizing and understanding convolutional networks. In: Proceedings of the European conference on computer vision, pp 818-833","DOI":"10.1007\/978-3-319-10590-1_53"},{"key":"1347_CR30","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. In: Proceedings of the international conference on learning representation"},{"key":"1347_CR31","doi-asserted-by":"crossref","unstructured":"Chatfield K, Simonyan K, Vedaldi A, Zisserman A (2014) Return of the devil in the details: delving deep into convolutional nets. In: Proceedings of the British machine vision conference","DOI":"10.5244\/C.28.6"},{"key":"1347_CR32","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1-9","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"1347_CR33","unstructured":"Soomro K, Zamir A, Shah M (2012) UCF101: A dataset of 101 human actions classes from videos in the wild. Technical Report, University of Central Florida"},{"key":"1347_CR34","doi-asserted-by":"crossref","unstructured":"Kuehne H, Jhuang H, Garrote E, Poggio T, Serre T (2011) HMDB: A large video database for human motion recognition. In: Proceedings of the IEEE international conference on computer vision","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"1347_CR35","first-page":"2579","volume":"9","author":"LVD Maaten","year":"2008","unstructured":"Maaten LVD, Hinton G (2008) Visualizing data using t-sne. J Mach Learn Res 9:2579\u20132605","journal-title":"J Mach Learn Res"},{"key":"1347_CR36","unstructured":"Zach C, Pock T, Bischof H (2007) A duality based approach for realtime tv-L1 optical flow. In: Proceedings of DAGM symposium on pattern recognition, pp 214-223"},{"issue":"3","key":"1347_CR37","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1961189.1961199","volume":"2","author":"CC Chang","year":"2011","unstructured":"Chang CC, Lin CJ (2011) Libsvm: a library for support vector machines. ACM Trans Intel Syst Tec 2 (3):1\u201327","journal-title":"ACM Trans Intel Syst Tec"},{"key":"1347_CR38","unstructured":"Ng J, Hausknecht M, Vijayanarasimhan S, Vinyals O, Monga R, Toderici G (2015) Beyond short snippets: deep networks for video classification. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4694-4702"},{"issue":"4","key":"1347_CR39","doi-asserted-by":"publisher","first-page":"677","DOI":"10.1109\/TPAMI.2016.2599174","volume":"39","author":"J Donahue","year":"2017","unstructured":"Donahue J, Hendricks L, Rohrbach M, Venugopalan S, Guadarrama S, Saenko K, Darrell T (2017) Long-term recurrent convolutional networks for visual recognition and description. IEEE Trans Pattern Anal Mach Intell 39(4):677\u2013691","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1347_CR40","doi-asserted-by":"crossref","unstructured":"Wang L, Qiao Y, Tang X (2015) Action recognition with trajectory-pooled deep-convolutional descriptors. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4305-4314","DOI":"10.1109\/CVPR.2015.7299059"},{"key":"1347_CR41","doi-asserted-by":"crossref","unstructured":"Cherian A, Fernando B, Harandi M, Gould S (2017) Generalized rank pooling for activity recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1581-1590","DOI":"10.1109\/CVPR.2017.172"},{"key":"1347_CR42","doi-asserted-by":"crossref","unstructured":"Kar A, Rai N, Sikka K, Sharma G (2017) AdaScan: adaptive scan pooling in deep convolutional neural networks for human action recognition in videos. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5699-5708","DOI":"10.1109\/CVPR.2017.604"},{"key":"1347_CR43","doi-asserted-by":"crossref","unstructured":"Park E, Han X, Berg T, Berg A (2016) Combining multiple sources of knowledge in deep CNNs for action recognition. In: Proceedings of the IEEE winter conference on applications of computer vision, pp 177-186","DOI":"10.1109\/WACV.2016.7477589"},{"key":"1347_CR44","unstructured":"Wu Z, Wang X, Jiang Y, Ye H, Xue X (2018) Modeling spatial-temporal clues in a hybrid deep learning framework for video classification. In: Proceedings of the ACM multimedia conference"},{"key":"1347_CR45","doi-asserted-by":"crossref","unstructured":"Li Y, Li W, Mahadevan V, Vasconcelos N (2016) VLAD3: encoding dynamics of deep features for action recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1951-1960","DOI":"10.1109\/CVPR.2016.215"},{"key":"1347_CR46","unstructured":"Neverova N, Wolf C, Taylor W, Nebout F (2014) Multi-scale deep learning for gesture detection and localization. In: Workshop of the European conference on computer vision, pp 474-490"},{"key":"1347_CR47","doi-asserted-by":"crossref","unstructured":"Jung M, Hwang J, Tani J (2014) Multiple spatio-temporal scales neural network for contextual visual recognition of human actions. In: Proceedings of the IEEE conferences on development and learning and epigenetic robotics, pp 235-241","DOI":"10.1109\/DEVLRN.2014.6982987"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-018-1347-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10489-018-1347-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-018-1347-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,12,2]],"date-time":"2019-12-02T12:43:35Z","timestamp":1575290615000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10489-018-1347-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12,1]]},"references-count":47,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2019,6]]}},"alternative-id":["1347"],"URL":"https:\/\/doi.org\/10.1007\/s10489-018-1347-3","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,12,1]]},"assertion":[{"value":"1 December 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}