{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T21:16:12Z","timestamp":1773090972771,"version":"3.50.1"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2017,8,24]],"date-time":"2017-08-24T00:00:00Z","timestamp":1503532800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2018,2]]},"DOI":"10.1007\/s11042-017-5116-9","type":"journal-article","created":{"date-parts":[[2017,8,24]],"date-time":"2017-08-24T04:19:09Z","timestamp":1503548349000},"page":"3303-3316","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["Action detection based on tracklets with the two-stream CNN"],"prefix":"10.1007","volume":"77","author":[{"given":"Minwen","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Chenqiang","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Qiang","family":"Li","sequence":"additional","affiliation":[]},{"given":"Lan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Jiayao","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,8,24]]},"reference":[{"issue":"99","key":"5116_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TNNLS.2016.2582746","volume":"PP","author":"X Chang","year":"2016","unstructured":"Chang X, Yang Y (2016) Semisupervised feature analysis by mining correlations among multiple tasks. IEEE Trans Neural Netw Learn Syst PP(99):1\u201312. https:\/\/doi.org\/10.1109\/TNNLS.2016.2582746","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"5116_CR2","unstructured":"Chang X, Yang Y, Hauptmann AG, Xing E, Yu Y (2015) Semantic concept discovery for large-scale zero-shot event detection. In: IJCAI, pp 2234\u20132240. AAAI Press"},{"key":"5116_CR3","unstructured":"Chang X, Yang Y, Xing E, Yu Y (2015) Complex event detection using semantic saliency and nearly-isotonic svm. In: Proceedings of the 32nd international conference on machine learning, pp 1348\u20131357. PMLR"},{"issue":"7","key":"5116_CR4","doi-asserted-by":"publisher","first-page":"1502","DOI":"10.1109\/TNNLS.2015.2441735","volume":"27","author":"X Chang","year":"2016","unstructured":"Chang X, Nie F, Wang S, Yang Y, Zhou X, Zhang C (2016) Compound rank- k projections for bilinear analysis. IEEE Trans Neural Netw Learn Syst 27(7):1502\u20131513","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"8","key":"5116_CR5","doi-asserted-by":"publisher","first-page":"3911","DOI":"10.1109\/TIP.2017.2708506","volume":"26","author":"X Chang","year":"2017","unstructured":"Chang X, Ma Z, Lin M, Yang Y, Hauptmann A (2017) Feature interaction augmented sparse learning for fast kinect motion detection. IEEE Trans Image Process 26(8):3911\u20133920","journal-title":"IEEE Trans Image Process"},{"issue":"5","key":"5116_CR6","doi-asserted-by":"publisher","first-page":"1180","DOI":"10.1109\/TCYB.2016.2539546","volume":"47","author":"X Chang","year":"2017","unstructured":"Chang X, Ma Z, Yang Y, Zeng Z, Hauptmann AG (2017) Bi-level semantic representation analysis for multimedia event detection. IEEE Trans Cybern 47(5):1180\u20131197","journal-title":"IEEE Trans Cybern"},{"issue":"8","key":"5116_CR7","doi-asserted-by":"publisher","first-page":"1617","DOI":"10.1109\/TPAMI.2016.2608901","volume":"39","author":"X Chang","year":"2017","unstructured":"Chang X, Yu Y, Yang Y, Xing EP (2017) Semantic pooling for complex event analysis in untrimmed videos. IEEE Trans Pattern Anal Mach Intell 39(8):1617\u20131632. https:\/\/doi.org\/10.1109\/TPAMI.2016.2608901","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"5116_CR8","doi-asserted-by":"crossref","unstructured":"Ch\u00e9ron G, Laptev I, Schmid C (2015) P-cnn: pose-based cnn features for action recognition. In: 2015 IEEE international conference on computer vision (ICCV), pp 3218\u20133226. IEEE","DOI":"10.1109\/ICCV.2015.368"},{"key":"5116_CR9","doi-asserted-by":"crossref","unstructured":"Dalal N, Triggs B (2005) Histograms of oriented gradients for human detection. In: 2005 IEEE computer society conference on computer vision and pattern recognition (CVPR), vol. 1, pp 886\u2013893. IEEE","DOI":"10.1109\/CVPR.2005.177"},{"key":"5116_CR10","doi-asserted-by":"crossref","unstructured":"Dalal N, Triggs B, Schmid C (2006) Human detection using oriented histograms of flow and appearance. In: Proceedings of the 9th European conference on computer vision, pp 428\u2013441. Springer","DOI":"10.1007\/11744047_33"},{"key":"5116_CR11","doi-asserted-by":"crossref","unstructured":"Doll\u00e1r P, Rabaud V, Cottrell G, Belongie S (2005) Behavior recognition via sparse spatio-temporal features. In: 2005 2nd joint IEEE international workshop on visual surveillance and performance evaluation of tracking and surveillance, pp 65\u201372. IEEE","DOI":"10.1109\/VSPETS.2005.1570899"},{"key":"5116_CR12","doi-asserted-by":"crossref","unstructured":"Gao C, Meng D, Tong W, Yang Y, Cai Y, Shen H, Liu G, Xu S, Hauptmann AG (2014) Interactive surveillance event detection through mid-level discriminative representation. In: Proceedings of international conference on multimedia retrieval, pp 305\u2013312. ACM","DOI":"10.1145\/2578726.2578765"},{"key":"5116_CR13","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1016\/j.neucom.2016.05.094","volume":"212","author":"C Gao","year":"2016","unstructured":"Gao C, Du Y, Liu J, Lv J, Yang L, Meng D, Hauptmann AG (2016) Infar dataset: infrared action recognition at different times. Neurocomputing 212:36\u201347","journal-title":"Neurocomputing"},{"key":"5116_CR14","doi-asserted-by":"crossref","unstructured":"Girshick R (2015) Fast r-cnn. In: 2015 IEEE international conference on computer vision (ICCV), pp 1440\u20131448. IEEE","DOI":"10.1109\/ICCV.2015.169"},{"key":"5116_CR15","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: 2014 IEEE conference on computer vision and pattern recognition (CVPR), pp 580\u2013587. IEEE","DOI":"10.1109\/CVPR.2014.81"},{"key":"5116_CR16","doi-asserted-by":"crossref","unstructured":"Gkioxari G, Malik J (2015) Finding action tubes. In: 2015 IEEE conference on computer vision and pattern recognition (CVPR), pp 759\u2013768. IEEE","DOI":"10.1109\/CVPR.2015.7298676"},{"key":"5116_CR17","doi-asserted-by":"crossref","unstructured":"Jain M, Van Gemert J, J\u00e9gou H, Bouthemy P, Snoek CG (2014) Action localization with tubelets from motion. In: 2014 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp 740\u2013747. IEEE","DOI":"10.1109\/CVPR.2014.100"},{"key":"5116_CR18","doi-asserted-by":"crossref","unstructured":"J\u00e9gou H, Douze M, Schmid C, P\u00e9rez P (2010) Aggregating local descriptors into a compact image representation. In: 2010 IEEE conference on computer vision and pattern recognition (CVPR), pp 3304\u20133311. IEEE","DOI":"10.1109\/CVPR.2010.5540039"},{"key":"5116_CR19","doi-asserted-by":"crossref","unstructured":"Jhuang H, Gall J, Zuffi S, Schmid C, Black MJ (2013) Towards understanding action recognition. In: 2013 IEEE international conference on computer vision (ICCV), pp 3192\u20133199. IEEE","DOI":"10.1109\/ICCV.2013.396"},{"issue":"1","key":"5116_CR20","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2013","unstructured":"Ji S, Xu W, Yang M (2013) Yu, K.: 3d convolutional neural networks for human action recognition. IEEE Trans Pattern Anal Mach Intell 35(1):221\u2013231","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"5116_CR21","doi-asserted-by":"crossref","unstructured":"Karpathy A, Toderici G, Shetty S, Leung T, Sukthankar R, Fei-Fei L (2014) Large-scale video classification with convolutional neural networks. In: 2014 IEEE conference on computer vision and pattern recognition (CVPR), pp 1725\u20131732. IEEE","DOI":"10.1109\/CVPR.2014.223"},{"key":"5116_CR22","doi-asserted-by":"crossref","unstructured":"Klaser A, Marsza\u0142ek M, Schmid C, Zisserman A (2010) Human focused action localization in video. In: SGA 2010-international workshop on sign, gesture, and activity, ECCV 2010 workshops, vol. 6553, pp 219\u2013233. Springer","DOI":"10.1007\/978-3-642-35749-7_17"},{"key":"5116_CR23","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. In: Advances in neural information processing systems 25 (NIPS 2012), pp 1097\u20131105. Curran Associates, Inc"},{"key":"5116_CR24","doi-asserted-by":"crossref","unstructured":"Lan T, Wang Y, Mori G (2011) Discriminative figure-centric models for joint action localization and recognition. In: 2011 IEEE international conference on computer vision (ICCV), pp 2003\u20132010. IEEE","DOI":"10.1109\/ICCV.2011.6126472"},{"issue":"4","key":"5116_CR25","doi-asserted-by":"publisher","first-page":"541","DOI":"10.1162\/neco.1989.1.4.541","volume":"1","author":"Y LeCun","year":"1989","unstructured":"LeCun Y, Boser B, Denker JS, Henderson D, Howard RE, Hubbard W, Jackel LD (1989) Backpropagation applied to handwritten zip code recognition. Neural Comput 1(4):541\u2013551","journal-title":"Neural Comput"},{"key":"5116_CR26","doi-asserted-by":"crossref","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu CY, Berg AC (2016) Ssd: Single shot multibox detector. In: Proceedings of the 14th European conference on computer vision, pp 21\u201337. Springer","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"5116_CR27","doi-asserted-by":"crossref","unstructured":"Long J, Shelhamer E, Darrell T (2015) Fully convolutional networks for semantic segmentation. In: 2015 IEEE conference on computer vision and pattern recognition (CVPR), pp 3431\u20133440. IEEE","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"5116_CR28","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe D.G. (2004) Distinctive image features from scale-invariant keypoints. Int J Comput Vis 60:91\u2013110","journal-title":"Int J Comput Vis"},{"key":"5116_CR29","doi-asserted-by":"crossref","unstructured":"Perronnin F, Mensink T (2010) Improving the fisher kernel for large-scale image classification. In: Proceedings of the 11th European conference on computer vision, pp 143\u2013156. Springer","DOI":"10.1007\/978-3-642-15561-1_11"},{"key":"5116_CR30","doi-asserted-by":"publisher","first-page":"976","DOI":"10.1016\/j.imavis.2009.11.014","volume":"28","author":"R Poppe","year":"2010","unstructured":"Poppe R (2010) A survey on vision-based human action recognition. Image Vis Comput 28:976\u2013990","journal-title":"Image Vis Comput"},{"key":"5116_CR31","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: Towards real-time object detection with region proposal networks. In: Advances in neural information processing systems 28 (NIPS 2015), pp 91\u201399. Curran Associates, Inc"},{"key":"5116_CR32","doi-asserted-by":"crossref","unstructured":"Rodriguez MD, Ahmed J, Shah M (2008) Action mach a spatio-temporal maximum average correlation height filter for action recognition. In: 2008 IEEE conference on computer vision and pattern recognition (CVPR), pp 1\u20138. IEEE","DOI":"10.1109\/CVPR.2008.4587727"},{"key":"5116_CR33","unstructured":"Simonyan K, Zisserman A (2014) Two-stream convolutional networks for action recognition in videos. In: Advances in neural information processing systems 27 (NIPS 2014), pp 568\u2013576. Curran Associates, Inc"},{"key":"5116_CR34","doi-asserted-by":"crossref","unstructured":"Tian Y, Sukthankar R, Shah M (2013) Spatiotemporal deformable part models for action detection. In: 2013 IEEE conference on computer vision and pattern recognition (CVPR), pp 2642\u20132649. IEEE","DOI":"10.1109\/CVPR.2013.341"},{"issue":"2","key":"5116_CR35","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/s11263-013-0620-5","volume":"104","author":"JR Uijlings","year":"2013","unstructured":"Uijlings JR, Van De Sande KE, Gevers T, Smeulders AW (2013) Selective search for object recognition. Int J Comput Vis 104(2):154\u2013171","journal-title":"Int J Comput Vis"},{"key":"5116_CR36","doi-asserted-by":"crossref","unstructured":"Wang H, Schmid C (2013) Action recognition with improved trajectories. In: 2013 IEEE international conference on computer vision (ICCV), pp 3551\u20133558. IEEE","DOI":"10.1109\/ICCV.2013.441"},{"key":"5116_CR37","doi-asserted-by":"crossref","unstructured":"Wang H, Kl\u00e4ser A, Schmid C, Liu CL (2011) Action recognition by dense trajectories. In: 2011 IEEE conference on computer vision and pattern recognition (CVPR), pp 3169\u20133176. IEEE","DOI":"10.1109\/CVPR.2011.5995407"},{"key":"5116_CR38","doi-asserted-by":"crossref","unstructured":"Wang L, Qiao Y, Tang X, Van Gool L (2016) Actionness estimation using hybrid fully convolutional networks. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), pp 2708\u20132717. IEEE","DOI":"10.1109\/CVPR.2016.296"},{"key":"5116_CR39","doi-asserted-by":"publisher","first-page":"224","DOI":"10.1016\/j.cviu.2010.10.002","volume":"115","author":"D Weinland","year":"2011","unstructured":"Weinland D, Ronfard R, Boyer E (2011) A survey of vision-based methods for action representation, segmentation and recognition. Comput Vis Image Underst 115:224\u2013241","journal-title":"Comput Vis Image Underst"},{"key":"5116_CR40","doi-asserted-by":"crossref","unstructured":"Weinzaepfel P, Harchaoui Z, Schmid C (2015) Learning to track for spatio-temporal action localization. In: 2015 IEEE international conference on computer vision (ICCV), pp 3164\u20133172. IEEE","DOI":"10.1109\/ICCV.2015.362"},{"key":"5116_CR41","doi-asserted-by":"crossref","unstructured":"Xiang Y, Alahi A, Savarese S (2015) Learning to track: online multi-object tracking by decision making. In: 2015 IEEE international conference on computer vision (ICCV), pp 4705\u20134713. IEEE","DOI":"10.1109\/ICCV.2015.534"},{"key":"5116_CR42","doi-asserted-by":"crossref","unstructured":"Yan Y, Ricci E, Liu G, Subramanian R, Sebe N (2014) Clustered multi-task linear discriminant analysis for view invariant color-depth action recognition. In: 2014 22nd international conference on pattern recognition (ICPR), pp 3493\u20133498. IEEE","DOI":"10.1109\/ICPR.2014.601"},{"key":"5116_CR43","doi-asserted-by":"publisher","first-page":"5599","DOI":"10.1109\/TIP.2014.2365699","volume":"23","author":"Y Yan","year":"2014","unstructured":"Yan Y, Ricci E, Subramanian R, Liu G, Sebe N (2014) Multitask linear discriminant analysis for view invariant action recognition. IEEE Trans Image Process 23:5599\u20135611","journal-title":"IEEE Trans Image Process"},{"key":"5116_CR44","doi-asserted-by":"crossref","unstructured":"Yeung S, Russakovsky O, Mori G, Fei-Fei L (2016) End-to-end learning of action detection from frame glimpses in videos. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), pp 2678\u20132687. IEEE","DOI":"10.1109\/CVPR.2016.293"},{"key":"5116_CR45","doi-asserted-by":"crossref","unstructured":"Yu G, Yuan J (2015) Fast action proposals for human action detection and search. In: 2015 IEEE conference on computer vision and pattern recognition (CVPR), pp 1302\u20131311. IEEE","DOI":"10.1109\/CVPR.2015.7298735"},{"issue":"2","key":"5116_CR46","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1007\/s11263-016-0907-4","volume":"120","author":"D Zhang","year":"2016","unstructured":"Zhang D, Han J, Li C, Wang J, Li X (2016) Detection of co-salient objects by looking deep and wide. Int J Comput Vis 120(2):215\u2013232","journal-title":"Int J Comput Vis"},{"issue":"4","key":"5116_CR47","doi-asserted-by":"publisher","first-page":"1746","DOI":"10.1109\/TIP.2017.2658957","volume":"26","author":"D Zhang","year":"2017","unstructured":"Zhang D, Han J, Jiang L, Ye S, Chang X (2017) Revealing event saliency in unconstrained video collection. IEEE Trans Image Process 26(4):1746\u20131758","journal-title":"IEEE Trans Image Process"},{"issue":"5","key":"5116_CR48","doi-asserted-by":"publisher","first-page":"865","DOI":"10.1109\/TPAMI.2016.2567393","volume":"39","author":"D Zhang","year":"2017","unstructured":"Zhang D, Meng D, Han J (2017) Co-saliency detection via a self-paced multiple-instance learning framework. IEEE Trans Pattern Anal Mach Intell 39 (5):865\u2013878","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"7","key":"5116_CR49","doi-asserted-by":"publisher","first-page":"981","DOI":"10.1109\/TMM.2015.2431496","volume":"17","author":"L Zhu","year":"2015","unstructured":"Zhu L, Shen J, Jin H, Xie L, Zheng R (2015) Landmark classification with hierarchical multi-modal exemplar feature. IEEE Trans Multimedia 17(7):981\u2013993","journal-title":"IEEE Trans Multimedia"},{"issue":"12","key":"5116_CR50","doi-asserted-by":"publisher","first-page":"2756","DOI":"10.1109\/TCYB.2014.2383389","volume":"45","author":"L Zhu","year":"2015","unstructured":"Zhu L, Shen J, Jin H, Zheng R, Xie L (2015) Content-based visual landmark search via multimodal hypergraph learning. IEEE Trans Cybern 45(12):2756\u20132769","journal-title":"IEEE Trans Cybern"},{"issue":"99","key":"5116_CR51","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TCYB.2016.2520477","volume":"PP","author":"L Zhu","year":"2016","unstructured":"Zhu L, Shen J, Xie L, Cheng Z (2016) Unsupervised topic hypergraph hashing for efficient mobile image retrieval. IEEE Trans Cybern PP(99):1\u201314. https:\/\/doi.org\/10.1109\/TCYB.2016.2591068","journal-title":"IEEE Trans Cybern"},{"issue":"2","key":"5116_CR52","doi-asserted-by":"publisher","first-page":"472","DOI":"10.1109\/TKDE.2016.2562624","volume":"29","author":"L Zhu","year":"2017","unstructured":"Zhu L, Shen J, Xie L, Cheng Z (2017) Unsupervised visual hashing with semantic assistant for content-based image retrieval. IEEE Trans Knowl Data Eng 29 (2):472\u2013486","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"5116_CR53","doi-asserted-by":"crossref","unstructured":"Zitnick CL, Doll\u00e1r P (2014) Edge boxes: Locating object proposals from edges. In: Proceedings of the 13th European Conference on Computer Vision, pp 391\u2013405. Springer","DOI":"10.1007\/978-3-319-10602-1_26"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11042-017-5116-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-017-5116-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-017-5116-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T01:10:42Z","timestamp":1750813842000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11042-017-5116-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,8,24]]},"references-count":53,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2018,2]]}},"alternative-id":["5116"],"URL":"https:\/\/doi.org\/10.1007\/s11042-017-5116-9","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,8,24]]},"assertion":[{"value":"15 March 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 August 2017","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 August 2017","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 August 2017","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}