{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T10:50:47Z","timestamp":1761562247552,"version":"3.37.3"},"reference-count":76,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2017,6,8]],"date-time":"2017-06-08T00:00:00Z","timestamp":1496880000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100001827","name":"University of Amsterdam","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100001827","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2017,9]]},"DOI":"10.1007\/s11263-017-1023-9","type":"journal-article","created":{"date-parts":[[2017,6,8]],"date-time":"2017-06-08T14:59:21Z","timestamp":1496933961000},"page":"287-311","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Tubelets: Unsupervised Action Proposals from Spatiotemporal Super-Voxels"],"prefix":"10.1007","volume":"124","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8929-3847","authenticated-orcid":false,"given":"Mihir","family":"Jain","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jan","family":"van Gemert","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Herv\u00e9","family":"J\u00e9gou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Patrick","family":"Bouthemy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cees G. M.","family":"Snoek","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,6,8]]},"reference":[{"key":"1023_CR1","doi-asserted-by":"crossref","unstructured":"Brox, T., & Malik, J. (September 2010). Object segmentation by long term analysis of point trajectories. In Proceedings of the European conference on computer vision.","DOI":"10.1007\/978-3-642-15555-0_21"},{"key":"1023_CR2","doi-asserted-by":"crossref","unstructured":"Cao, L., Liu, Z., & Huang, T. S. (June 2010). Cross-dataset action detection. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2010.5539875"},{"key":"1023_CR3","doi-asserted-by":"crossref","unstructured":"Chen, W., & Corso, J. J. (2015). Action detection by implicit intentional motion clustering. In Proceedings of the IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2015.377"},{"key":"1023_CR4","doi-asserted-by":"crossref","unstructured":"Chen, W., Xiong, C., Xu, R., & Corso, J. (2014). Actionness ranking with lattice conditional ordinal random fields. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 748\u2013755).","DOI":"10.1109\/CVPR.2014.101"},{"key":"1023_CR5","doi-asserted-by":"crossref","unstructured":"Cheron, G., Laptev, I., & Schmid, C. (December 2015). P-cnn: Pose-based cnn features for action recognition. In Proceedings of the IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2015.368"},{"issue":"368","key":"1023_CR6","doi-asserted-by":"publisher","first-page":"829","DOI":"10.1080\/01621459.1979.10481038","volume":"74","author":"WS Cleveland","year":"1979","unstructured":"Cleveland, W. S. (1979). Robust locally weighted regression and smoothing scatterplots. Journal of the American Statistical Association, 74(368), 829\u2013836. doi: 10.1080\/01621459.1979.10481038 .","journal-title":"Journal of the American Statistical Association"},{"key":"1023_CR7","doi-asserted-by":"crossref","unstructured":"Dalal, N., & Triggs, B. (June 2005). Histograms of oriented gradients for human detection. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2005.177"},{"key":"1023_CR8","doi-asserted-by":"crossref","unstructured":"Delaitre, V., Laptev, I., Sivic, J. (2010). Recognizing human actions in still images: A study of bag-of-features and part-based representations. In BMVC.","DOI":"10.5244\/C.24.97"},{"issue":"3","key":"1023_CR9","doi-asserted-by":"crossref","first-page":"527","DOI":"10.1109\/TPAMI.2012.141","volume":"35","author":"KG Derpanis","year":"2013","unstructured":"Derpanis, K. G., Sizintsev, M., Cannons, K. J., & Wildes, R. P. (2013). Action spotting and recognition based on a spatiotemporal orientation analysis. IEEE Transactions on Pattern Analysis and Machine Intelligence, 35(3), 527\u2013540.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1023_CR10","doi-asserted-by":"crossref","unstructured":"Dollar, P., Rabaud, V., Cottrell, G., & Belongie, S. (October 2005). Behavior recognition via sparse spatio-temporal features. In Visual surveillance and performance evaluation of tracking and surveillance.","DOI":"10.1109\/VSPETS.2005.1570899"},{"issue":"4","key":"1023_CR11","doi-asserted-by":"crossref","first-page":"1569","DOI":"10.1109\/TIP.2014.2302677","volume":"23","author":"I Everts","year":"2014","unstructured":"Everts, I., van Gemert, J. C., & Gevers, T. (2014). Evaluation of color spatio-temporal interest points for human action recognition. IEEE Transactions on Image Processing, 23(4), 1569\u20131580.","journal-title":"IEEE Transactions on Image Processing"},{"issue":"2","key":"1023_CR12","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1023\/B:VISI.0000022288.19776.77","volume":"59","author":"PF Felzenszwalb","year":"2004","unstructured":"Felzenszwalb, P. F., & Huttenlocher, D. P. (2004). Efficient graph-based image segmentation. International Journal of Computer Vision, 59(2), 167\u2013181.","journal-title":"International Journal of Computer Vision"},{"issue":"9","key":"1023_CR13","doi-asserted-by":"crossref","first-page":"1627","DOI":"10.1109\/TPAMI.2009.167","volume":"32","author":"PF Felzenszwalb","year":"2010","unstructured":"Felzenszwalb, P. F., Girshick, R. B., McAllester, D. A., & Ramanan, D. (2010). Object detection with discriminatively trained part-based models. IEEE Transactions on Pattern Analysis and Machine Intelligence, 32(9), 1627\u20131645.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"1","key":"1023_CR14","doi-asserted-by":"crossref","first-page":"142","DOI":"10.1109\/TPAMI.2015.2437384","volume":"38","author":"RB Girshick","year":"2016","unstructured":"Girshick, R. B., Donahue, J., Darrell, T., & Malik, J. (2016). Region-based convolutional networks for accurate object detection and semantic segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 38(1), 142\u2013158.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1023_CR15","doi-asserted-by":"crossref","unstructured":"Gkioxari, G., & Malik, J. (2015). Finding action tubes. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2015.7298676"},{"issue":"4","key":"1023_CR16","doi-asserted-by":"crossref","first-page":"814","DOI":"10.1109\/TPAMI.2015.2465908","volume":"38","author":"J Hosang","year":"2015","unstructured":"Hosang, J., Benenson, R., Doll\u00e1r, P., & Schiele, B. (2015). What makes for effective detection proposals? IEEE Transactions on Pattern Analysis and Machine Intelligence, 38(4), 814\u2013830.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1023_CR17","doi-asserted-by":"crossref","DOI":"10.1002\/0471725250","volume-title":"Robust statistics","author":"PJ Huber","year":"1981","unstructured":"Huber, P. J. (1981). Robust statistics. New York: Wiley."},{"key":"1023_CR18","doi-asserted-by":"crossref","unstructured":"Jain, M., J\u00e9gou, H., & Bouthemy, P. (June 2013). Better exploiting motion for better action recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2013.330"},{"key":"1023_CR19","doi-asserted-by":"crossref","unstructured":"Jain, M., van Gemert, J. C., J\u00e9gou, H., Bouthemy, P., & Snoek, C. G. M. (June 2014). Action localization by tubelets from motion. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2014.100"},{"key":"1023_CR20","doi-asserted-by":"crossref","unstructured":"Jain, M., van Gemert, J. C., Mensink, T., & Snoek, C. G. M. (2015a). Objects2action: Classifying and localizing actions without any video example. In Proceedings of the IEEE international conference on computer vision (pp. 4588\u20134596).","DOI":"10.1109\/ICCV.2015.521"},{"key":"1023_CR21","doi-asserted-by":"crossref","unstructured":"Jain, M., van Gemert, J. C., & Snoek, C. G. M. (June 2015b). What do 15,000 object categories tell us about classifying and localizing actions? In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2015.7298599"},{"key":"1023_CR22","doi-asserted-by":"crossref","first-page":"28","DOI":"10.3389\/fict.2015.00028","volume":"2","author":"M Jain","year":"2016","unstructured":"Jain, M., J\u00e9gou, H., & Bouthemy, P. (2016). Improved motion description for action classification. Frontiers in ICT, 2, 28.","journal-title":"Frontiers in ICT"},{"issue":"9","key":"1023_CR23","doi-asserted-by":"crossref","first-page":"1704","DOI":"10.1109\/TPAMI.2011.235","volume":"34","author":"H J\u00e9gou","year":"2012","unstructured":"J\u00e9gou, H., Perronnin, F., Douze, M., S\u00e1nchez, J., P\u00e9rez, P., & Schmid, C. (2012). Aggregating local descriptors into compact codes. IEEE Transactions on Pattern Analysis and Machine Intelligence, 34(9), 1704\u20131716.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1023_CR24","doi-asserted-by":"crossref","unstructured":"Jhuang, H., Gall, J., Zuffi, S., Schmid, C., & Black, M. J. (December 2013). Towards understanding action recognition. In Proceedings of the IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2013.396"},{"key":"1023_CR25","doi-asserted-by":"crossref","unstructured":"Kang, K., Ouyang, W., Li, H., & Wang, X. (June 2016). Object detection from video tubelets with convolutional neural networks. In The IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.95"},{"key":"1023_CR26","unstructured":"Ke, Y., Sukthankar, R., & Hebert, M. (October 2005). Efficient visual event detection using volumetric features. In Proceedings of the IEEE international conference on computer vision."},{"key":"1023_CR27","doi-asserted-by":"crossref","unstructured":"Kim, M., & Pavlovic, V. (2010). Structured output ordinal regression for dynamic facial emotion intensity prediction. In European conference on computer vision (pp. 649\u2013662). Springer.","DOI":"10.1007\/978-3-642-15558-1_47"},{"key":"1023_CR28","doi-asserted-by":"crossref","unstructured":"Kl\u00e4ser, A., Marszalek, M., & Schmid, C. (September 2008). A spatio-temporal descriptor based on 3d-gradients. In Proceedings of the British machine vision conference.","DOI":"10.5244\/C.22.99"},{"key":"1023_CR29","doi-asserted-by":"crossref","unstructured":"Kl\u00e4ser, A., Marsza\u0142ek, M., Schmid, C., & Zisserman, A. (2012). Human focused action localization in video. In Trends and topics in computer vision (pp. 219\u2013233).","DOI":"10.1007\/978-3-642-35749-7_17"},{"key":"1023_CR30","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems."},{"key":"1023_CR31","doi-asserted-by":"crossref","unstructured":"Kwak, S., Cho, M., Laptev, I., Ponce, J., & Schmid, C. (2015). Unsupervised object discovery and tracking in video collections. In ICCV.","DOI":"10.1109\/ICCV.2015.363"},{"key":"1023_CR32","doi-asserted-by":"crossref","unstructured":"Lampert, C. H., Blaschko, M. B., & Hofmann, T. (June 2008). Beyond sliding windows: Object localization by efficient subwindow search. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2008.4587586"},{"key":"1023_CR33","unstructured":"Lan, T., Wang, Y., & Mori, G. (November 2011). Discriminative figure-centric models for joint action localization and recognition. In Proceedings of the IEEE international conference on computer vision."},{"issue":"2","key":"1023_CR34","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1007\/s11263-005-1838-7","volume":"64","author":"I Laptev","year":"2005","unstructured":"Laptev, I. (2005). On space-time interest points. International Journal of Computer Vision, 64(2), 107\u2013123.","journal-title":"International Journal of Computer Vision"},{"key":"1023_CR35","doi-asserted-by":"crossref","unstructured":"Ma, S., Zhang, J., Ikizler-Cinbis, N., & Sclaroff, S. (2013). Action recognition and localization by hierarchical space-time segments. In Proceedings of the IEEE international conference on computer vision (pp. 2744\u20132751).","DOI":"10.1109\/ICCV.2013.341"},{"key":"1023_CR36","doi-asserted-by":"crossref","unstructured":"Maji, S., Bourdev, L., & Malik, J. (2011). Action recognition from a distributed representation of pose and appearance. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3177\u20133184).","DOI":"10.1109\/CVPR.2011.5995631"},{"key":"1023_CR37","doi-asserted-by":"crossref","unstructured":"Manen, S., Guillaumin, M., & Van\u00a0Gool L. (2013). Prime object proposals with randomized Prim\u2019s algorithm. In Proceedings of the IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2013.315"},{"key":"1023_CR38","unstructured":"Mosabbeb, E. A., Cabral, R., De\u00a0la Torre, F., & Fathy, M. (2014). Multi-label discriminative weakly-supervised human activity recognition and localization. In ACCV."},{"key":"1023_CR39","unstructured":"Ng, J. Y.-H., Hausknecht, M., Vijayanarasimhan, S., Vinyals, O., Monga, R., & Toderici, G. (2015). Beyond short snippets: Deep networks for video classification. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 4694\u20134702)."},{"issue":"4","key":"1023_CR40","doi-asserted-by":"crossref","first-page":"348","DOI":"10.1006\/jvci.1995.1029","volume":"6","author":"J-M Odobez","year":"1995","unstructured":"Odobez, J.-M., & Bouthemy, P. (1995). Robust multiresolution estimation of parametric motion models. Journal of Visual Communication and Image Representation, 6(4), 348\u2013365.","journal-title":"Journal of Visual Communication and Image Representation"},{"key":"1023_CR41","doi-asserted-by":"crossref","unstructured":"Oneata, D., Verbeek, J., & Schmid, C. (December 2013). Action and event recognition with fisher vectors on a compact feature set. In Proceedings of the IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2013.228"},{"key":"1023_CR42","doi-asserted-by":"crossref","unstructured":"Oneata, D., Revaud, J., Verbeek, J., & Schmid, C. (2014a). Spatio-temporal object detection proposals. In Proceedings of the European conference on computer vision.","DOI":"10.1007\/978-3-319-10578-9_48"},{"key":"1023_CR43","doi-asserted-by":"crossref","unstructured":"Oneata, D., Verbeek, J., & Schmid, C. (2014b). Efficient action localization with approximately normalized fisher vectors. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2014.326"},{"key":"1023_CR44","doi-asserted-by":"crossref","unstructured":"Perronnin, F., & Dance, C. R. (2007). Fisher kernels on visual vocabularies for image categorization. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2007.383266"},{"key":"1023_CR45","doi-asserted-by":"crossref","unstructured":"Perronnin, F., S\u00e1nchez, J., & Mensink, T. (September 2010). Improving the fisher kernel for large-scale image classification. In Proceedings of the European conference on computer vision.","DOI":"10.1007\/978-3-642-15561-1_11"},{"issue":"11","key":"1023_CR46","doi-asserted-by":"crossref","first-page":"3417","DOI":"10.1109\/TIP.2006.881963","volume":"15","author":"G Piriou","year":"2006","unstructured":"Piriou, G., Bouthemy, P., & Yao, J.-F. (2006). Recognition of dynamic video contents with global probabilistic models of visual motion. IEEE Transactions on Image Processing, 15(11), 3417\u20133430.","journal-title":"IEEE Transactions on Image Processing"},{"key":"1023_CR47","doi-asserted-by":"crossref","unstructured":"Prest, A., Leistner, C., Civera, J., Schmid, C., & Ferrari, V. (2012). Learning object class detectors from weakly annotated video. In CVPR.","DOI":"10.1109\/CVPR.2012.6248065"},{"key":"1023_CR48","doi-asserted-by":"crossref","unstructured":"Puscas, M., Sangineto, E., Culibrk, D., & Sebe, N. (2015). Unsupervised tube extraction using transductive learning and dense trajectories. In Proceedings of the IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2015.193"},{"key":"1023_CR49","doi-asserted-by":"crossref","unstructured":"Raptis, M., Kokkinos, I., & Soatto, S. (June 2012). Discovering discriminative action parts from mid-level video representations. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2012.6247807"},{"key":"1023_CR50","doi-asserted-by":"crossref","unstructured":"Rodriguez, M. D., Ahmed, J., & Shah, M. (June 2008). Action mach: A spatio-temporal maximum average correlation height filter for action recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2008.4587727"},{"issue":"3","key":"1023_CR51","doi-asserted-by":"crossref","first-page":"222","DOI":"10.1007\/s11263-013-0636-x","volume":"105","author":"J S\u00e1nchez","year":"2013","unstructured":"S\u00e1nchez, J., Perronnin, F., Mensink, T., & Verbeek, J. (2013). Image classification with the fisher vector: Theory and practice. International Journal of Computer Vision, 105(3), 222\u2013245.","journal-title":"International Journal of Computer Vision"},{"key":"1023_CR52","doi-asserted-by":"crossref","unstructured":"Sch\u00fcldt, C., Laptev, I., & Caputo, B. (2004). Recognizing human actions: A local svm approach. In Proceedings of international conference of pattern recognition.","DOI":"10.1109\/ICPR.2004.1334462"},{"key":"1023_CR53","unstructured":"Simonyan, K., & Zisserman, A. (2014). Two-stream convolutional networks for action recognition in videos. In NIPS."},{"key":"1023_CR54","unstructured":"Soomro, K., Zamir, A.\u00a0R., & Shah, M. (2012). UCF101: A dataset of 101 human actions classes from videos in the wild. CoRR, (2012). URL arxiv.org\/abs\/1212.0402 ."},{"key":"1023_CR55","doi-asserted-by":"crossref","unstructured":"Soomro, K., Idrees, H., & Shah, M. (2015). Action localization in videos through context walk. In Proceedings of the IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2015.375"},{"key":"1023_CR56","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Liu, W., Jia, Y., Sermanet, P., Reed, S., Anguelov, D., et al. (2015). Going deeper with convolutions. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"1023_CR57","doi-asserted-by":"crossref","unstructured":"Tian, Y., Sukthankar, R., & Shah, M. (June 2013). Spatiotemporal deformable part models for action detection. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2013.341"},{"key":"1023_CR58","doi-asserted-by":"crossref","unstructured":"Tran, D., & Yuan, J. (June 2011). Optimal spatio-temporal path discovery for video event detection. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2011.5995416"},{"key":"1023_CR59","unstructured":"Tran, D., & Yuan, J. (December 2012). Max-margin structured output regression for spatio-temporal action localization. In Advances in neural information processing systems."},{"key":"1023_CR60","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., & Paluri, M. (2015). Learning spatiotemporal features with 3d convolutional networks. In Proceedings of the IEEE international conference on computer vision (pp. 4489\u20134497).","DOI":"10.1109\/ICCV.2015.510"},{"issue":"2","key":"1023_CR61","doi-asserted-by":"crossref","first-page":"154","DOI":"10.1007\/s11263-013-0620-5","volume":"104","author":"JRR Uijlings","year":"2013","unstructured":"Uijlings, J. R. R., van de Sande, K. E. A., Gevers, T., & Smeulders, A. W. M. (2013). Selective search for object recognition. International Journal of Computer Vision, 104(2), 154\u2013171.","journal-title":"International Journal of Computer Vision"},{"key":"1023_CR62","doi-asserted-by":"crossref","unstructured":"van\u00a0de Sande, K. E. A., Snoek, C. G. M., & Smeulders, A. W. M. (2014). Fisher and vlad with flair. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2014.304"},{"key":"1023_CR63","doi-asserted-by":"crossref","unstructured":"van Gemert, J. C., Jain, M., Gati, E., & Snoek, C. G. M. (2015). APT: Action localization proposals from dense trajectories. In Proceedings of the British machine vision conference.","DOI":"10.5244\/C.29.177"},{"issue":"2","key":"1023_CR64","doi-asserted-by":"crossref","first-page":"137","DOI":"10.1023\/B:VISI.0000013087.49260.fb","volume":"57","author":"PA Viola","year":"2004","unstructured":"Viola, P. A., & Jones, M. J. (2004). Robust real-time face detection. International Journal of Computer Vision, 57(2), 137\u2013154.","journal-title":"International Journal of Computer Vision"},{"key":"1023_CR65","doi-asserted-by":"crossref","unstructured":"Wang, H., & Schmid, C. (December 2013). Action recognition with improved trajectories. In Proceedings of the IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2013.441"},{"key":"1023_CR66","doi-asserted-by":"crossref","unstructured":"Wang, H., Kl\u00e4ser, A., Schmid, C., & Liu, C.-L. (June 2011). Action recognition by dense trajectories. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2011.5995407"},{"issue":"1","key":"1023_CR67","doi-asserted-by":"crossref","first-page":"60","DOI":"10.1007\/s11263-012-0594-8","volume":"103","author":"H Wang","year":"2013","unstructured":"Wang, H., Kl\u00e4ser, A., Schmid, C., & Liu, C.-L. (2013). Dense trajectories and motion boundary descriptors for action recognition. International Journal of Computer Vision, 103(1), 60\u201379.","journal-title":"International Journal of Computer Vision"},{"key":"1023_CR68","doi-asserted-by":"crossref","unstructured":"Wang, H., Oneata, D., Verbeek, J., & Schmid, C. (2015a). A robust and efficient video representation for action recognition. International Journal of Computer Vision, 119(3), 1\u201320.","DOI":"10.1007\/s11263-015-0846-5"},{"key":"1023_CR69","doi-asserted-by":"crossref","unstructured":"Wang, L., Qiao, Y., & Tang, X. (2014). Video action detection with relational dynamic-poselets. In Proceedings of the European conference on computer vision.","DOI":"10.1007\/978-3-319-10602-1_37"},{"key":"1023_CR70","doi-asserted-by":"crossref","unstructured":"Wang, L., Qiao, Y., & Tang, X. (2015b). Action recognition with trajectory-pooled deep-convolutional descriptors. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 4305\u20134314).","DOI":"10.1109\/CVPR.2015.7299059"},{"issue":"7","key":"1023_CR71","doi-asserted-by":"crossref","first-page":"1310","DOI":"10.1109\/TPAMI.2010.214","volume":"33","author":"Y Wang","year":"2011","unstructured":"Wang, Y., & Mori, G. (2011). Hidden part models for human action recognition: Probabilistic versus max margin. IEEE Transactions on Pattern Analysis and Machine Intelligence, 33(7), 1310\u20131323.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1023_CR72","doi-asserted-by":"crossref","unstructured":"Weinzaepfel, P., Harchaoui, Z., & Schmid, C. (2015). Learning to track for spatio-temporal action localization. In Proceedings of the IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2015.362"},{"key":"1023_CR73","unstructured":"Xu, C., & Corso, J. J. (2012). Evaluation of super-voxel methods for early video processing. In Proceedings of the IEEE conference on computer vision and pattern recognition."},{"key":"1023_CR74","doi-asserted-by":"crossref","unstructured":"Yu, G., & Yuan, J. (2015). Fast action proposals for human action detection and search. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2015.7298735"},{"key":"1023_CR75","unstructured":"Yuan, J., Liu, Zicheng, & Wu, Y. (June 2009). Discriminative subvolume search for efficient action detection. In Proceedings of the IEEE conference on computer vision and pattern recognition."},{"issue":"9","key":"1023_CR76","doi-asserted-by":"crossref","first-page":"1728","DOI":"10.1109\/TPAMI.2011.38","volume":"33","author":"J Yuan","year":"2011","unstructured":"Yuan, J., Liu, Z., & Ying, W. (2011). Discriminative video pattern search for efficient action detection. IEEE Transactions on Pattern Analysis and Machine Intelligence, 33(9), 1728\u20131743.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-017-1023-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-017-1023-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-017-1023-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,29]],"date-time":"2022-07-29T09:40:30Z","timestamp":1659087630000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-017-1023-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,6,8]]},"references-count":76,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2017,9]]}},"alternative-id":["1023"],"URL":"https:\/\/doi.org\/10.1007\/s11263-017-1023-9","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"type":"print","value":"0920-5691"},{"type":"electronic","value":"1573-1405"}],"subject":[],"published":{"date-parts":[[2017,6,8]]}}}