{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,6]],"date-time":"2025-11-06T11:43:33Z","timestamp":1762429413613,"version":"3.37.3"},"reference-count":109,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2020,9,10]],"date-time":"2020-09-10T00:00:00Z","timestamp":1599696000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,9,10]],"date-time":"2020-09-10T00:00:00Z","timestamp":1599696000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100000923","name":"Australian Research Council","doi-asserted-by":"crossref","award":["DE160100241"],"award-info":[{"award-number":["DE160100241"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2021,2]]},"DOI":"10.1007\/s11263-020-01376-1","type":"journal-article","created":{"date-parts":[[2020,9,10]],"date-time":"2020-09-10T08:40:39Z","timestamp":1599727239000},"page":"300-320","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["Beyond Covariance: SICE and Kernel Based Visual Feature Representation"],"prefix":"10.1007","volume":"129","author":[{"given":"Jianjia","family":"Zhang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0961-0441","authenticated-orcid":false,"given":"Lei","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Luping","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Wanqing","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,9,10]]},"reference":[{"issue":"2","key":"1376_CR1","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1090\/S0894-0347-09-00650-X","volume":"23","author":"R Adamczak","year":"2010","unstructured":"Adamczak, R., Litvak, A., Pajor, A., & Tomczak-Jaegermann, N. (2010). Quantitative estimates of the convergence of the empirical covariance matrix in log-concave ensembles. Journal of the American Mathematical Society, 23(2), 535\u2013561.","journal-title":"Journal of the American Mathematical Society"},{"key":"1376_CR2","doi-asserted-by":"crossref","unstructured":"Ali, S., Basharat, A., & Shah, M. (2007). Chaotic invariants for human action recognition. In IEEE international conference on computer vision (pp. 1\u20138). IEEE.","DOI":"10.1109\/ICCV.2007.4409046"},{"issue":"2","key":"1376_CR3","doi-asserted-by":"publisher","first-page":"411","DOI":"10.1002\/mrm.20965","volume":"56","author":"V Arsigny","year":"2006","unstructured":"Arsigny, V., Fillard, P., Pennec, X., & Ayache, N. (2006). Log-euclidean metrics for fast and simple calculus on diffusion tensors. Magnetic Resonance in Medicine, 56(2), 411\u2013421.","journal-title":"Magnetic Resonance in Medicine"},{"key":"1376_CR4","first-page":"485","volume":"9","author":"O Banerjee","year":"2008","unstructured":"Banerjee, O., Ghaoui, L. E., & d\u2019Aspremont, A. (2008). Model selection through sparse maximum likelihood estimation for multivariate Gaussian or binary data. Journal of Machine Learning Research, 9, 485\u2013516.","journal-title":"Journal of Machine Learning Research"},{"issue":"3","key":"1376_CR5","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1006\/jmrb.1994.1037","volume":"103","author":"PJ Basser","year":"1994","unstructured":"Basser, P. J., Mattiello, J., & LeBihan, D. (1994). Estimation of the effective self-diffusion tensor from the NMR spin echo. Journal of Magnetic Resonance, Series B, 103(3), 247\u2013254.","journal-title":"Journal of Magnetic Resonance, Series B"},{"key":"1376_CR6","doi-asserted-by":"crossref","unstructured":"Cavazza, J., Morerio, P., & Murino, V. (2017a). A compact kernel approximation for 3D action recognition. In International conference on image analysis and processing (pp. 211\u2013222). Springer.","DOI":"10.1007\/978-3-319-68560-1_19"},{"key":"1376_CR7","doi-asserted-by":"crossref","unstructured":"Cavazza, J., Morerio, P., & Murino, V. (2017b). When kernel methods meet feature learning: Log-covariance network for action recognition from skeletal data. In IEEE conference on computer vision and pattern recognition workshops (pp. 33\u201340). IEEE.","DOI":"10.1109\/CVPRW.2017.165"},{"key":"1376_CR8","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1016\/j.patcog.2019.03.031","volume":"93","author":"J Cavazza","year":"2019","unstructured":"Cavazza, J., Morerio, P., & Murino, V. (2019). Scalable and compact 3D action recognition with approximated RBF kernel machines. Pattern Recognition, 93, 25\u201335.","journal-title":"Pattern Recognition"},{"key":"1376_CR9","doi-asserted-by":"crossref","unstructured":"Cavazza, J., Zunino, A., Biagio, M. S., & Murino, V. (2016). Kernelized covariance for action recognition. In International conference on pattern recognition (pp. 408\u2013413). IEEE.","DOI":"10.1109\/ICPR.2016.7899668"},{"key":"1376_CR10","doi-asserted-by":"crossref","unstructured":"Chatfield, K., Simonyan, K., Vedaldi, A., & Zisserman, A. (2014). Return of the devil in the details: Delving deep into convolutional nets. arXiv preprint arXiv:1405.3531.","DOI":"10.5244\/C.28.6"},{"key":"1376_CR11","doi-asserted-by":"crossref","unstructured":"Cimpoi, M., Maji, S., Kokkinos, I., Mohamed, S., & Vedaldi, A. (2014). Describing textures in the wild. In IEEE conference on computer vision and pattern recognition (pp. 3606\u20133613). IEEE.","DOI":"10.1109\/CVPR.2014.461"},{"issue":"1","key":"1376_CR12","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1007\/s11263-015-0872-3","volume":"118","author":"M Cimpoi","year":"2016","unstructured":"Cimpoi, M., Maji, S., Kokkinos, I., & Vedaldi, A. (2016). Deep filter banks for texture recognition, description, and segmentation. International Journal of Computer Vision, 118(1), 65\u201394. https:\/\/doi.org\/10.1007\/s11263-015-0872-3.","journal-title":"International Journal of Computer Vision"},{"key":"1376_CR13","doi-asserted-by":"crossref","unstructured":"Cirujeda, P., & Binefa, X. (2014). 4DCov: A nested covariance descriptor of spatio-temporal features for gesture recognition in depth sequences. In International conference on 3D vision (Vol.\u00a01, pp. 657\u2013664). IEEE.","DOI":"10.1109\/3DV.2014.10"},{"key":"1376_CR14","doi-asserted-by":"crossref","unstructured":"Cui, Y., Zhou, F., Wang, J., Liu, X., Lin, Y., & Belongie, S. (2017). Kernel pooling for convolutional neural networks. In IEEE conference on computer vision and pattern recognition (pp. 2921\u20132930). IEEE.","DOI":"10.1109\/CVPR.2017.325"},{"key":"1376_CR15","unstructured":"Donahue, J., Jia, Y., Vinyals, O., Hoffman, J., Zhang, N., Tzeng, E., et\u00a0al. (2014). Decaf: A deep convolutional activation feature for generic visual recognition. In International conference on machine learning (pp. 647\u2013655)."},{"key":"1376_CR16","doi-asserted-by":"publisher","first-page":"1102","DOI":"10.1214\/09-AOAS249","volume":"3","author":"IL Dryden","year":"2009","unstructured":"Dryden, I. L., Koloydenko, A., & Zhou, D. (2009). Non-Euclidean statistics for covariance matrices, with applications to diffusion tensor imaging. The Annals of Applied Statistics, 3, 1102\u20131123.","journal-title":"The Annals of Applied Statistics"},{"key":"1376_CR17","unstructured":"Du, Y., Wang, W., & Wang, L. (2015). Hierarchical recurrent neural network for skeleton based action recognition. In IEEE conference on computer vision and pattern recognition (pp. 1110\u20131118). IEEE."},{"key":"1376_CR18","doi-asserted-by":"crossref","unstructured":"Durand, T., Mordan, T., Thome, N., & Cord, M. (2017). Wildcat: Weakly supervised learning of deep convnets for image classification, pointwise localization and segmentation. In IEEE conference on computer vision and pattern recognition (pp. 642\u2013651). IEEE.","DOI":"10.1109\/CVPR.2017.631"},{"key":"1376_CR19","doi-asserted-by":"crossref","unstructured":"Evangelidis, G., Singh, G., & Horaud, R. (2014). Skeletal quads: Human action recognition using joint quadruples. In International conference on pattern recognition (pp. 4513\u20134518). IEEE.","DOI":"10.1109\/ICPR.2014.772"},{"issue":"2","key":"1376_CR20","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C. K., Winn, J., & Zisserman, A. (2010). The pascal visual object classes (voc) challenge. International Journal of Computer Vision, 88(2), 303\u2013338.","journal-title":"International Journal of Computer Vision"},{"key":"1376_CR21","unstructured":"Fasshauer, G.E. (2011). Positive definite kernels: past, present and future. Dolomites Research Notes on Approximation 4 (Special Issue on Kernel Functions and Meshless Methods):21\u201363."},{"key":"1376_CR22","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., & Zisserman, A. (2016). Convolutional two-stream network fusion for video action recognition. In IEEE conference on computer vision and pattern recognition (pp. 1933\u20131941). IEEE.","DOI":"10.1109\/CVPR.2016.213"},{"issue":"3","key":"1376_CR23","doi-asserted-by":"publisher","first-page":"432","DOI":"10.1093\/biostatistics\/kxm045","volume":"9","author":"J Friedman","year":"2008","unstructured":"Friedman, J., Hastie, T., & Tibshirani, R. (2008). Sparse inverse covariance estimation with the graphical lasso. Biostatistics, 9(3), 432\u2013441.","journal-title":"Biostatistics"},{"key":"1376_CR24","doi-asserted-by":"crossref","unstructured":"Gao, Y., Beijbom, O., Zhang, N., & Darrell, T. (2016). Compact bilinear pooling. In IEEE conference on computer vision and pattern recognition (pp. 317\u2013326). IEEE.","DOI":"10.1109\/CVPR.2016.41"},{"key":"1376_CR25","unstructured":"Gross, R., & Shi, J. (2001). The cmu motion of body (mobo) database. Technical report CMU-RI-TR-01-18, Robotics Institute, Pittsburgh, PA."},{"key":"1376_CR26","doi-asserted-by":"crossref","unstructured":"Guo, K., Ishwar, P., & Konrad, J. (2010). Action recognition using sparse representation on covariance manifolds of optical flow. In IEEE international conference on advanced video and signal based surveillance (pp. 188\u2013195). IEEE.","DOI":"10.1109\/AVSS.2010.71"},{"key":"1376_CR27","doi-asserted-by":"crossref","unstructured":"Harandi, M. T., Salzmann, M., & Hartley, R. (2014a). From manifold to manifold: geometry-aware dimensionality reduction for SPD matrices. In European conference on computer vision (pp. 17\u201332). Springer.","DOI":"10.1007\/978-3-319-10605-2_2"},{"key":"1376_CR28","doi-asserted-by":"crossref","unstructured":"Harandi, M. T., Salzmann, M., & Porikli, F.M. (2014b). Bregman divergences for infinite dimensional covariance matrices. In IEEE conference on computer vision and pattern recognition (pp. 1003\u20131010). IEEE.","DOI":"10.1109\/CVPR.2014.132"},{"key":"1376_CR29","doi-asserted-by":"crossref","unstructured":"Harandi, M.T., Sanderson, C., Hartley, R., & Lovell, B.C. (2012). Sparse coding and dictionary learning for symmetric positive definite matrices: A kernel approach. In European Conference on Computer Vision, Springer, pp. 216\u2013229.","DOI":"10.1007\/978-3-642-33709-3_16"},{"issue":"2","key":"1376_CR30","first-page":"83","volume":"27","author":"T Hastie","year":"2005","unstructured":"Hastie, T., Tibshirani, R., Friedman, J., & Franklin, J. (2005). The elements of statistical learning: data mining, inference and prediction. The Mathematical Intelligencer, 27(2), 83\u201385.","journal-title":"The Mathematical Intelligencer"},{"key":"1376_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11263-017-1000-3","volume":"123","author":"M Hayat","year":"2017","unstructured":"Hayat, M., Khan, S. H., & Bennamoun, M. (2017). Empowering simple binary classifiers for image set based face recognition. International Journal of Computer Vision, 123, 1\u201320.","journal-title":"International Journal of Computer Vision"},{"key":"1376_CR32","volume-title":"Neural Networks: A Comprehensive Foundation","author":"S Haykin","year":"1998","unstructured":"Haykin, S. (1998). Neural Networks: A Comprehensive Foundation (2nd ed.). Upper Saddle River, NJ: Prentice Hall PTR.","edition":"2"},{"key":"1376_CR33","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In IEEE conference on computer vision and pattern recognition (pp. 770\u2013778). IEEE.","DOI":"10.1109\/CVPR.2016.90"},{"key":"1376_CR34","unstructured":"Hsu, C.W., Chang, C.C., & Lin, C.J. et\u00a0al. (2003). A practical guide to support vector classification"},{"key":"1376_CR35","doi-asserted-by":"crossref","unstructured":"Hu, J.F., Zheng, W.S., Lai, J., & Zhang, J. (2015). Jointly learning heterogeneous features for RGB-d activity recognition. In IEEE conference on computer vision and pattern recognition (pp. 5344\u20135352). IEEE.","DOI":"10.1109\/CVPR.2015.7299172"},{"key":"1376_CR36","doi-asserted-by":"crossref","unstructured":"Hu, J.F., Zheng, W.S., Pan, J., Lai, J., & Zhang, J. (2018). Deep bilinear learning for RGB-d action recognition. In European conference on computer vision (pp. 335\u2013351). Springer.","DOI":"10.1007\/978-3-030-01234-2_21"},{"key":"1376_CR37","first-page":"3371","volume":"12","author":"J Huang","year":"2011","unstructured":"Huang, J., Zhang, T., & Metaxas, D. (2011). Learning with structured sparsity. The Journal of Machine Learning Research, 12, 3371\u20133412.","journal-title":"The Journal of Machine Learning Research"},{"issue":"3","key":"1376_CR38","doi-asserted-by":"publisher","first-page":"935","DOI":"10.1016\/j.neuroimage.2009.12.120","volume":"50","author":"S Huang","year":"2010","unstructured":"Huang, S., Li, J., Sun, L., Ye, J., Fleisher, A., Wu, T., et al. (2010). Learning brain connectivity of alzheimer\u2019s disease by sparse inverse covariance estimation. NeuroImage, 50(3), 935\u2013949.","journal-title":"NeuroImage"},{"key":"1376_CR39","unstructured":"Hussein, M.E., Torki, M., Gowayyed, M.A., & El-Saban, M. (2013). Human action recognition using a temporal hierarchy of covariance descriptors on 3D joint locations. In International joint conference on artificial intelligence (pp. 2466\u20132472)."},{"key":"1376_CR40","doi-asserted-by":"crossref","unstructured":"Ionescu, C., Vantzos, O., & Sminchisescu, C. (2015). Matrix backpropagation for deep networks with structured layers. In IEEE international conference on computer vision (pp. 2965\u20132973). IEEE.","DOI":"10.1109\/ICCV.2015.339"},{"key":"1376_CR41","doi-asserted-by":"crossref","unstructured":"Jayasumana, S., Hartley, R., Salzmann, M., Li, H., & Harandi, M. (2013). Kernel methods on the Riemannian manifold of symmetric positive definite matrices. In IEEE conference on computer vision and pattern recognition (pp. 73\u201380). IEEE.","DOI":"10.1109\/CVPR.2013.17"},{"key":"1376_CR42","doi-asserted-by":"crossref","unstructured":"Ji, Y., Ye, G., & Cheng, H. (2014). Interactive body part contrast mining for human interaction recognition. In IEEE international conference on multimedia and expo workshops (pp. 1\u20136) IEEE.","DOI":"10.1109\/ICMEW.2014.6890714"},{"key":"1376_CR43","doi-asserted-by":"crossref","unstructured":"Ke, Q., Bennamoun, M., An, S., Sohel, F., & Boussaid, F. (2017). A new representation of skeleton sequences for 3D action recognition. In IEEE conference on computer vision and pattern recognition (pp. 4570\u20134579). IEEE.","DOI":"10.1109\/CVPR.2017.486"},{"key":"1376_CR44","volume-title":"Probabilistic graphical models: Principles and techniques-Adaptive computation and machine learning","author":"D Koller","year":"2009","unstructured":"Koller, D., & Friedman, N. (2009). Probabilistic graphical models: Principles and techniques-Adaptive computation and machine learning. Cambridge: MIT Press."},{"key":"1376_CR45","doi-asserted-by":"crossref","unstructured":"Koniusz, P., & Cherian, A. (2016). Sparse coding for third-order super-symmetric tensor descriptors with application to texture recognition. In IEEE conference on computer vision and pattern recognition (pp. 5395\u20135403). IEEE.","DOI":"10.1109\/CVPR.2016.582"},{"key":"1376_CR46","doi-asserted-by":"crossref","unstructured":"Koniusz, P., Cherian, A., & Porikli, F. (2016). Tensor representations via kernel linearization for action recognition from 3D skeletons. In European conference on computer vision (pp. 37\u201353). Springer.","DOI":"10.1007\/978-3-319-46493-0_3"},{"key":"1376_CR47","unstructured":"Koniusz, P., Yan, F., Gosselin, P.H., & Mikolajczyk, K. (2013). Higher-order occurrence pooling on mid-and low-level features: Visual concept detection. HAL-Inria."},{"key":"1376_CR48","doi-asserted-by":"crossref","unstructured":"Kulkarni, P., Jurie, F., Zepeda, J., P\u00e9rez ,P., & Chevallier, L. (2016). Spleap: Soft pooling of learned parts for image classification. In European conference on computer vision (pp. 329\u2013345). Springer.","DOI":"10.1007\/978-3-319-46484-8_20"},{"key":"1376_CR49","doi-asserted-by":"crossref","unstructured":"Lee, I., Kim, D., Kang, S., & Lee, S. (2017). Ensemble deep learning for skeleton-based action recognition using temporal sliding LSTM networks. In IEEE international conference on computer vision (pp. 1012\u20131020). IEEE.","DOI":"10.1109\/ICCV.2017.115"},{"key":"1376_CR50","doi-asserted-by":"crossref","unstructured":"Lehrmann, A. M., Gehler, P. V., & Nowozin, S. (2013). A non-parametric Bayesian network prior of human pose. In IEEE International conference on computer vision (pp. 1281\u20131288). IEEE.","DOI":"10.1109\/ICCV.2013.162"},{"key":"1376_CR51","doi-asserted-by":"crossref","unstructured":"Leibe, B., & Schiele, B. (2003). Analyzing appearance and contour based methods for object categorization. In IEEE conference on computer vision and pattern recognition (Vol.\u00a02, pp. II\u2013409). IEEE.","DOI":"10.1109\/CVPR.2003.1211497"},{"key":"1376_CR52","doi-asserted-by":"crossref","unstructured":"Li, P., & Wang, Q. (2012). Local log-euclidean covariance matrix (l2ecm) for image representation and its applications. In European conference on computer vision (pp. 469\u2013482). Springer.","DOI":"10.1007\/978-3-642-33712-3_34"},{"key":"1376_CR53","doi-asserted-by":"crossref","unstructured":"Li, P., Xie, J., Wang, Q., & Zuo, W. (2017). Is second-order information helpful for large-scale visual recognition? In IEEE international conference on computer vision (pp. 2070\u20132078). IEEE.","DOI":"10.1109\/ICCV.2017.228"},{"key":"1376_CR54","doi-asserted-by":"crossref","unstructured":"Li, S., Li, W., Cook, C., Zhu, C., & Gao, Y. (2018). Independently recurrent neural network (INDRNN): Building a longer and deeper RNN. In IEEE conference on computer vision and pattern recognition (pp. 5457\u20135466). IEEE.","DOI":"10.1109\/CVPR.2018.00572"},{"key":"1376_CR55","doi-asserted-by":"crossref","unstructured":"Li, W., Wen, L., Chuah, M. C., & Lyu, S. (2015). Category-blind human action recognition: A practical recognition system. In IEEE International conference on computer vision (pp. 4444\u20134452). IEEE.","DOI":"10.1109\/ICCV.2015.505"},{"key":"1376_CR56","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., RoyChowdhury, A., & Maji, S. (2015). Bilinear cnn models for fine-grained visual recognition. In IEEE international conference on computer vision (pp. 1449\u20131457). IEEE.","DOI":"10.1109\/ICCV.2015.170"},{"issue":"6","key":"1376_CR57","doi-asserted-by":"publisher","first-page":"1309","DOI":"10.1109\/TPAMI.2017.2723400","volume":"40","author":"TY Lin","year":"2017","unstructured":"Lin, T. Y., RoyChowdhury, A., & Maji, S. (2017). Bilinear convolutional neural networks for fine-grained visual recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence, 40(6), 1309\u20131322.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1376_CR58","doi-asserted-by":"crossref","unstructured":"Liu, J., Shahroudy, A., Xu, D., & Wang, G. (2016). Spatio-temporal LSTM with trust gates for 3D human action recognition. In European conference on computer vision (pp. 816\u2013833). Springer.","DOI":"10.1007\/978-3-319-46487-9_50"},{"key":"1376_CR59","doi-asserted-by":"crossref","unstructured":"Liu, J., Wang, G., Hu, P., Duan, L. Y., & Kot, A. C. (2017). Global context-aware attention LSTM networks for 3D action recognition. In IEEE conference on computer vision and pattern recognition (Vol.\u00a07, p.\u00a043). IEEE.","DOI":"10.1109\/CVPR.2017.391"},{"key":"1376_CR60","doi-asserted-by":"publisher","first-page":"1436","DOI":"10.1214\/009053606000000281","volume":"34","author":"N Meinshausen","year":"2006","unstructured":"Meinshausen, N., & B\u00fchlmann, P. (2006). High-dimensional graphs and variable selection with the lasso. The Annals of Statistics, 34, 1436\u20131462.","journal-title":"The Annals of Statistics"},{"key":"1376_CR61","doi-asserted-by":"crossref","unstructured":"M\u00fcller, M., Baak, A., & Seidel, H.P. (2009). Efficient and robust annotation of motion capture data. In ACM SIGGRAPH\/Eurographics symposium on computer animation, New Orleans, USA (pp. 17\u201326).","DOI":"10.1145\/1599470.1599473"},{"key":"1376_CR62","doi-asserted-by":"crossref","unstructured":"Ohn-Bar, E., & Trivedi, M. (2013). Joint angles similarities and hog2 for action recognition. In IEEE conference on computer vision and pattern recognition workshops (pp. 465\u2013470). IEEE.","DOI":"10.1109\/CVPRW.2013.76"},{"key":"1376_CR63","doi-asserted-by":"crossref","unstructured":"Oquab, M., Bottou, L., Laptev, I., & Sivic, J. (2015). Is object localization for free?-weakly-supervised learning with convolutional neural networks. In IEEE conference on computer vision and pattern recognition (pp. 685\u2013694). IEEE.","DOI":"10.1109\/CVPR.2015.7298668"},{"key":"1376_CR64","doi-asserted-by":"crossref","unstructured":"Oreifej, O., & Liu, Z. (2013). Hon4d: Histogram of oriented 4d normals for activity recognition from depth sequences. In IEEE conference on computer vision and pattern recognition (pp. 716\u2013723). IEEE.","DOI":"10.1109\/CVPR.2013.98"},{"issue":"6","key":"1376_CR65","doi-asserted-by":"publisher","first-page":"1652","DOI":"10.1109\/TSMCB.2008.927276","volume":"38","author":"Y Pang","year":"2008","unstructured":"Pang, Y., Yuan, Y., & Li, X. (2008a). Effective feature extraction in high-dimensional space. IEEE Transactions on Systems, Man, and Cybernetics Part B, Cybernetics, 38(6), 1652\u20131656.","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics Part B, Cybernetics"},{"issue":"7","key":"1376_CR66","doi-asserted-by":"publisher","first-page":"989","DOI":"10.1109\/TCSVT.2008.924108","volume":"18","author":"Y Pang","year":"2008","unstructured":"Pang, Y., Yuan, Y., & Li, X. (2008b). Gabor-based region covariance matrices for face recognition. IEEE Transactions on Circuits and Systems for Video Technology, 18(7), 989\u2013993.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"1376_CR67","unstructured":"Park, J. (2007). Digital correlation matrix in multivariate statistics and its application for component selection and dynamic correlation modeling. ProQuest."},{"issue":"10","key":"1376_CR68","doi-asserted-by":"publisher","first-page":"1090","DOI":"10.1109\/34.879790","volume":"22","author":"PJ Phillips","year":"2000","unstructured":"Phillips, P. J., Moon, H., Rizvi, S. A., & Rauss, P. J. (2000). The FERET evaluation methodology for face-recognition algorithms. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22(10), 1090\u20131104.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1376_CR69","doi-asserted-by":"crossref","unstructured":"P\u00f3czos, B., Xiong, L., Sutherland, D.J., & Schneider, J.G. (2012). Nonparametric kernel estimators for image classification. In IEEE conference on computer vision and pattern recognition (pp. 2989\u20132996). IEEE.","DOI":"10.1109\/CVPR.2012.6248028"},{"key":"1376_CR70","doi-asserted-by":"crossref","unstructured":"Porikli, F., Tuzel, O., & Meer, P. (2006). Covariance tracking using model update based on lie algebra. In IEEE conference on computer vision and pattern recognition (pp. 728\u2013735). IEEE.","DOI":"10.1109\/CVPR.2006.94"},{"key":"1376_CR71","unstructured":"Quang, M. H., Biagio, M. S., & Murino, V. (2014). Log-Hilbert-Schmidt metric between positive definite operators on Hilbert spaces. In Conference on neural information processing systems (pp. 388\u2013396)."},{"issue":"4","key":"1376_CR72","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1109\/34.761261","volume":"21","author":"T Randen","year":"1999","unstructured":"Randen, T., & Husoy, J. H. (1999). Filtering for texture classification: A comparative study. IEEE Transactions on Pattern Analysis and Machine Intelligence, 21(4), 291\u2013310.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1376_CR73","doi-asserted-by":"crossref","unstructured":"Romero, A., Gouiff\u00e8s, M., & Lacassagne, L. (2013). Enhanced local binary covariance matrices (ELBCM) for texture analysis and object tracking. In International conference on computer vision\/computer graphics collaboration techniques and applications (pp. 1\u20138).","DOI":"10.1145\/2466715.2466733"},{"issue":"3","key":"1376_CR74","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., Deng, J., et al. (2015). ImageNet large scale visual recognition challenge. International Journal of Computer Vision, 115(3), 211\u2013252. https:\/\/doi.org\/10.1007\/s11263-015-0816-y.","journal-title":"International Journal of Computer Vision"},{"key":"1376_CR75","volume-title":"Learning with kernels: Support vector machines, regularization, optimization, and beyond","author":"B Sch\u00f6lkopf","year":"2002","unstructured":"Sch\u00f6lkopf, B., Smola, A. J., Bach, F., et al. (2002). Learning with kernels: Support vector machines, regularization, optimization, and beyond. Cambridge: MIT Press."},{"key":"1376_CR76","doi-asserted-by":"crossref","unstructured":"Shahroudy, A., Liu, J., Ng, T. T., & Wang, G. (2016). NTU RGB+D: A large scale dataset for 3D human activity analysis. In IEEE conference on computer vision and pattern recognition (pp. 1010\u20131019). IEEE.","DOI":"10.1109\/CVPR.2016.115"},{"key":"1376_CR77","doi-asserted-by":"publisher","first-page":"1045","DOI":"10.1109\/TPAMI.2017.2691321","volume":"40","author":"A Shahroudy","year":"2017","unstructured":"Shahroudy, A., Ng, T. T., Gong, Y., & Wang, G. (2017). Deep multimodal feature analysis for action recognition in RGB+ d videos. IEEE Transactions on Pattern Analysis and Machine Intelligence, 40, 1045\u20131058.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1376_CR78","doi-asserted-by":"crossref","unstructured":"Shi, L., Zhang, Y., Cheng, J., & Lu, H. (2019). Two-stream adaptive graph convolutional networks for skeleton-based action recognition. In IEEE conference on computer vision and pattern recognition (pp. 12026\u201312035).","DOI":"10.1109\/CVPR.2019.01230"},{"key":"1376_CR79","doi-asserted-by":"crossref","unstructured":"Si, C., Jing, Y., Wang, W., Wang, L., & Tan, T. (2018). Skeleton-based action recognition with spatial reasoning and temporal stack learning. In European conference on computer vision (pp. 103\u2013118).","DOI":"10.1007\/978-3-030-01246-5_7"},{"key":"1376_CR80","unstructured":"Simonyan, K., & Zisserman, A. (2014). Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556."},{"issue":"2","key":"1376_CR81","doi-asserted-by":"publisher","first-page":"875","DOI":"10.1016\/j.neuroimage.2010.08.063","volume":"54","author":"SM Smith","year":"2011","unstructured":"Smith, S. M., Miller, K. L., Salimi-Khorshidi, G., Webster, M., Beckmann, C. F., Nichols, T. E., et al. (2011). Network modelling methods for FMRI. Neuroimage, 54(2), 875\u2013891.","journal-title":"Neuroimage"},{"key":"1376_CR82","doi-asserted-by":"crossref","unstructured":"Song, S., Lan, C., Xing, J., Zeng, W., & Liu, J. (2017). An end-to-end spatio-temporal attention model for human action recognition from skeleton data. In AAAI conference on artificial intelligence (pp. 4263\u20134270).","DOI":"10.1609\/aaai.v31i1.11212"},{"key":"1376_CR83","unstructured":"Sra, S. (2011). Positive definite matrices and the symmetric stein divergence. arXiv preprint arXiv:1110.1773."},{"key":"1376_CR84","doi-asserted-by":"crossref","unstructured":"Sun, H., Zhen, X., Zheng, Y., Yang, G., Yin, Y., & Li, S. (2017). Learning deep match kernels for image-set classification. In IEEE conference on computer vision and pattern recognition (pp. 3307\u20133316).","DOI":"10.1109\/CVPR.2017.661"},{"key":"1376_CR85","doi-asserted-by":"crossref","unstructured":"Tabia, H., Laga, H., Picard, D., & Gosselin, P. H. (2014). Covariance descriptors for 3D shape matching and retrieval. In IEEE conference on computer vision and pattern recognition (pp. 4185\u20134192). IEEE.","DOI":"10.1109\/CVPR.2014.533"},{"key":"1376_CR86","doi-asserted-by":"crossref","unstructured":"Tuzel, O., Porikli, F., & Meer, P. (2006). Region covariance: A fast descriptor for detection and classification. In European conference on computer vision (pp. 589\u2013600). Springer.","DOI":"10.1007\/11744047_45"},{"issue":"10","key":"1376_CR87","doi-asserted-by":"publisher","first-page":"1713","DOI":"10.1109\/TPAMI.2008.75","volume":"30","author":"O Tuzel","year":"2008","unstructured":"Tuzel, O., Porikli, F., & Meer, P. (2008). Pedestrian detection via classification on riemannian manifolds. IEEE Transactions on Pattern Analysis and Machine Intelligence, 30(10), 1713\u20131727.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1376_CR88","doi-asserted-by":"crossref","unstructured":"Vedaldi, A., & Lenc, K. (2015). Matconvnet \u2013 convolutional neural networks for matlab. In ACM international conference on multimedia.","DOI":"10.1145\/2733373.2807412"},{"issue":"3","key":"1376_CR89","doi-asserted-by":"publisher","first-page":"480","DOI":"10.1109\/TPAMI.2011.153","volume":"34","author":"A Vedaldi","year":"2012","unstructured":"Vedaldi, A., & Zisserman, A. (2012). Efficient additive kernels via explicit feature maps. IEEE Transactions on Pattern Analysis and Machine Intelligence, 34(3), 480\u2013492.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1376_CR90","doi-asserted-by":"crossref","unstructured":"Vemulapalli, R., Arrate, F., & Chellappa, R. (2014). Human action recognition by representing 3D skeletons as points in a lie group. In IEEE conference on computer vision and pattern recognition (pp. 588\u2013595). IEEE.","DOI":"10.1109\/CVPR.2014.82"},{"key":"1376_CR91","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1109\/TIP.2019.2925285","volume":"29","author":"L Wang","year":"2019","unstructured":"Wang, L., Huynh, D. Q., & Koniusz, P. (2019a). A comparative review of recent kinect-based action recognition algorithms. IEEE Transactions on Image Processing, 29, 15\u201328.","journal-title":"IEEE Transactions on Image Processing"},{"key":"1376_CR92","doi-asserted-by":"crossref","unstructured":"Wang, L., Zhang, J., Zhou, L., Tang, C., & Li, W. (2015a). Beyond covariance: Feature representation with nonlinear kernel matrices. In IEEE international conference on computer vision (pp. 4570\u20134578). IEEE.","DOI":"10.1109\/ICCV.2015.519"},{"key":"1376_CR93","doi-asserted-by":"crossref","unstructured":"Wang, Q., Li, P., Hu, Q., Zhu, P., & Zuo, W. (2019b). Deep global generalized Gaussian networks. In IEEE conference on computer vision and pattern recognition (pp. 5080\u20135088). IEEE.","DOI":"10.1109\/CVPR.2019.00522"},{"key":"1376_CR94","doi-asserted-by":"crossref","unstructured":"Wang, Q., Xie, J., Zuo, W., Zhang, L., & Li, P. (2019c). Deep CNNS meet global covariance pooling: Better representation and generalization. arXiv preprint arXiv:1904.06836.","DOI":"10.1109\/TPAMI.2020.2974833"},{"key":"1376_CR95","unstructured":"Wang, R., Guo, H., & Davis, L.S. (2012). Covariance discriminative learning: A natural and efficient approach to image set classification. In IEEE conference on computer vision and pattern recognition (pp. 2496\u20132503). IEEE."},{"key":"1376_CR96","doi-asserted-by":"crossref","unstructured":"Wang, W., Wang, R., Huang, Z., Shan, S., & Chen, X. (2015b). Discriminant analysis on Riemannian manifold of Gaussian distributions for face recognition with image sets. In IEEE conference on computer vision and pattern recognition (pp. 2048\u20132057). IEEE.","DOI":"10.1109\/CVPR.2015.7298816"},{"key":"1376_CR97","doi-asserted-by":"crossref","unstructured":"Wei, Z.&, Hoai, M. (2016). Region ranking SVM for image classification. In IEEE conference on computer vision and pattern recognition (pp. 2987\u20132996). IEEE.","DOI":"10.1109\/CVPR.2016.326"},{"key":"1376_CR98","unstructured":"Wei, Y., Xia, W., Huang, J., Ni, B., Dong, J., Zhao, Y., et\u00a0al. (2014). CNN: Single-label to multi-label. arXiv preprint arXiv:1406.5726."},{"key":"1376_CR99","doi-asserted-by":"crossref","unstructured":"Wolf, L., Hassner, T., & Maoz, I. (2011). Face recognition in unconstrained videos with matched background similarity. In IEEE conference on computer vision and pattern recognition (pp. 529\u2013534). IEEE.","DOI":"10.1109\/CVPR.2011.5995566"},{"issue":"1","key":"1376_CR100","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1007\/s10044-014-0430-6","volume":"18","author":"Y Wu","year":"2015","unstructured":"Wu, Y., Ma, B., & Jia, Y. (2015). Differential tracking with a kernel-based region covariance descriptor. Pattern Anal Appl, 18(1), 45\u201359. https:\/\/doi.org\/10.1007\/s10044-014-0430-6.","journal-title":"Pattern Anal Appl"},{"key":"1376_CR101","doi-asserted-by":"crossref","unstructured":"Yan, S., Xiong, Y., & Lin, D. (2018). Spatial temporal graph convolutional networks for skeleton-based action recognition. AAAI conference on artificial,. intelligence.","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"1376_CR102","doi-asserted-by":"crossref","unstructured":"Yang, X., & Tian, Y. (2014). Super normal vector for activity recognition using depth sequences. In IEEE conference on computer vision and pattern recognition (pp. 804\u2013811). IEEE.","DOI":"10.1109\/CVPR.2014.108"},{"key":"1376_CR103","doi-asserted-by":"crossref","unstructured":"Yuan, C., Hu, W., Li, X., Maybank, S. J., & Luo, G. (2009). Human action recognition under log-Euclidean Riemannian metric. In Asian conference on computer vision (pp. 343\u2013353).","DOI":"10.1007\/978-3-642-12307-8_32"},{"key":"1376_CR104","doi-asserted-by":"crossref","unstructured":"Yun, K., Honorio, J., Chattopadhyay, D., Berg, T. L., & Samaras, D. (2012a). Two-person interaction detection using body-pose features and multiple instance learning. In IEEE conference on computer vision and pattern recognition workshops (pp. 28\u201335). IEEE.","DOI":"10.1109\/CVPRW.2012.6239234"},{"key":"1376_CR105","doi-asserted-by":"crossref","unstructured":"Yun, K., Honorio, J., Chattopadhyay, D., Berg, T. L, & Samaras, D. (2012b). Two-person interaction detection using body-pose features and multiple instance learning. In IEEE computer society conference on computer vision and pattern recognition workshops (pp. 28\u201335). IEEE.","DOI":"10.1109\/CVPRW.2012.6239234"},{"key":"1376_CR106","doi-asserted-by":"crossref","unstructured":"Zhang, P., Lan, C., Xing, J., Zeng, W., Xue, J., & Zheng, N. (2017a). View adaptive recurrent neural networks for high performance human action recognition from skeleton data. In IEEE international conference on computer vision (pp. 2136\u20132145). IEEE.","DOI":"10.1109\/ICCV.2017.233"},{"key":"1376_CR107","doi-asserted-by":"crossref","unstructured":"Zhang, S., Liu, X., & Xiao, J. (2017b). On geometric features for skeleton-based action recognition using multilayer lstm networks. In IEEE winter conference on applications of computer vision (pp. 148\u2013157). IEEE.","DOI":"10.1109\/WACV.2017.24"},{"key":"1376_CR108","doi-asserted-by":"crossref","unstructured":"Zhu, W., Lan, C., Xing, J., Zeng, W., Li, Y., Shen, L., et\u00a0al. (2016). Co-occurrence feature learning for skeleton based action recognition using regularized deep LSTM networks. In AAAI conference on artificial intelligence (pp. 3697\u20133703).","DOI":"10.1609\/aaai.v30i1.10451"},{"key":"1376_CR109","doi-asserted-by":"crossref","unstructured":"Zunino, A., Cavazza, J., & Murino, V. (2017). Revisiting human action recognition: Personalization vs. generalization. In International conference on image analysis and processing (pp. 469\u2013480). Springer.","DOI":"10.1007\/978-3-319-68560-1_42"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-020-01376-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-020-01376-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-020-01376-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,17]],"date-time":"2022-11-17T17:16:25Z","timestamp":1668705385000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-020-01376-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9,10]]},"references-count":109,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2021,2]]}},"alternative-id":["1376"],"URL":"https:\/\/doi.org\/10.1007\/s11263-020-01376-1","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"type":"print","value":"0920-5691"},{"type":"electronic","value":"1573-1405"}],"subject":[],"published":{"date-parts":[[2020,9,10]]},"assertion":[{"value":"9 May 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 August 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 September 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}