{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T13:59:41Z","timestamp":1762955981389,"version":"3.40.3"},"publisher-location":"Cham","reference-count":60,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030012243"},{"type":"electronic","value":"9783030012250"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-01225-0_42","type":"book-chapter","created":{"date-parts":[[2018,10,8]],"date-time":"2018-10-08T08:39:54Z","timestamp":1538987994000},"page":"716-733","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["Learning Discriminative Video Representations Using Adversarial Perturbations"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8546-4522","authenticated-orcid":false,"given":"Jue","family":"Wang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5566-0351","authenticated-orcid":false,"given":"Anoop","family":"Cherian","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,10,6]]},"reference":[{"key":"42_CR1","volume-title":"Optimization Algorithms on Matrix Manifolds","author":"PA Absil","year":"2009","unstructured":"Absil, P.A., Mahony, R., Sepulchre, R.: Optimization Algorithms on Matrix Manifolds. Princeton University Press, Princeton (2009)"},{"key":"42_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1007\/978-3-642-25446-8_4","volume-title":"Human Behavior Understanding","author":"M Baccouche","year":"2011","unstructured":"Baccouche, M., Mamalet, F., Wolf, C., Garcia, C., Baskurt, A.: Sequential deep learning for human action recognition. In: Salah, A.A., Lepri, B. (eds.) HBU 2011. LNCS, vol. 7065, pp. 29\u201339. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-25446-8_4"},{"key":"42_CR3","unstructured":"Ballas, N., Yao, L., Pal, C., Courville, A.: Delving deeper into convolutional networks for learning video representations. In: ICLR (2016)"},{"key":"42_CR4","doi-asserted-by":"crossref","unstructured":"Bilen, H., Fernando, B., Gavves, E., Vedaldi, A.: Action recognition with dynamic image networks. PAMI (2017)","DOI":"10.1109\/TPAMI.2017.2769085"},{"key":"42_CR5","doi-asserted-by":"crossref","unstructured":"Bilen, H., Fernando, B., Gavves, E., Vedaldi, A., Gould, S.: Dynamic image networks for action recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.331"},{"key":"42_CR6","volume-title":"An Introduction to Differentiable Manifolds and Riemannian Geometry","author":"WM Boothby","year":"1986","unstructured":"Boothby, W.M.: An Introduction to Differentiable Manifolds and Riemannian Geometry, vol. 120. Academic press, Orlando (1986)"},{"issue":"1","key":"42_CR7","first-page":"1455","volume":"15","author":"N Boumal","year":"2014","unstructured":"Boumal, N., Mishra, B., Absil, P.A., Sepulchre, R.: Manopt, a matlab toolbox for optimization on manifolds. JMLR 15(1), 1455\u20131459 (2014)","journal-title":"JMLR"},{"key":"42_CR8","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: CVPR, July 2017","DOI":"10.1109\/CVPR.2017.502"},{"issue":"8","key":"42_CR9","doi-asserted-by":"publisher","first-page":"1617","DOI":"10.1109\/TPAMI.2016.2608901","volume":"39","author":"X Chang","year":"2017","unstructured":"Chang, X., Yu, Y.L., Yang, Y., Xing, E.P.: Semantic pooling for complex event analysis in untrimmed videos. PAMI 39(8), 1617\u20131632 (2017)","journal-title":"PAMI"},{"key":"42_CR10","doi-asserted-by":"crossref","unstructured":"Cherian, A., Fernando, B., Harandi, M., Gould, S.: Generalized rank pooling for activity recognition. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.172"},{"key":"42_CR11","doi-asserted-by":"crossref","unstructured":"Cherian, A., Sra, S., Gould, S., Hartley, R.: Non-linear temporal subspace representations for activity recognition. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00234"},{"key":"42_CR12","volume-title":"Fundamental Methods of Mathematical Economics","author":"AC Chiang","year":"1984","unstructured":"Chiang, A.C.: Fundamental Methods of Mathematical Economics. McGraw-Hill, New York (1984)"},{"key":"42_CR13","doi-asserted-by":"crossref","unstructured":"Donahue, J., et al.: Long-term recurrent convolutional networks for visual recognition and description. In: CVPR (2015)","DOI":"10.21236\/ADA623249"},{"key":"42_CR14","unstructured":"Du, Y., Wang, W., Wang, L.: Hierarchical recurrent neural network for skeleton based action recognition. In: CVPR (2015)"},{"key":"42_CR15","volume-title":"Three-Dimensional Computer Vision: A Geometric Viewpoint","author":"O Faugeras","year":"1993","unstructured":"Faugeras, O.: Three-Dimensional Computer Vision: A Geometric Viewpoint. MIT press, Cambridge (1993)"},{"key":"42_CR16","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Wildes, R.: Spatiotemporal residual networks for video action recognition. In: NIPS (2016)","DOI":"10.1109\/CVPR.2017.787"},{"key":"42_CR17","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Wildes, R.P.: Spatiotemporal multiplier networks for video action recognition. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.787"},{"key":"42_CR18","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Wildes, R.P.: Temporal residual networks for dynamic scene recognition. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.786"},{"key":"42_CR19","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Zisserman, A.: Convolutional two-stream network fusion for video action recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.213"},{"key":"42_CR20","doi-asserted-by":"crossref","unstructured":"Fernando, B., Anderson, P., Hutter, M., Gould, S.: Discriminative hierarchical rank pooling for activity recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.212"},{"key":"42_CR21","doi-asserted-by":"crossref","unstructured":"Fernando, B., Gavves, E., Oramas, J.M., Ghodrati, A., Tuytelaars, T.: Modeling video evolution for action recognition. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7299176"},{"key":"42_CR22","unstructured":"Fernando, B., Gould, S.: Learning end-to-end video classification with rank-pooling. In: ICML (2016)"},{"key":"42_CR23","unstructured":"Gould, S., Fernando, B., Cherian, A., Anderson, P., Cruz, R.S., Guo, E.: On differentiating parameterized argmin and argmax problems with application to bi-level optimization. arXiv preprint arXiv:1607.05447 (2016)"},{"issue":"1","key":"42_CR24","doi-asserted-by":"publisher","first-page":"170","DOI":"10.1137\/030601880","volume":"16","author":"WW Hager","year":"2005","unstructured":"Hager, W.W., Zhang, H.: A new conjugate gradient method with guaranteed descent and an efficient line search. SIAM J. Optim. 16(1), 170\u2013192 (2005)","journal-title":"SIAM J. Optim."},{"key":"42_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1007\/978-3-319-10605-2_2","volume-title":"Computer Vision \u2013 ECCV 2014","author":"MT Harandi","year":"2014","unstructured":"Harandi, M.T., Salzmann, M., Hartley, R.: From manifold to manifold: geometry-aware dimensionality reduction for SPD matrices. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8690, pp. 17\u201332. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10605-2_2"},{"key":"42_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"408","DOI":"10.1007\/978-3-319-10584-0_27","volume-title":"Computer Vision \u2013 ECCV 2014","author":"MT Harandi","year":"2014","unstructured":"Harandi, M.T., Salzmann, M., Jayasumana, S., Hartley, R., Li, H.: Expanding the family of grassmannian kernels: an embedding perspective. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8695, pp. 408\u2013423. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10584-0_27"},{"issue":"4","key":"42_CR27","doi-asserted-by":"publisher","first-page":"713","DOI":"10.1109\/TPAMI.2014.2353635","volume":"37","author":"M Hayat","year":"2015","unstructured":"Hayat, M., Bennamoun, M., An, S.: Deep reconstruction models for image set classification. PAMI 37(4), 713\u2013727 (2015)","journal-title":"PAMI"},{"key":"42_CR28","doi-asserted-by":"crossref","unstructured":"Huang, Z., Wang, R., Shan, S., Chen, X.: Projection metric learning on GDrassmann manifold with application to video based face recognition. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298609"},{"key":"42_CR29","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Jhuang, H., Garrote, E., Poggio, T., Serre, T.: HMDB: a large video database for human motion recognition. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"42_CR30","doi-asserted-by":"crossref","unstructured":"Lee, I., Kim, D., Kang, S., Lee, S.: Ensemble deep learning for skeleton-based action recognition using temporal sliding LSTM networks. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.115"},{"key":"42_CR31","doi-asserted-by":"crossref","unstructured":"Li, Q., Qiu, Z., Yao, T., Mei, T., Rui, Y., Luo, J.: Action recognition by learning deep multi-granular spatio-temporal video representation. In: ICMR (2016)","DOI":"10.1145\/2911996.2912001"},{"key":"42_CR32","doi-asserted-by":"crossref","unstructured":"Liu, J., Shahroudy, A., Xu, D., Kot, A.C., Wang, G.: Skeleton-based action recognition using spatio-temporal LSTM network with trust gates. arXiv preprint arXiv:1706.08276 (2017)","DOI":"10.1109\/TPAMI.2017.2771306"},{"key":"42_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"816","DOI":"10.1007\/978-3-319-46487-9_50","volume-title":"Computer Vision \u2013 ECCV 2016","author":"J Liu","year":"2016","unstructured":"Liu, J., Shahroudy, A., Xu, D., Wang, G.: Spatio-temporal LSTM with trust gates for 3D human action recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9907, pp. 816\u2013833. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46487-9_50"},{"key":"42_CR34","doi-asserted-by":"crossref","unstructured":"Lu, J., Issaranon, T., Forsyth, D.: SafetyNet: detecting and rejecting adversarial examples robustly. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.56"},{"key":"42_CR35","doi-asserted-by":"crossref","unstructured":"Moosavi-Dezfooli, S.M., Fawzi, A., Fawzi, O., Frossard, P.: Universal adversarial perturbations (2017)","DOI":"10.1109\/CVPR.2017.17"},{"key":"42_CR36","doi-asserted-by":"crossref","unstructured":"Oh, S.J., Fritz, M., Schiele, B.: Adversarial image perturbation for privacy protection-a game theory perspective. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.165"},{"key":"42_CR37","unstructured":"Pascanu, R., Mikolov, T., Bengio, Y.: On the difficulty of training recurrent neural networks. In: ICML (2013)"},{"key":"42_CR38","doi-asserted-by":"crossref","unstructured":"Sadanand, S., Corso, J.J.: Action bank: a high-level representation of activity in video. In: CVPR (2012)","DOI":"10.1109\/CVPR.2012.6247806"},{"key":"42_CR39","doi-asserted-by":"crossref","unstructured":"Shahroudy, A., Liu, J., Ng, T.T., Wang, G.: NTU RGB+ D: a large scale dataset for 3d human activity analysis. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.115"},{"key":"42_CR40","doi-asserted-by":"crossref","unstructured":"Shi, Y., Tian, Y., Wang, Y., Zeng, W., Huang, T.: Learning long-term dependencies for action recognition with a biologically-inspired deep network. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.84"},{"key":"42_CR41","unstructured":"Simonyan, K., Vedaldi, A., Zisserman, A.: Deep inside convolutional networks: Visualising image classification models and saliency maps. arXiv preprint arXiv:1312.6034 (2013)"},{"key":"42_CR42","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. In: NIPS (2014)"},{"key":"42_CR43","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"issue":"3","key":"42_CR44","first-page":"113","volume":"3","author":"ST Smith","year":"1994","unstructured":"Smith, S.T.: Optimization techniques on riemannian manifolds. Fields Inst. Commun. 3(3), 113\u2013135 (1994)","journal-title":"Fields Inst. Commun."},{"key":"42_CR45","doi-asserted-by":"crossref","unstructured":"Soo Kim, T., Reiter, A.: Interpretable 3d human action analysis with temporal convolutional networks. In: CVPR Workshops (2017)","DOI":"10.1109\/CVPRW.2017.207"},{"key":"42_CR46","unstructured":"Srivastava, N., Mansimov, E., Salakhutdinov, R.: Unsupervised learning of video representations using LSTMs. In: ICML (2015)"},{"key":"42_CR47","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"202","DOI":"10.1007\/978-3-319-46493-0_13","volume-title":"Computer Vision \u2013 ECCV 2016","author":"B Su","year":"2016","unstructured":"Su, B., Zhou, J., Ding, X., Wang, H., Wu, Y.: Hierarchical dynamic parsing and encoding for action recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 202\u2013217. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_13"},{"key":"42_CR48","doi-asserted-by":"crossref","unstructured":"Sun, C., Nevatia, R.: Discover: discovering important segments for classification of video events and recounting. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.329"},{"key":"42_CR49","doi-asserted-by":"crossref","unstructured":"Sun, L., Jia, K., Chen, K., Yeung, D.Y., Shi, B.E., Savarese, S.: Lattice long short-term memory for human action recognition. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.236"},{"key":"42_CR50","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3d convolutional networks. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.510"},{"issue":"1","key":"42_CR51","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1007\/s11263-012-0594-8","volume":"103","author":"H Wang","year":"2013","unstructured":"Wang, H., Kl\u00e4ser, A., Schmid, C., Liu, C.L.: Dense trajectories and motion boundary descriptors for action recognition. IJCV 103(1), 60\u201379 (2013)","journal-title":"IJCV"},{"key":"42_CR52","doi-asserted-by":"crossref","unstructured":"Wang, H., Schmid, C.: Action recognition with improved trajectories. In: ICCV (2013)","DOI":"10.1109\/ICCV.2013.441"},{"key":"42_CR53","doi-asserted-by":"crossref","unstructured":"Wang, J., Cherian, A., Porikli, F.: Ordered pooling of optical flow sequences for action recognition. In: WACV. IEEE (2017)","DOI":"10.1109\/WACV.2017.26"},{"key":"42_CR54","doi-asserted-by":"crossref","unstructured":"Wang, J., Cherian, A., Porikli, F., Gould, S.: Video representation learning using discriminative pooling. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00126"},{"key":"42_CR55","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1007\/978-3-319-46484-8_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"L Wang","year":"2016","unstructured":"Wang, L., et al.: Temporal segment networks: towards good practices for deep action recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 20\u201336. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_2"},{"key":"42_CR56","doi-asserted-by":"crossref","unstructured":"Xie, C., Wang, J., Zhang, Z., Zhou, Y., Xie, L., Yuille, A.: Adversarial examples for semantic segmentation and object detection. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.153"},{"key":"42_CR57","doi-asserted-by":"crossref","unstructured":"Yue-Hei Ng, J., Hausknecht, M., Vijayanarasimhan, S., Vinyals, O., Monga, R., Toderici, G.: Beyond short snippets: deep networks for video classification. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7299101"},{"key":"42_CR58","doi-asserted-by":"crossref","unstructured":"Zhang, J., Zhang, T., Dai, Y., Harandi, M., Hartley, R.: Deep unsupervised saliency detection: a multiple noisy labeling perspective. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00941"},{"key":"42_CR59","doi-asserted-by":"crossref","unstructured":"Zhang, P., Lan, C., Xing, J., Zeng, W., Xue, J., Zheng, N.: View adaptive recurrent neural networks for high performance human action recognition from skeleton data. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.233"},{"key":"42_CR60","doi-asserted-by":"crossref","unstructured":"Zhu, W., et al.: Co-occurrence feature learning for skeleton based action recognition using regularized deep LSTM networks. In: AAAI (2016)","DOI":"10.1609\/aaai.v30i1.10451"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-01225-0_42","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,8]],"date-time":"2022-10-08T00:16:50Z","timestamp":1665188210000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-01225-0_42"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030012243","9783030012250"],"references-count":60,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-01225-0_42","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"6 October 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}