{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T00:22:38Z","timestamp":1775607758022,"version":"3.50.1"},"publisher-location":"Cham","reference-count":34,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030012366","type":"print"},{"value":"9783030012373","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-01237-3_7","type":"book-chapter","created":{"date-parts":[[2018,10,6]],"date-time":"2018-10-06T14:42:18Z","timestamp":1538836938000},"page":"106-121","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":131,"title":["Modality Distillation with Multiple Stream Networks for Action Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6371-3310","authenticated-orcid":false,"given":"Nuno C.","family":"Garcia","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5259-1496","authenticated-orcid":false,"given":"Pietro","family":"Morerio","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8645-2328","authenticated-orcid":false,"given":"Vittorio","family":"Murino","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,10,7]]},"reference":[{"key":"7_CR1","unstructured":"Ba, L.J., Caruana, R.: Do deep nets really need to be deep? In: Proceedings of Advances in Neural Information Processing Systems (NIPS) (2014)"},{"key":"7_CR2","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"7_CR3","doi-asserted-by":"crossref","unstructured":"Dalal, N., Triggs, B.: Histograms of oriented gradients for human detection. In: IEEE Computer Society Conference on Computer Vision and Pattern Recognition, CVPR 2005, vol. 1, pp. 886\u2013893. IEEE (2005)","DOI":"10.1109\/CVPR.2005.177"},{"key":"7_CR4","doi-asserted-by":"crossref","unstructured":"Eitel, A., Springenberg, J.T., Spinello, L., Riedmiller, M., Burgard, W.: Multimodal deep learning for robust RGB-D object recognition. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 681\u2013687. IEEE (2015)","DOI":"10.1109\/IROS.2015.7353446"},{"key":"7_CR5","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Wildes, R.P.: Spatiotemporal multiplier networks for video action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4768\u20134777 (2017)","DOI":"10.1109\/CVPR.2017.787"},{"key":"7_CR6","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Zisserman, A.: Convolutional two-stream network fusion for video action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1933\u20131941 (2016)","DOI":"10.1109\/CVPR.2016.213"},{"key":"7_CR7","doi-asserted-by":"crossref","unstructured":"Gkioxari, G., Malik, J.: Finding action tubes. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 759\u2013768. IEEE (2015)","DOI":"10.1109\/CVPR.2015.7298676"},{"key":"7_CR8","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"7_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"630","DOI":"10.1007\/978-3-319-46493-0_38","volume-title":"Computer Vision \u2013 ECCV 2016","author":"K He","year":"2016","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Identity mappings in deep residual networks. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 630\u2013645. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_38"},{"key":"7_CR10","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. In: Deep Learning and Representation Learning Workshop: NIPS 2014 (2014)"},{"key":"7_CR11","doi-asserted-by":"crossref","unstructured":"Hoffman, J., Gupta, S., Darrell, T.: Learning with side information through modality hallucination. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 826\u2013834 (2016)","DOI":"10.1109\/CVPR.2016.96"},{"key":"7_CR12","doi-asserted-by":"crossref","unstructured":"Karpathy, A., Toderici, G., Shetty, S., Leung, T., Sukthankar, R., Fei-Fei, L.: Large-scale video classification with convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1725\u20131732 (2014)","DOI":"10.1109\/CVPR.2014.223"},{"key":"7_CR13","doi-asserted-by":"crossref","unstructured":"Laptev, I., Marszalek, M., Schmid, C., Rozenfeld, B.: Learning realistic human actions from movies. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2008, pp. 1\u20138. IEEE (2008)","DOI":"10.1109\/CVPR.2008.4587756"},{"key":"7_CR14","doi-asserted-by":"crossref","unstructured":"Liu, J., Akhtar, N., Mian, A.: Viewpoint invariant action recognition using RGB-D videos. arXiv preprint arXiv:1709.05087 (2017)","DOI":"10.1109\/DICTA.2017.8227505"},{"key":"7_CR15","unstructured":"Lopez-Paz, D., Bottou, L., Sch\u00f6lkopf, B., Vapnik, V.: Unifying distillation and privileged information. In: Proceedings of the International Conference on Learning Representations (ICLR) (2016)"},{"key":"7_CR16","doi-asserted-by":"crossref","unstructured":"Luo, Z., Jiang, L., Hsieh, J.T., Niebles, J.C., Fei-Fei, L.: Graph distillation for action detection with privileged information. arXiv preprint arXiv:1712.00108 (2017)","DOI":"10.1007\/978-3-030-01264-9_11"},{"key":"7_CR17","doi-asserted-by":"crossref","unstructured":"Luo, Z., Peng, B., Huang, D.A., Alahi, A., Fei-Fei, L.: Unsupervised learning of long-term motion dynamics for videos. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR). No. EPFL-CONF-230240 (2017)","DOI":"10.1109\/CVPR.2017.751"},{"issue":"6","key":"7_CR18","doi-asserted-by":"publisher","first-page":"1731","DOI":"10.1109\/JSEN.2014.2309987","volume":"14","author":"T Mallick","year":"2014","unstructured":"Mallick, T., Das, P.P., Majumdar, A.K.: Characterizations of noise in kinect depth images: a review. IEEE Sens. J. 14(6), 1731\u20131740 (2014). https:\/\/doi.org\/10.1109\/JSEN.2014.2309987","journal-title":"IEEE Sens. J."},{"key":"7_CR19","doi-asserted-by":"crossref","unstructured":"Ohn-Bar, E., Trivedi, M.M.: Joint angles similarities and HOG2 for action recognition. In: IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), pp. 465\u2013470. IEEE (2013)","DOI":"10.1109\/CVPRW.2013.76"},{"key":"7_CR20","doi-asserted-by":"crossref","unstructured":"Rahmani, H., Bennamoun, M.: Learning action recognition model from depth and skeleton videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5832\u20135841 (2017)","DOI":"10.1109\/ICCV.2017.621"},{"issue":"12","key":"7_CR21","doi-asserted-by":"publisher","first-page":"2430","DOI":"10.1109\/TPAMI.2016.2533389","volume":"38","author":"H Rahmani","year":"2016","unstructured":"Rahmani, H., Mahmood, A., Huynh, D., Mian, A.: Histogram of oriented principal components for cross-view action recognition. IEEE Trans. Pattern Anal. Mach. Intell. 38(12), 2430\u20132443 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"3","key":"7_CR22","doi-asserted-by":"publisher","first-page":"667","DOI":"10.1109\/TPAMI.2017.2691768","volume":"40","author":"H Rahmani","year":"2018","unstructured":"Rahmani, H., Mian, A., Shah, M.: Learning a deep model for human action recognition from novel viewpoints. IEEE Trans. Pattern Anal. Mach. Intell. 40(3), 667\u2013681 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"7_CR23","doi-asserted-by":"crossref","unstructured":"Shahroudy, A., Liu, J., Ng, T.T., Wang, G.: NTU RGB+D: A large scale dataset for 3D human activity analysis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1010\u20131019 (2016)","DOI":"10.1109\/CVPR.2016.115"},{"issue":"5","key":"7_CR24","doi-asserted-by":"publisher","first-page":"1045","DOI":"10.1109\/TPAMI.2017.2691321","volume":"40","author":"A Shahroudy","year":"2018","unstructured":"Shahroudy, A., Ng, T.T., Gong, Y., Wang, G.: Deep multimodal feature analysis for action recognition in RGB+D videos. IEEE Trans. Pattern Anal. Mach. Intell. 40(5), 1045\u20131058 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"7_CR25","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. In: Advances in Neural Information Processing Systems, pp. 568\u2013576 (2014)"},{"key":"7_CR26","doi-asserted-by":"crossref","unstructured":"Soo Kim, T., Reiter, A.: Interpretable 3D human action analysis with temporal convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 20\u201328 (2017)","DOI":"10.1109\/CVPRW.2017.207"},{"key":"7_CR27","doi-asserted-by":"crossref","unstructured":"Sun, L., Jia, K., Yeung, D.Y., Shi, B.E.: Human action recognition using factorized spatio-temporal convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4597\u20134605 (2015)","DOI":"10.1109\/ICCV.2015.522"},{"key":"7_CR28","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3D convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4489\u20134497 (2015)","DOI":"10.1109\/ICCV.2015.510"},{"key":"7_CR29","doi-asserted-by":"crossref","unstructured":"Tran, D., Wang, H., Torresani, L., Ray, J., LeCun, Y., Paluri, M.: A closer look at spatiotemporal convolutions for action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6450\u20136459 (2018)","DOI":"10.1109\/CVPR.2018.00675"},{"issue":"5","key":"7_CR30","doi-asserted-by":"publisher","first-page":"544","DOI":"10.1016\/j.neunet.2009.06.042","volume":"22","author":"V Vapnik","year":"2009","unstructured":"Vapnik, V., Vashist, A.: A new learning paradigm: learning using privileged information. Neural Netw. 22(5), 544\u2013557 (2009)","journal-title":"Neural Netw."},{"key":"7_CR31","doi-asserted-by":"crossref","unstructured":"Wang, H., Kl\u00e4ser, A., Schmid, C., Liu, C.L.: Action recognition by dense trajectories. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3169\u20133176. IEEE (2011)","DOI":"10.1109\/CVPR.2011.5995407"},{"key":"7_CR32","doi-asserted-by":"crossref","unstructured":"Wang, H., Schmid, C.: Action recognition with improved trajectories. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3551\u20133558 (2013)","DOI":"10.1109\/ICCV.2013.441"},{"key":"7_CR33","doi-asserted-by":"crossref","unstructured":"Wang, J., Nie, X., Xia, Y., Wu, Y., Zhu, S.C.: Cross-view action modeling, learning and recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2649\u20132656 (2014)","DOI":"10.1109\/CVPR.2014.339"},{"key":"7_CR34","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. arXiv preprint arXiv:1711.07971 (2017)","DOI":"10.1109\/CVPR.2018.00813"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-01237-3_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T18:48:30Z","timestamp":1775242110000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-01237-3_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030012366","9783030012373"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-01237-3_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"7 October 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}