{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,27]],"date-time":"2025-06-27T04:10:49Z","timestamp":1750997449045,"version":"3.41.0"},"publisher-location":"Cham","reference-count":51,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319576862"},{"type":"electronic","value":"9783319576879"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-57687-9_5","type":"book-chapter","created":{"date-parts":[[2017,10,15]],"date-time":"2017-10-15T10:32:59Z","timestamp":1508063579000},"page":"103-124","source":"Crossref","is-referenced-by-count":1,"title":["Saliency Prediction for Action Recognition"],"prefix":"10.1007","author":[{"given":"Michael","family":"Dorr","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eleonora","family":"Vig","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,10,16]]},"reference":[{"key":"5_CR1","doi-asserted-by":"crossref","unstructured":"Agtzidis, I., Startsev, M., Dorr, M.: Smooth pursuit detection based on multiple observers. In: Proceedings of the Ninth Biennial ACM Symposium on Eye Tracking Research & Applications, ETRA\u201916, pp. 303\u2013306. ACM, New York (2016)","DOI":"10.1145\/2857491.2857521"},{"issue":"1","key":"5_CR2","doi-asserted-by":"crossref","first-page":"185","DOI":"10.1109\/TPAMI.2012.89","volume":"35","author":"A. Borji","year":"2013","unstructured":"Borji, A., Itti, L.: State-of-the-art in visual attention modeling. IEEE Trans. Pattern Anal. Mach. Intell. 35(1), 185\u2013207 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"5_CR3","unstructured":"Buso, V., Benois-Pineau, J., Gonz\u00e1lez-D\u00edaz, I.: Object recognition in egocentric videos with saliency-based non uniform sampling and variable resolution space for features selection. In:\u00a0CVPR 2014 Egocentric (First-Person) Vision Workshop (2014)"},{"key":"5_CR4","volume-title":"MIT Saliency Benchmark","author":"Z. Bylinskii","year":"2016","unstructured":"Bylinskii, Z., Judd, T., Borji, A., Itti, L., Durand, F., Oliva, A., Torralba, A.: MIT Saliency Benchmark (2016). http:\/\/saliency.mit.edu"},{"key":"5_CR5","unstructured":"Bylinskii, Z., Judd, T., Oliva, A., Torralba, A., Durand, F.: What do different evaluation metrics tell us about saliency models? arXiv preprint arXiv:1604.03605 (2016)"},{"issue":"3","key":"5_CR6","doi-asserted-by":"crossref","first-page":"6","DOI":"10.1167\/9.3.6","volume":"9","author":"M.S. Castelhano","year":"2009","unstructured":"Castelhano, M.S., Mack, M.L., Henderson, J.M.: Viewing task influences eye movement control during active scene perception. J. Vis. 9(3), 6 (2009)","journal-title":"J. Vis."},{"key":"5_CR7","doi-asserted-by":"crossref","unstructured":"Cerf, M., Frady, P., Koch, C.: Faces and text attract gaze independent of the task: experimental data and computer model. J. Vis. 9(12:10), 1\u201315 (2009)","DOI":"10.1167\/9.12.10"},{"key":"5_CR8","volume-title":"Visual Content Indexing and Retrieval with Psycho-Visual Models","author":"S. Chaabouni","year":"2017","unstructured":"Chaabouni, S., Benois-Pineau, J., Zemmari, A., Amar, C.B.: Deep saliency: prediction of interestingness in video with CNN. In: Benois-Pineau, J., Le Callet, P. (eds.) Visual Content Indexing and Retrieval with Psycho-Visual Models. Springer, Cham (2017)"},{"key":"5_CR9","doi-asserted-by":"crossref","unstructured":"Ciregan, D., Meier, U., Schmidhuber, J.: Multi-column deep neural networks for image classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3642\u20133649 (2012)","DOI":"10.1109\/CVPR.2012.6248110"},{"key":"5_CR10","doi-asserted-by":"crossref","unstructured":"Donahue, J., Anne\u00a0Hendricks, L., Guadarrama, S., Rohrbach, M., Venugopalan, S., Saenko, K., Darrell, T.: Long-term recurrent convolutional networks for visual recognition and description. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp.\u00a02625\u20132634 (2015)","DOI":"10.1109\/CVPR.2015.7298878"},{"issue":"10","key":"5_CR11","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1167\/10.10.28","volume":"10","author":"M. Dorr","year":"2010","unstructured":"Dorr, M., Martinetz, T., Gegenfurtner, K., Barth, E.: Variability of eye movements when viewing dynamic natural scenes. J. Vis. 10(10), 1\u201317 (2010)","journal-title":"J. Vis."},{"key":"5_CR12","doi-asserted-by":"crossref","unstructured":"de\u00a0Souza, C.R., Gaidon, A., Vig, E., L\u00f3pez, A.M.: Sympathy for the details: Dense trajectories and hybrid classification architectures for action recognition. In: Proceedings of the European Conference on Computer Vision, pp. 697\u2013716. Springer, Cham (2016)","DOI":"10.1007\/978-3-319-46478-7_43"},{"key":"5_CR13","first-page":"226","volume":"96","author":"M. Ester","year":"1996","unstructured":"Ester, M., Kriegel, H.P., Sander, J., Xu, X.: A density-based algorithm for discovering clusters in large spatial databases with noise. In: KDD Proceedings, vol.\u00a096, pp. 226\u2013231 (1996)","journal-title":"In: KDD Proceedings"},{"key":"5_CR14","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Wildes, R.P.: Dynamically encoded actions based on spacetime saliency. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2755\u20132764 (2015)","DOI":"10.1109\/CVPR.2015.7298892"},{"issue":"7","key":"5_CR15","doi-asserted-by":"crossref","first-page":"957","DOI":"10.1016\/j.compbiomed.2006.08.018","volume":"3","author":"R.B. Goldstein","year":"2007","unstructured":"Goldstein, R.B., Woods, R.L., Peli, E.: Where people look when watching movies: Do all viewers look at the same place? Comput. Biol. Med. 3(7), 957\u201364 (2007)","journal-title":"Comput. Biol. Med."},{"key":"5_CR16","first-page":"5","volume":"1","author":"J. Harel","year":"2006","unstructured":"Harel, J., Koch, C., Perona, P., et\u00a0al.: Graph-based visual saliency. In: Advances in Neural Information Processing Systems, vol.\u00a01, p.\u00a05 (2006)","journal-title":"In: Advances in Neural Information Processing Systems"},{"issue":"1","key":"5_CR17","doi-asserted-by":"crossref","first-page":"1","DOI":"10.3167\/proj.2008.020102","volume":"2","author":"U. Hasson","year":"2008","unstructured":"Hasson, U., Landesman, O., Knappmeyer, B., Vallines, I., Rubin, N., Heeger, D.J.: Neurocinematics: the neuroscience of film. Projections 2(1), 1\u201326 (2008)","journal-title":"Projections"},{"key":"5_CR18","doi-asserted-by":"crossref","first-page":"6","DOI":"10.1016\/j.visres.2016.09.002","volume":"128","author":"I. Hooge","year":"2016","unstructured":"Hooge, I., Holmqvist, K., Nystr\u00f6m, M.: The pupil is faster than the corneal reflection (CR): are video based pupil-CR eye trackers suitable for studying detailed dynamics of eye movements? Vis. Res. 128, 6\u201318 (2016)","journal-title":"Vis. Res."},{"key":"5_CR19","doi-asserted-by":"crossref","unstructured":"Judd, T., Ehinger, K., Durand, F., Torralba, A.: Learning to predict where humans look. In:\u00a0Proceedings of IEEE International Conference on Computer Vision (ICCV), pp. 2106\u20132113 (2009)","DOI":"10.1109\/ICCV.2009.5459462"},{"key":"5_CR20","volume-title":"Large-scale video classification with convolutional neural networks","author":"A. Karpathy","year":"2014","unstructured":"Karpathy, A., Toderici, G., Shetty, S., Leung, T., Sukthankar, R., Fei-Fei, L.: Large-scale video classification with convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2014)"},{"key":"5_CR21","doi-asserted-by":"crossref","unstructured":"Koch, K., McLean, J., Segev, R., Freed, M.A., II, M.J.B., Balasubramanian, V., Sterling, P.: How much the eye tells the brain. Curr. Biol. 16, 1428\u201334 (2006)","DOI":"10.1016\/j.cub.2006.05.056"},{"key":"5_CR22","doi-asserted-by":"crossref","unstructured":"Lan, Z., Lin, M., Li, X., Hauptmann, A.G., Raj, B.: Beyond Gaussian Pyramid: Multi-skip feature stacking for action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 204\u2013212 (2015)","DOI":"10.1109\/CVPR.2015.7298616"},{"issue":"1","key":"5_CR23","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1007\/s12559-012-9146-3","volume":"5","author":"S. Marat","year":"2013","unstructured":"Marat, S., Rahman, A., Pellerin, D., Guyader, N., Houzet, D.: Improving visual saliency by adding \u2018face feature map\u2019 and \u2018center bias\u2019. Cogn. Comput. 5(1), 63\u201375 (2013)","journal-title":"Cogn. Comput."},{"key":"5_CR24","doi-asserted-by":"crossref","unstructured":"Marszalek, M., Laptev, I., Schmid, C.: Actions in context. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2929\u20132936 (2009)","DOI":"10.1109\/CVPR.2009.5206557"},{"key":"5_CR25","doi-asserted-by":"crossref","unstructured":"Mathe, S., Sminchisescu, C.: Dynamic eye movement datasets and learnt saliency models for visual action recognition. In: Proceedings of the European Conference on Computer Vision, pp. 842\u2013856. Springer, Berlin (2012)","DOI":"10.1007\/978-3-642-33709-3_60"},{"issue":"7","key":"5_CR26","doi-asserted-by":"crossref","first-page":"1408","DOI":"10.1109\/TPAMI.2014.2366154","volume":"37","author":"S. Mathe","year":"2015","unstructured":"Mathe, S., Sminchisescu, C.: Actions in the eye: dynamic gaze datasets and learnt saliency models for visual recognition. IEEE Trans. Pattern Anal. Mach. Intell. 37(7), 1408\u20131424 (2015)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"1","key":"5_CR27","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1007\/s12559-010-9074-z","volume":"3","author":"P.K. Mital","year":"2011","unstructured":"Mital, P.K., Smith, T.J., Hill, R., Henderson, J.M.: Clustering of gaze during dynamic scene viewing is predicted by motion. Cogn. Comput. 3(1), 5\u201324 (2011)","journal-title":"Cogn. Comput."},{"issue":"C","key":"5_CR28","doi-asserted-by":"crossref","first-page":"109","DOI":"10.1016\/j.cviu.2016.03.013","volume":"150","author":"X. Peng","year":"2016","unstructured":"Peng, X., Wang, L., Wang, X., Qiao, Y.: Bag of visual words and fusion methods for action recognition. Comput. Vis. Image Underst. 150(C), 109\u2013125 (2016)","journal-title":"Comput. Vis. Image Underst."},{"issue":"8","key":"5_CR29","doi-asserted-by":"crossref","first-page":"2397","DOI":"10.1016\/j.visres.2005.03.019","volume":"45","author":"R.J. Peters","year":"2005","unstructured":"Peters, R.J., Iyer, A., Itti, L., Koch, C.: Components of bottom-up gaze allocation in natural images. Vis. Res. 45(8), 2397\u20132416 (2005)","journal-title":"Vis. Res."},{"issue":"3","key":"5_CR30","doi-asserted-by":"crossref","first-page":"601","DOI":"10.1109\/TPAMI.2011.158","volume":"34","author":"A. Prest","year":"2012","unstructured":"Prest, A., Schmid, C., Ferrari, V.: Weakly supervised learning of interactions between humans and objects. IEEE Trans. Pattern Anal. Mach. Intell. 34(3), 601\u2013614 (2012)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"5_CR31","first-page":"3","volume":"2","author":"M. Sapienza","year":"2012","unstructured":"Sapienza, M., Cuzzolin, F., Torr, P.H.: Learning discriminative space-time actions from weakly labelled videos. In: Proceedings of the British Machine Vision Conference, vol.\u00a02, p.\u00a03 (2012)","journal-title":"In: Proceedings of the British Machine Vision Conference"},{"issue":"1","key":"5_CR32","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1007\/s11263-013-0662-8","volume":"110","author":"M. Sapienza","year":"2014","unstructured":"Sapienza, M., Cuzzolin, F., Torr, P.H.: Learning discriminative space\u2013time action parts from weakly labelled videos. Int. J. Comput. Vis. 110(1), 30\u201347 (2014)","journal-title":"Int. J. Comput. Vis."},{"key":"5_CR33","unstructured":"Shapovalova, N., Raptis, M., Sigal, L., Mori, G.: Action is in the eye of the beholder: eye-gaze driven model for spatio-temporal action localization. In: Advances in Neural Information Processing Systems, pp. 2409\u20132417 (2013)"},{"key":"5_CR34","doi-asserted-by":"crossref","unstructured":"Shi, F., Petriu, E., Laganiere, R.: Sampling strategies for real-time action recognition. In:\u00a0Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp.\u00a02595\u20132602 (2013)","DOI":"10.1109\/CVPR.2013.335"},{"key":"5_CR35","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. In: Advances in Neural Information Processing Systems, pp. 568\u2013576 (2014)"},{"issue":"8","key":"5_CR36","doi-asserted-by":"crossref","first-page":"16","DOI":"10.1167\/13.8.16","volume":"13","author":"T.J. Smith","year":"2013","unstructured":"Smith, T.J., Mital, P.K.: Attentional synchrony and the influence of viewing task on gaze behavior in static and dynamic scenes. J. Vis. 13(8), 16\u201316 (2013)","journal-title":"J. Vis."},{"issue":"4","key":"5_CR37","doi-asserted-by":"crossref","first-page":"1756","DOI":"10.1152\/jn.00344.2010","volume":"105","author":"M. Spering","year":"2011","unstructured":"Spering, M., Sch\u00fctz, A.C., Braun, D.I., Gegenfurtner, K.R.: Keep your eyes on the ball: smooth pursuit eye movements enhance prediction of visual motion. J. Neurophysiol. 105(4), 1756\u20131767 (2011)","journal-title":"J. Neurophysiol."},{"key":"5_CR38","doi-asserted-by":"crossref","unstructured":"Sultani, W., Saleemi, I.: Human action recognition across datasets by foreground-weighted histogram decomposition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 764\u2013771 (2014)","DOI":"10.1109\/CVPR.2014.103"},{"issue":"14","key":"5_CR39","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1167\/7.14.4","volume":"7","author":"B.W. Tatler","year":"2007","unstructured":"Tatler, B.W.: The central fixation bias in scene viewing: Selecting an optimal viewing position independently of motor biases and image feature distributions. J. Vis. 7(14), 1\u201317 (2007). http:\/\/journalofvision.org\/7\/14\/4\/","journal-title":"J. Vis."},{"issue":"7","key":"5_CR40","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1167\/9.7.4","volume":"9","author":"P.H. Tseng","year":"2009","unstructured":"Tseng, P.H., Carmi, R., Cameron, I.G.M., Munoz, D.P., Itti, L.: Quantifying center bias of observers in free viewing of dynamic natural scenes. J. Vis. 9(7), 1\u201316 (2009). http:\/\/journalofvision.org\/9\/7\/4\/","journal-title":"J. Vis."},{"key":"5_CR41","doi-asserted-by":"crossref","unstructured":"Vig, E., Dorr, M., Cox, D.D.: Saliency-based selection of sparse descriptors for action recognition. In: Proceedings of International Conference on Image Processing, pp. 1405\u20131408 (2012)","DOI":"10.1109\/ICIP.2012.6467132"},{"key":"5_CR42","first-page":"84","volume":"7578","author":"E. Vig","year":"2012","unstructured":"Vig, E., Dorr, M., Cox, D.D.: Space-variant descriptor sampling for action recognition based on saliency and eye movements. In: Proceedings of the European Conference on Computer Vision. LNCS, vol. 7578, pp. 84\u201397 (2012)","journal-title":"In: Proceedings of the European Conference on Computer Vision. LNCS"},{"issue":"6","key":"5_CR43","doi-asserted-by":"crossref","first-page":"1080","DOI":"10.1109\/TPAMI.2011.198","volume":"34","author":"E. Vig","year":"2012","unstructured":"Vig, E., Dorr, M., Martinetz, T., Barth, E.: Intrinsic dimensionality predicts the saliency of natural dynamic scenes. IEEE Trans. Pattern Anal. Mach. Intell. 34(6), 1080\u20131091 (2012)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"5_CR44","doi-asserted-by":"crossref","first-page":"28","DOI":"10.3389\/frobt.2015.00028","volume":"2","author":"M. Vrigkas","year":"2015","unstructured":"Vrigkas, M., Nikou, C., Kakadiaris, I.A.: A review of human activity recognition methods. Front. Robot. AI 2, 28 (2015)","journal-title":"Front. Robot. AI"},{"key":"5_CR45","volume-title":"Action recognition with improved trajectories","author":"H. Wang","year":"2013","unstructured":"Wang, H., Schmid, C.: Action recognition with improved trajectories. In: Proceedings of the IEEE International Conference on Computer Vision (2013)"},{"key":"5_CR46","doi-asserted-by":"crossref","unstructured":"Wang, H., Kl\u00e4ser, A., Schmid, C., Liu, C.L.: Action recognition by dense trajectories. In:\u00a0Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3169\u20133176. IEEE, New York (2011)","DOI":"10.1109\/CVPR.2011.5995407"},{"key":"5_CR47","doi-asserted-by":"crossref","unstructured":"Wang, L., Qiao, Y., Tang, X.: Action recognition with trajectory-pooled deep-convolutional descriptors. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4305\u20134314 (2015)","DOI":"10.1109\/CVPR.2015.7299059"},{"key":"5_CR48","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1007\/s11263-015-0846-5","volume":"119","author":"H. Wang","year":"2016","unstructured":"Wang, H., Oneata, D., Verbeek, J., Schmid, C.: A robust and efficient video representation for action recognition. Int. J. Comput. Vis. 119, 219\u201338 (2016)","journal-title":"Int. J. Comput. Vis."},{"key":"5_CR49","doi-asserted-by":"crossref","first-page":"355","DOI":"10.1068\/p5552","volume":"36","author":"R. Wartburg von","year":"2007","unstructured":"von Wartburg, R., Wurtz, P., Pflugshaupt, T., Nyffeler, T., L\u00fcthi, M., M\u00fcri, R.: Size matters: Saccades during scene perception. Perception 36, 355\u201365 (2007)","journal-title":"Perception"},{"key":"5_CR50","unstructured":"Zhou, Y., Yu, H., Wang, S.: Feature sampling strategies for action recognition. arXiv preprint arXiv:1501.06993 (2015)"},{"key":"5_CR51","volume-title":"Edge boxes: locating object proposals from edges","author":"L. Zitnick","year":"2014","unstructured":"Zitnick, L., Dollar, P.: Edge boxes: locating object proposals from edges. In: Proceedings of the European Conference on Computer Vision (2014)"}],"container-title":["Visual Content Indexing and Retrieval with Psycho-Visual Models"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-57687-9_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,26]],"date-time":"2025-06-26T14:01:30Z","timestamp":1750946490000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-57687-9_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319576862","9783319576879"],"references-count":51,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-57687-9_5","relation":{},"subject":[],"published":{"date-parts":[[2017]]}}}