{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T18:46:24Z","timestamp":1773773184604,"version":"3.50.1"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030012519","type":"print"},{"value":"9783030012526","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-01252-6_41","type":"book-chapter","created":{"date-parts":[[2018,10,5]],"date-time":"2018-10-05T09:48:05Z","timestamp":1538732885000},"page":"692-707","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":38,"title":["AGIL: Learning Attention from Human for Visuomotor Tasks"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6681-3360","authenticated-orcid":false,"given":"Ruohan","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Zhuode","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Luxin","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Jake A.","family":"Whritner","sequence":"additional","affiliation":[]},{"given":"Karl S.","family":"Muller","sequence":"additional","affiliation":[]},{"given":"Mary M.","family":"Hayhoe","sequence":"additional","affiliation":[]},{"given":"Dana H.","family":"Ballard","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,10,6]]},"reference":[{"issue":"5","key":"41_CR1","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1016\/j.robot.2008.10.024","volume":"57","author":"BD Argall","year":"2009","unstructured":"Argall, B.D., Chernova, S., Veloso, M., Browning, B.: A survey of robot learning from demonstration. Robot. Auton. Syst. 57(5), 469\u2013483 (2009)","journal-title":"Robot. Auton. Syst."},{"issue":"26","key":"41_CR2","doi-asserted-by":"publisher","first-page":"4355","DOI":"10.1016\/j.visres.2006.08.021","volume":"46","author":"D Baldauf","year":"2006","unstructured":"Baldauf, D., Wolf, M., Deubel, H.: Deployment of visual attention before sequences of goal-directed hand movements. Vis. Res. 46(26), 4355\u20134374 (2006)","journal-title":"Vis. Res."},{"key":"41_CR3","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1613\/jair.3912","volume":"47","author":"MG Bellemare","year":"2012","unstructured":"Bellemare, M.G., Naddaf, Y., Veness, J., Bowling, M.: The arcade learning environment: an evaluation platform for general agents. J. Artif. Intell. Res. 47, 253\u2013279 (2012)","journal-title":"J. Artif. Intell. Res."},{"key":"41_CR4","unstructured":"Bylinskii, Z., Judd, T., Oliva, A., Torralba, A., Durand, F.: What do different evaluation metrics tell us about saliency models? arXiv preprint arXiv:1604.03605 (2016)"},{"key":"41_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"809","DOI":"10.1007\/978-3-319-46454-1_49","volume-title":"Computer Vision \u2013 ECCV 2016","author":"Z Bylinskii","year":"2016","unstructured":"Bylinskii, Z., Recasens, A., Borji, A., Oliva, A., Torralba, A., Durand, F.: Where should saliency models look next? In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9909, pp. 809\u2013824. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46454-1_49"},{"key":"41_CR6","unstructured":"Cruz Jr., G.V., Du, Y., Taylor, M.E.: Pre-training neural networks with human demonstrations for deep reinforcement learning. arXiv preprint arXiv:1709.04083 (2017)"},{"key":"41_CR7","unstructured":"Dhariwal, P., et al.: Openai baselines (2017). https:\/\/github.com\/openai\/baselines"},{"issue":"1","key":"41_CR8","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1167\/13.1.20","volume":"13","author":"G Diaz","year":"2013","unstructured":"Diaz, G., Cooper, J., Rothkopf, C., Hayhoe, M.: Saccades to future ball location reveal memory-based prediction in a virtual-reality interception task. J. Vis. 13(1), 20\u201320 (2013)","journal-title":"J. Vis."},{"issue":"8","key":"41_CR9","doi-asserted-by":"publisher","first-page":"1146","DOI":"10.1038\/nn.3428","volume":"16","author":"E Eldar","year":"2013","unstructured":"Eldar, E., Cohen, J.D., Niv, Y.: The effects of neural gain on attention and learning. Nat. Neurosci. 16(8), 1146\u20131153 (2013)","journal-title":"Nat. Neurosci."},{"key":"41_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"363","DOI":"10.1007\/3-540-45103-X_50","volume-title":"Image Analysis","author":"G Farneb\u00e4ck","year":"2003","unstructured":"Farneb\u00e4ck, G.: Two-frame motion estimation based on polynomial expansion. In: Bigun, J., Gustavsson, T. (eds.) SCIA 2003. LNCS, vol. 2749, pp. 363\u2013370. Springer, Heidelberg (2003). https:\/\/doi.org\/10.1007\/3-540-45103-X_50"},{"key":"41_CR11","unstructured":"Gregor, K., Danihelka, I., Graves, A., Rezende, D., Wierstra, D.: Draw: a recurrent neural network for image generation. In: Proceedings of the 32nd International Conference on Machine Learning (ICML 2015), pp. 1462\u20131471 (2015)"},{"issue":"4","key":"41_CR12","doi-asserted-by":"publisher","first-page":"188","DOI":"10.1016\/j.tics.2005.02.009","volume":"9","author":"M Hayhoe","year":"2005","unstructured":"Hayhoe, M., Ballard, D.: Eye movements in natural behavior. Trends Cogn. Sci. 9(4), 188\u2013194 (2005)","journal-title":"Trends Cogn. Sci."},{"key":"41_CR13","doi-asserted-by":"crossref","unstructured":"Henderson, P., Islam, R., Bachman, P., Pineau, J., Precup, D., Meger, D.: Deep reinforcement learning that matters. arXiv preprint arXiv:1709.06560 (2017)","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"41_CR14","unstructured":"Hessel, M., et al.: Rainbow: Combining improvements in deep reinforcement learning. arXiv preprint arXiv:1710.02298 (2017)"},{"key":"41_CR15","doi-asserted-by":"crossref","unstructured":"Hester, T., et al.: Deep Q-learning from demonstrations. In: Association for the Advancement of Artificial Intelligence (AAAI) (2018)","DOI":"10.1609\/aaai.v32i1.11757"},{"issue":"4","key":"41_CR16","doi-asserted-by":"publisher","first-page":"679","DOI":"10.1037\/0033-295X.109.4.679","volume":"109","author":"CB Holroyd","year":"2002","unstructured":"Holroyd, C.B., Coles, M.G.: The neural basis of human error processing: reinforcement learning, dopamine, and the error-related negativity. Psychol. Rev. 109(4), 679 (2002)","journal-title":"Psychol. Rev."},{"issue":"11","key":"41_CR17","doi-asserted-by":"publisher","first-page":"1254","DOI":"10.1109\/34.730558","volume":"20","author":"L Itti","year":"1998","unstructured":"Itti, L., Koch, C., Niebur, E.: A model of saliency-based visual attention for rapid scene analysis. IEEE Trans. Pattern Anal. Mach. Intell. 20(11), 1254\u20131259 (1998)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"41_CR18","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., et al.: Spatial transformer networks. In: Advances in Neural Information Processing Systems, pp. 2017\u20132025 (2015)"},{"key":"41_CR19","doi-asserted-by":"crossref","unstructured":"Jiang, M., Huang, S., Duan, J., Zhao, Q.: Salicon: saliency in context. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1072\u20131080 (2015)","DOI":"10.1109\/CVPR.2015.7298710"},{"issue":"1636","key":"41_CR20","doi-asserted-by":"publisher","first-page":"20130044","DOI":"10.1098\/rstb.2013.0044","volume":"369","author":"L Johnson","year":"2014","unstructured":"Johnson, L., Sullivan, B., Hayhoe, M., Ballard, D.: Predicting human visuomotor behaviour in a driving task. Philos. Trans. R. Soc. Lond. B: Biol. Sci. 369(1636), 20130044 (2014)","journal-title":"Philos. Trans. R. Soc. Lond. B: Biol. Sci."},{"key":"41_CR21","doi-asserted-by":"crossref","unstructured":"Krafka, K., et al.: Eye tracking for everyone. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2176\u20132184 (2016)","DOI":"10.1109\/CVPR.2016.239"},{"issue":"9","key":"41_CR22","doi-asserted-by":"publisher","first-page":"4446","DOI":"10.1109\/TIP.2017.2710620","volume":"26","author":"SS Kruthiventi","year":"2017","unstructured":"Kruthiventi, S.S., Ayush, K., Babu, R.V.: Deepfix: a fully convolutional neural network for predicting human eye fixations. IEEE Trans. Image Process. 26(9), 4446\u20134456 (2017)","journal-title":"IEEE Trans. Image Process."},{"issue":"2","key":"41_CR23","doi-asserted-by":"publisher","first-page":"451","DOI":"10.1016\/j.neuron.2016.12.040","volume":"93","author":"YC Leong","year":"2017","unstructured":"Leong, Y.C., Radulescu, A., Daniel, R., DeWoskin, V., Niv, Y.: Dynamic interaction between reinforcement learning and attention in multidimensional environments. Neuron 93(2), 451\u2013463 (2017)","journal-title":"Neuron"},{"issue":"1","key":"41_CR24","first-page":"1334","volume":"17","author":"S Levine","year":"2016","unstructured":"Levine, S., Finn, C., Darrell, T., Abbeel, P.: End-to-end training of deep visuomotor policies. J. Mach. Learn. Res. 17(1), 1334\u20131373 (2016)","journal-title":"J. Mach. Learn. Res."},{"key":"41_CR25","doi-asserted-by":"crossref","unstructured":"Li, G., Yu, Y.: Visual saliency based on multiscale deep features. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5455\u20135463 (2015)","DOI":"10.1109\/CVPR.2015.7299184"},{"issue":"3","key":"41_CR26","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1007\/s11263-009-0215-3","volume":"82","author":"S Marat","year":"2009","unstructured":"Marat, S., Phuoc, T.H., Granjon, L., Guyader, N., Pellerin, D., Gu\u00e9rin-Dugu\u00e9, A.: Modelling spatio-temporal saliency to predict gaze direction for short videos. Int. J. Comput. Vis. 82(3), 231 (2009)","journal-title":"Int. J. Comput. Vis."},{"key":"41_CR27","unstructured":"Mnih, V., Heess, N., Graves, A., et al.: Recurrent models of visual attention. In: Advances in Neural Information Processing Systems, pp. 2204\u20132212 (2014)"},{"issue":"7540","key":"41_CR28","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"41_CR29","unstructured":"Mousavi, S., Borji, A., Mozayani, N.: Learning to predict where to look in interactive environments using deep recurrent Q-learning. arXiv preprint arXiv:1612.05753 (2016)"},{"key":"41_CR30","doi-asserted-by":"crossref","unstructured":"Nair, A., McGrew, B., Andrychowicz, M., Zaremba, W., Abbeel, P.: Overcoming exploration in reinforcement learning with demonstrations. arXiv preprint arXiv:1709.10089 (2017)","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"41_CR31","unstructured":"Palazzi, A., Abati, D., Calderara, S., Solera, F., Cucchiara, R.: Predicting the driver\u2019s focus of attention: the dr (eye) ve project. arXiv preprint arXiv:1705.03854 (2017)"},{"key":"41_CR32","doi-asserted-by":"crossref","unstructured":"Perry, J.S., Geisler, W.S.: Gaze-contingent real-time simulation of arbitrary visual fields. In: Electronic Imaging 2002, pp. 57\u201369. International Society for Optics and Photonics (2002)","DOI":"10.1117\/12.469554"},{"key":"41_CR33","doi-asserted-by":"crossref","unstructured":"Peters, R.J., Itti, L.: Beyond bottom-up: Incorporating task-dependent influences into a computational model of spatial attention. In: IEEE Conference on Computer Vision and Pattern Recognition, 2007. CVPR 2007, pp. 1\u20138. IEEE (2007)","DOI":"10.1109\/CVPR.2007.383337"},{"key":"41_CR34","doi-asserted-by":"crossref","unstructured":"Riche, N., Duvinage, M., Mancas, M., Gosselin, B., Dutoit, T.: Saliency and human fixations: State-of-the-art and study of comparison metrics. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1153\u20131160 (2013)","DOI":"10.1109\/ICCV.2013.147"},{"issue":"10","key":"41_CR35","doi-asserted-by":"publisher","first-page":"2176","DOI":"10.1162\/0899766054615699","volume":"17","author":"PR Roelfsema","year":"2005","unstructured":"Roelfsema, P.R., van Ooyen, A.: Attention-gated reinforcement learning of internal representations for classification. Neural Comput. 17(10), 2176\u20132214 (2005)","journal-title":"Neural Comput."},{"issue":"14","key":"41_CR36","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1167\/7.14.16","volume":"7","author":"CA Rothkopf","year":"2007","unstructured":"Rothkopf, C.A., Ballard, D.H., Hayhoe, M.M.: Task and context determine where you look. J. Vis. 7(14), 16\u201316 (2007)","journal-title":"J. Vis."},{"issue":"7587","key":"41_CR37","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"issue":"7676","key":"41_CR38","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver, D., et al.: Mastering the game of go without human knowledge. Nature 550(7676), 354 (2017)","journal-title":"Nature"},{"issue":"5","key":"41_CR39","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1167\/11.5.5","volume":"11","author":"BW Tatler","year":"2011","unstructured":"Tatler, B.W., Hayhoe, M.M., Land, M.F., Ballard, D.H.: Eye guidance in natural vision: reinterpreting salience. J. Vis. 11(5), 5\u20135 (2011)","journal-title":"J. Vis."},{"key":"41_CR40","unstructured":"Wang, Z., Schaul, T., Hessel, M., Hasselt, H., Lanctot, M., Freitas, N.: Dueling network architectures for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1995\u20132003 (2016)"},{"key":"41_CR41","unstructured":"Xu, K., et al.: Show, attend and tell: Neural image caption generation with visual attention. In: International Conference on Machine Learning, pp. 2048\u20132057 (2015)"},{"key":"41_CR42","doi-asserted-by":"crossref","unstructured":"Zhao, R., Ouyang, W., Li, H., Wang, X.: Saliency detection by multi-context deep learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1265\u20131274 (2015)","DOI":"10.1109\/CVPR.2015.7298731"},{"key":"41_CR43","doi-asserted-by":"crossref","unstructured":"Zhu, Y., Groth, O., Bernstein, M., Fei-Fei, L.: Visual7w: grounded question answering in images. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4995\u20135004 (2016)","DOI":"10.1109\/CVPR.2016.540"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-01252-6_41","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T15:48:20Z","timestamp":1773762500000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-01252-6_41"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030012519","9783030012526"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-01252-6_41","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"6 October 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}