{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T19:53:12Z","timestamp":1775245992905,"version":"3.50.1"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030012151","type":"print"},{"value":"9783030012168","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-01216-8_13","type":"book-chapter","created":{"date-parts":[[2018,10,8]],"date-time":"2018-10-08T11:10:26Z","timestamp":1538997026000},"page":"202-217","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":24,"title":["Egocentric Activity Prediction via Event Modulated Attention"],"prefix":"10.1007","author":[{"given":"Yang","family":"Shen","sequence":"first","affiliation":[]},{"given":"Bingbing","family":"Ni","sequence":"additional","affiliation":[]},{"given":"Zefan","family":"Li","sequence":"additional","affiliation":[]},{"given":"Ning","family":"Zhuang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,10,9]]},"reference":[{"key":"13_CR1","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-68560-1","volume-title":"Survival and Event History Analysis: A Process Point of View","author":"O Aalen","year":"2008","unstructured":"Aalen, O., Borgan, O., Gjessing, H.: Survival and Event History Analysis: A Process Point of View. Springer, New York (2008). https:\/\/doi.org\/10.1007\/978-0-387-68560-1"},{"key":"13_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/978-3-642-15822-3_20","volume-title":"Artificial Neural Networks \u2013 ICANN 2010","author":"M Baccouche","year":"2010","unstructured":"Baccouche, M., Mamalet, F., Wolf, C., Garcia, C., Baskurt, A.: Action classification in soccer videos with long short-term memory recurrent neural networks. In: Diamantaras, K., Duch, W., Iliadis, L.S. (eds.) ICANN 2010. LNCS, vol. 6353, pp. 154\u2013159. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-15822-3_20"},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Borji, A., Sihite, D.N., Itti, L.: Probabilistic learning of task-specific visual attention. In: CVPR, pp. 470\u2013477 (2012)","DOI":"10.1109\/CVPR.2012.6247710"},{"key":"13_CR4","doi-asserted-by":"crossref","unstructured":"Cho, K., van Merrienboer, B., Bahdanau, D., Bengio, Y.: On the properties of neural machine translation: encoder-decoder approaches. In: Proceedings of SSST@EMNLP, pp. 103\u2013111 (2014). http:\/\/aclweb.org\/anthology\/W\/W14\/W14-4012.pdf","DOI":"10.3115\/v1\/W14-4012"},{"key":"13_CR5","doi-asserted-by":"crossref","unstructured":"Du, N., Dai, H., Trivedi, R., Upadhyay, U., Gomez-Rodriguez, M., Song, L.: Recurrent marked temporal point processes: embedding event history to vector. In: Proceedings of the 22nd ACM SIGKDD International Conference, pp. 1555\u20131564 (2016)","DOI":"10.1145\/2939672.2939875"},{"issue":"14","key":"13_CR6","doi-asserted-by":"publisher","first-page":"18:1","DOI":"10.1167\/8.14.18","volume":"8","author":"W Einhauser","year":"2008","unstructured":"Einhauser, W., Spain, M., Perona, P.: Objects predict fixations better than early saliency. J. Vis. 8(14), 18:1\u201318:26 (2008)","journal-title":"J. Vis."},{"issue":"2","key":"13_CR7","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1207\/s15516709cog1402_1","volume":"14","author":"JL Elman","year":"1990","unstructured":"Elman, J.L.: Finding structure in time. Cogn. Sci. 14(2), 179\u2013211 (1990)","journal-title":"Cogn. Sci."},{"key":"13_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"314","DOI":"10.1007\/978-3-642-33718-5_23","volume-title":"Computer Vision \u2013 ECCV 2012","author":"A Fathi","year":"2012","unstructured":"Fathi, A., Li, Y., Rehg, J.M.: Learning to recognize daily actions using gaze. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7572, pp. 314\u2013327. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33718-5_23"},{"key":"13_CR9","doi-asserted-by":"crossref","unstructured":"Fathi, A., Ren, X., Rehg, J.M.: Learning to recognize objects in egocentric activities. In: CVPR, pp. 3281\u20133288 (2011)","DOI":"10.1109\/CVPR.2011.5995444"},{"key":"13_CR10","unstructured":"Girshick, R.B.: Fast R-CNN. CoRR abs\/1504.08083 (2015). http:\/\/arxiv.org\/abs\/1504.08083"},{"key":"13_CR11","unstructured":"Graves, A.: Generating sequences with recurrent neural networks. CoRR abs\/1308.0850 (2013). http:\/\/arxiv.org\/abs\/1308.0850"},{"issue":"1","key":"13_CR12","doi-asserted-by":"publisher","first-page":"83","DOI":"10.2307\/2334319","volume":"58","author":"AG Hawkes","year":"1971","unstructured":"Hawkes, A.G.: Spectra of some self-exciting and mutually exciting point processes. Biometrika 58(1), 83\u201390 (1971). https:\/\/doi.org\/10.2307\/2334319","journal-title":"Biometrika"},{"issue":"8","key":"13_CR13","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"issue":"11","key":"13_CR14","doi-asserted-by":"publisher","first-page":"1254","DOI":"10.1109\/34.730558","volume":"20","author":"L Itti","year":"1998","unstructured":"Itti, L., Koch, C., Niebur, E.: A model of saliency-based visual attention for rapid scene analysis. IEEE Trans. Pattern Anal. Mach. Intell. 20(11), 1254\u20131259 (1998)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"Jang, Y., Song, Y., Yu, Y., Kim, Y., Kim, G.: TGIF-QA: toward spatio-temporal reasoning in visual question answering. In: CVPR, pp. 1359\u20131367 (2017)","DOI":"10.1109\/CVPR.2017.149"},{"key":"13_CR16","doi-asserted-by":"crossref","unstructured":"Jia, Y., et al..: Caffe: convolutional architecture for fast feature embedding. In: ACMMM, pp. 675\u2013678 (2014). http:\/\/doi.acm.org\/10.1145\/2647868.2654889","DOI":"10.1145\/2647868.2654889"},{"key":"13_CR17","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: NIPS, pp. 1106\u20131114 (2012)"},{"key":"13_CR18","first-page":"123","volume-title":"JMP for Basic Univariate and Multivariate Statistics: Methods for Researchers and Social Scientists","author":"A Lehman","year":"2005","unstructured":"Lehman, A., O\u2019Rourke, N., Hatcher, L., Stepanski, E.: JMP for Basic Univariate and Multivariate Statistics: Methods for Researchers and Social Scientists, 2nd edn, p. 123. SAS Institute Inc., Cary (2005)","edition":"2"},{"key":"13_CR19","doi-asserted-by":"crossref","unstructured":"Li, Y., Ye, Z., Rehg, J.M.: Delving into egocentric actions. In: CVPR, pp. 287\u2013295 (2015)","DOI":"10.1109\/CVPR.2015.7298625"},{"key":"13_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/978-3-319-46448-0_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"W Liu","year":"2016","unstructured":"Liu, W., et al.: SSD: single shot multibox detector. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 21\u201337. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_2"},{"key":"13_CR21","doi-asserted-by":"crossref","unstructured":"Liu, Y., Yan, J., Ouyang, W.: Quality aware network for set to set recognition. In: CVPR, pp. 4694\u20134703 (2017)","DOI":"10.1109\/CVPR.2017.499"},{"key":"13_CR22","doi-asserted-by":"crossref","unstructured":"Ma, M., Fan, H., Kitani, K.M.: Going deeper into first-person activity recognition. In: CVPR, pp. 1894\u20131903 (2016)","DOI":"10.1109\/CVPR.2016.209"},{"key":"13_CR23","unstructured":"Mnih, V., Heess, N., Graves, A., Kavukcuoglu, K.: Recurrent models of visual attention. In: NIPS, pp. 2204\u20132212 (2014). http:\/\/papers.nips.cc\/paper\/5542-recurrent-models-of-visual-attention"},{"key":"13_CR24","doi-asserted-by":"crossref","unstructured":"Moltisanti, D., Wray, M., Mayol-Cuevas, W.W., Damen, D.: Trespassing the boundaries: labeling temporal bounds for object interactions in egocentric video. In: ICCV, pp. 2905\u20132913 (2017)","DOI":"10.1109\/ICCV.2017.314"},{"key":"13_CR25","doi-asserted-by":"crossref","unstructured":"Ng, J.Y., Hausknecht, M.J., Vijayanarasimhan, S., Vinyals, O., Monga, R., Toderici, G.: Beyond short snippets: deep networks for video classification. In: CVPR, pp. 4694\u20134702 (2015)","DOI":"10.1109\/CVPR.2015.7299101"},{"key":"13_CR26","doi-asserted-by":"crossref","unstructured":"Poleg, Y., Ephrat, A., Peleg, S., Arora, C.: Compact CNN for indexing egocentric videos. In: WACV, pp. 1\u20139 (2016)","DOI":"10.1109\/WACV.2016.7477708"},{"key":"13_CR27","doi-asserted-by":"crossref","unstructured":"Ryoo, M.S., Rothrock, B., Matthies, L.H.: Pooled motion features for first-person videos. In: CVPR, pp. 896\u2013904 (2015)","DOI":"10.1109\/CVPR.2015.7298691"},{"key":"13_CR28","doi-asserted-by":"crossref","unstructured":"Singh, S., Arora, C., Jawahar, C.V.: First person action recognition using deep learned descriptors. In: CVPR, pp. 2620\u20132628 (2016)","DOI":"10.1109\/CVPR.2016.287"},{"key":"13_CR29","unstructured":"Sutskever, I., Vinyals, O., Le, Q.V.: Sequence to sequence learning with neural networks. In: NIPS, pp. 3104\u20133112 (2014). http:\/\/papers.nips.cc\/paper\/5346-sequence-to-sequence-learning-with-neural-networks"},{"key":"13_CR30","doi-asserted-by":"crossref","unstructured":"Xiao, S., Yan, J., Yang, X., Zha, H., Chu, S.M.: Modeling the intensity function of point process via recurrent neural networks. In: AAAI, pp. 1597\u20131603 (2017). http:\/\/aaai.org\/ocs\/index.php\/AAAI\/AAAI17\/paper\/view\/14391","DOI":"10.1609\/aaai.v31i1.10724"},{"key":"13_CR31","doi-asserted-by":"crossref","unstructured":"Zhang, M., Ma, K.T., Lim, J., Zhao, Q., Feng, J.: Deep future gaze: gaze anticipation on egocentric videos using adversarial networks. In: CPVR, pp. 3539\u20133548 (2017)","DOI":"10.1109\/CVPR.2017.377"},{"key":"13_CR32","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Ni, B., Hong, R., Yang, X., Tian, Q.: Cascaded interactional targeting network for egocentric video analysis. In: CVPR, pp. 1904\u20131913 (2016)","DOI":"10.1109\/CVPR.2016.210"},{"key":"13_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1007\/978-3-319-16817-3_5","volume-title":"Computer Vision \u2013 ACCV 2014","author":"X Zhu","year":"2015","unstructured":"Zhu, X., Jia, X., Wong, K.-Y.K.: Pixel-level hand detection with shape-aware structured forests. In: Cremers, D., Reid, I., Saito, H., Yang, M.-H. (eds.) ACCV 2014. LNCS, vol. 9006, pp. 64\u201378. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-16817-3_5"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-01216-8_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T18:57:31Z","timestamp":1775242651000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-01216-8_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030012151","9783030012168"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-01216-8_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"9 October 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}