{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T09:07:08Z","timestamp":1777626428106,"version":"3.51.4"},"publisher-location":"Cham","reference-count":50,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319464534","type":"print"},{"value":"9783319464541","type":"electronic"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-46454-1_5","type":"book-chapter","created":{"date-parts":[[2016,9,15]],"date-time":"2016-09-15T09:15:09Z","timestamp":1473930909000},"page":"71-88","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Learning Visual Storylines with Skipping Recurrent Neural Networks"],"prefix":"10.1007","author":[{"given":"Gunnar A.","family":"Sigurdsson","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinlei","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Abhinav","family":"Gupta","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,9,16]]},"reference":[{"key":"5_CR1","doi-asserted-by":"crossref","unstructured":"Chen, X., Shrivastava, A., Gupta, A.: NEIL: extracting visual knowledge from web data. In: ICCV (2013)","DOI":"10.1109\/ICCV.2013.178"},{"key":"5_CR2","doi-asserted-by":"crossref","unstructured":"Divvala, S.K., Farhadi, A., Guestrin, C.: Learning everything about anything: webly-supervised visual concept learning. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.412"},{"key":"5_CR3","doi-asserted-by":"crossref","unstructured":"Sadeghi, F., Divvala, S.K., Farhadi, A.: VisKE: visual knowledge extraction and question answering by visual verification of relation phrases. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298752"},{"key":"5_CR4","unstructured":"Izadinia, H., Farhadi, A., Hertzmann, A., Hoffman, M.D.: Image classification and retrieval from user-supplied tags (2014). arXiv preprint: \n                      arXiv:1411.6909"},{"key":"5_CR5","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: NIPS (2012)"},{"issue":"2","key":"5_CR6","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1207\/s15516709cog1402_1","volume":"14","author":"JL Elman","year":"1990","unstructured":"Elman, J.L.: Finding structure in time. Cogn. Sci. 14(2), 179\u2013211 (1990)","journal-title":"Cogn. Sci."},{"key":"5_CR7","doi-asserted-by":"crossref","unstructured":"Malinowski, M., Rohrbach, M., Fritz, M.: Ask your neurons: a neural-based approach to answering questions about images. In: ICCV, pp. 1\u20139 (2015)","DOI":"10.1109\/ICCV.2015.9"},{"key":"5_CR8","unstructured":"Shih, K.J., Singh, S., Hoiem, D.: Where to look: focus regions for visual question answering (2015). arXiv preprint: \n                      arXiv:1511.07394"},{"key":"5_CR9","unstructured":"Yang, Z., He, X., Gao, J., Deng, L., Smola, A.: Stacked attention networks for image question answering (2015). arXiv preprint: \n                      arXiv:1511.02274"},{"key":"5_CR10","unstructured":"Zhu, Y., Groth, O., Bernstein, M., Fei-Fei, L.: Visual7w: grounded question answering in images (2015). arXiv preprint: \n                      arXiv:1511.03416"},{"key":"5_CR11","unstructured":"Xiong, C., Merity, S., Socher, R.: Dynamic memory networks for visual and textual question answering (2016). arXiv preprint: \n                      arXiv:1603.01417"},{"key":"5_CR12","unstructured":"Karpathy, A., Li, F.: Deep visual-semantic alignments for generating image descriptions (2014). arXiv preprint: \n                      arXiv:1412.2306"},{"key":"5_CR13","doi-asserted-by":"crossref","unstructured":"Donahue, J., Hendricks, L.A., Guadarrama, S., Rohrbach, M., Venugopalan, S., Saenko, K., Darrell, T.: Long-term recurrent convolutional networks for visual recognition and description. In: CVPR (2015)","DOI":"10.21236\/ADA623249"},{"key":"5_CR14","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., Erhan, D.: Show and tell: a neural image caption generator. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"5_CR15","unstructured":"Venugopalan, S., Rohrbach, M., Donahue, J., Mooney, R.J., Darrell, T., Saenko, K.: Sequence to sequence - video to text (2015). arXiv preprint: \n                      arXiv:1505.00487"},{"key":"5_CR16","unstructured":"Xu, K., Ba, J., Kiros, R., Courville, A., Salakhutdinov, R., Zemel, R., Bengio, Y.: Show, attend and tell: neural image caption generation with visual attention (2015). arXiv preprint: \n                      arXiv:1502.03044"},{"key":"5_CR17","unstructured":"Gregor, K., Danihelka, I., Graves, A., Wierstra, D.: Draw: a recurrent neural network for image generation (2015). arXiv preprint: \n                      arXiv:1502.04623"},{"key":"5_CR18","unstructured":"Zhu, Y., Kiros, R., Zemel, R., Salakhutdinov, R., Urtasun, R., Torralba, A., Fidler, S.: Aligning books and movies: towards story-like visual explanations by watching movies and reading books (2015). arXiv preprint: \n                      arXiv:1506.06724"},{"key":"5_CR19","doi-asserted-by":"crossref","unstructured":"Chen, X., Zitnick, C.L.: Learning a recurrent visual representation for image caption generation. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298856"},{"issue":"2","key":"5_CR20","first-page":"157","volume":"5","author":"Y Bengio","year":"1994","unstructured":"Bengio, Y., Simard, P., Frasconi, P.: Learning long-term dependencies with gradient descent is difficult. TNN 5(2), 157\u2013166 (1994)","journal-title":"TNN"},{"issue":"8","key":"5_CR21","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"5_CR22","doi-asserted-by":"crossref","unstructured":"Kim, G., Xing, E.P.: Reconstructing storyline graphs for image recommendation from web community photos. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.496"},{"key":"5_CR23","doi-asserted-by":"crossref","unstructured":"Kim, G., Sigal, L., Xing, E.P.: Joint summarization of large-scale collections of web images and videos for storyline reconstruction. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.538"},{"key":"5_CR24","doi-asserted-by":"crossref","unstructured":"DeMenthon, D., Kobla, V., Doermann, D.: Video summarization by curve simplification. In: ACM MM, pp. 211\u2013218. ACM (1998)","DOI":"10.21236\/ADA459300"},{"issue":"2","key":"5_CR25","first-page":"296","volume":"15","author":"CW Ngo","year":"2005","unstructured":"Ngo, C.W., Ma, Y.F., Zhang, H.J.: Video summarization and scene detection by graph modeling. TCSVT 15(2), 296\u2013305 (2005)","journal-title":"TCSVT"},{"key":"5_CR26","doi-asserted-by":"crossref","unstructured":"Khosla, A., Hamid, R., Lin, C.J., Sundaresan, N.: Large-scale video summarization using web-image priors. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.348"},{"key":"5_CR27","series-title":"Lecture Notes in Computer Science","first-page":"1","volume-title":"Computer Vision \u2013 ECCV 2014","author":"R Martin-Brualla","year":"2014","unstructured":"Martin-Brualla, R., He, Y., Russell, B.C., Seitz, S.M.: The 3D jigsaw puzzle: mapping large indoor spaces. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014, Part III. LNCS, vol. 8691, pp. 1\u201316. Springer, Heidelberg (2014)"},{"key":"5_CR28","doi-asserted-by":"crossref","unstructured":"Sadeghi, F., Tena, J.R., Farhadi Ali, S.L.: Learning to select and order vacation photographs. In: WACV (2015)","DOI":"10.1109\/WACV.2015.74"},{"key":"5_CR29","doi-asserted-by":"crossref","unstructured":"Xiong, B., Kim, G., Sigal, L.: Storyline representation of egocentric videos with an applications to story-based search. In: ICCV, pp. 4525\u20134533 (2015)","DOI":"10.1109\/ICCV.2015.514"},{"key":"5_CR30","doi-asserted-by":"crossref","unstructured":"Kim, G., Moon, S., Sigal, L.: Joint photo stream and blog post summarization and exploration. In: CVPR, pp. 3081\u20133089. IEEE (2015)","DOI":"10.1109\/CVPR.2015.7298927"},{"key":"5_CR31","doi-asserted-by":"crossref","unstructured":"Chu, W.S., Song, Y., Jaimes, A.: Video co-summarization: video summarization by visual co-occurrence. In: CVPR, pp. 3584\u20133592 (2015)","DOI":"10.1109\/CVPR.2015.7298981"},{"key":"5_CR32","unstructured":"Shank, R., Abelson, R.: Scripts, plans, goals and understanding (1977)"},{"key":"5_CR33","unstructured":"Chambers, N., Jurafsky, D.: Unsupervised learning of narrative event chains. In: ACL (2008)"},{"key":"5_CR34","doi-asserted-by":"crossref","unstructured":"McIntyre, N., Lapata, M.: Learning to tell tales: a data-driven approach to story generation. In: ACL (2009)","DOI":"10.3115\/1687878.1687910"},{"key":"5_CR35","unstructured":"Wang, D., Li, T., Ogihara, M.: Generating pictorial storylines via minimum-weight connected dominating set approximation in multi-view graphs. In: AAAI (2012)"},{"key":"5_CR36","doi-asserted-by":"crossref","unstructured":"Gupta, A., Srinivasan, P., Shi, J., Davis, L.S.: Understanding videos, constructing plots learning a visually grounded storyline model from annotated videos. In: CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206492"},{"issue":"1","key":"5_CR37","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1145\/1198302.1198305","volume":"3","author":"BT Truong","year":"2007","unstructured":"Truong, B.T., Venkatesh, S.: Video abstraction: a systematic review and classification. TOMCCAP 3(1), 3 (2007)","journal-title":"TOMCCAP"},{"issue":"1","key":"5_CR38","first-page":"82","volume":"16","author":"Z Cernekova","year":"2006","unstructured":"Cernekova, Z., Pitas, I., Nikou, C.: Information theory-based shot cut\/fade detection and video summarization. TCSVT 16(1), 82\u201391 (2006)","journal-title":"TCSVT"},{"key":"5_CR39","unstructured":"Lee, Y.J., Ghosh, J., Grauman, K.: Discovering important people and objects for egocentric video summarization. In: CVPR (2012)"},{"key":"5_CR40","doi-asserted-by":"crossref","unstructured":"Ma, Y.F., Lu, L., Zhang, H.J., Li, M.: A user attention model for video summarization. In: ACM MM (2002)","DOI":"10.1145\/641007.641116"},{"key":"5_CR41","doi-asserted-by":"crossref","unstructured":"Sinha, P., Mehrotra, S., Jain, R.: Summarization of personal photologs using multidimensional content and context. In: ICMR (2011)","DOI":"10.1145\/1991996.1992000"},{"key":"5_CR42","doi-asserted-by":"crossref","unstructured":"Obrador, P., De Oliveira, R., Oliver, N.: Supporting personal photo storytelling for social albums. In: ACM MM, pp. 561\u2013570. ACM (2010)","DOI":"10.1145\/1873951.1874025"},{"key":"5_CR43","doi-asserted-by":"crossref","unstructured":"Mikolov, T.: Recurrent neural network based language model. In: INTERSPEECH (2010)","DOI":"10.1109\/ICASSP.2011.5947611"},{"key":"5_CR44","unstructured":"Sutskever, I., Martens, J., Hinton, G.E.: Generating text with recurrent neural networks. In: ICML (2011)"},{"key":"5_CR45","unstructured":"Mikolov, T., Sutskever, I., Chen, K., Corrado, G.S., Dean, J.: Distributed representations of words and phrases and their compositionality. In: NIPS (2013)"},{"key":"5_CR46","unstructured":"Williams, R.J., Zipser, D.: Gradient-based learning algorithms for recurrent networks and their computational complexity. In: Back-Propagation: Theory, Architectures and Applications, pp. 433\u2013486 (1995)"},{"issue":"4","key":"5_CR47","doi-asserted-by":"publisher","first-page":"339","DOI":"10.1016\/0893-6080(88)90007-X","volume":"1","author":"PJ Werbos","year":"1988","unstructured":"Werbos, P.J.: Generalization of backpropagation with application to a recurrent gas market model. Neural Netw. 1(4), 339\u2013356 (1988)","journal-title":"Neural Netw."},{"key":"5_CR48","unstructured":"Thomee, B., Shamma, D.A., Friedland, G., Elizalde, B., Ni, K., Poland, D., Borth, D., Li, L.J.: The new data and new challenges in multimedia research (2015). arXiv preprint: \n                      arXiv:1503.01817"},{"key":"5_CR49","unstructured":"Karpathy, A., Johnson, J., Li, F.: Visualizing and understanding recurrent networks (2015). arXiv preprint: \n                      arXiv:1506.02078"},{"key":"5_CR50","unstructured":"Arthur, D., Vassilvitskii, S.: k-means++: the advantages of careful seeding. In: Proceedings of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms, Society for Industrial and Applied Mathematics, pp. 1027\u20131035 (2007)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2016"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-46454-1_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,10,10]],"date-time":"2020-10-10T01:39:15Z","timestamp":1602293955000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-46454-1_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319464534","9783319464541"],"references-count":50,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-46454-1_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016]]},"assertion":[{"value":"16 September 2016","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Amsterdam","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The Netherlands","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2016","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 October 2016","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 October 2016","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2016","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.eccv2016.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}