{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T16:55:13Z","timestamp":1777568113592,"version":"3.51.4"},"reference-count":56,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2015,1,7]],"date-time":"2015-01-07T00:00:00Z","timestamp":1420588800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2015,8]]},"DOI":"10.1007\/s11263-014-0794-5","type":"journal-article","created":{"date-parts":[[2015,1,6]],"date-time":"2015-01-06T04:17:36Z","timestamp":1420517856000},"page":"38-55","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":113,"title":["Predicting Important Objects for Egocentric Video Summarization"],"prefix":"10.1007","volume":"114","author":[{"given":"Yong Jae","family":"Lee","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kristen","family":"Grauman","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,1,7]]},"reference":[{"key":"794_CR1","unstructured":"Aghazadeh, O., Sullivan, J., & Carlsson, S. (2011). Novelty detection from an egocentric perspective. In CVPR."},{"key":"794_CR2","doi-asserted-by":"crossref","unstructured":"Alexe, B., Deselaers, T., & Ferrari, V. (2010). What is an object? In CVPR.","DOI":"10.1109\/CVPR.2010.5540226"},{"key":"794_CR3","doi-asserted-by":"crossref","unstructured":"Aner, A., & Kender, J. R. (2002). Video Summaries through mosaic-based shot and scene clustering. In ECCV.","DOI":"10.1007\/3-540-47979-1_26"},{"issue":"3","key":"794_CR4","doi-asserted-by":"crossref","first-page":"500","DOI":"10.1109\/TPAMI.2010.143","volume":"33","author":"T Brox","year":"2011","unstructured":"Brox, T., & Malik, J. (2011). Large displacement optical flow: Descriptor matching in variational motion estimation. TPAMI, 33(3), 500\u2013513.","journal-title":"TPAMI"},{"key":"794_CR5","doi-asserted-by":"crossref","unstructured":"Carreira, J., & Sminchisescu, C. (2010). Constrained parametric min-cuts for automatic object segmentation. In CVPR.","DOI":"10.1109\/CVPR.2010.5540063"},{"key":"794_CR6","doi-asserted-by":"crossref","unstructured":"Caspi, Y., Axelrod, A., Matsushita, Y., & Gamliel, A. (2006). Dynamic stills and clip trailer. The Visual Computer, 22(9), 642\u2013652","DOI":"10.1007\/s00371-006-0046-y"},{"key":"794_CR7","doi-asserted-by":"crossref","unstructured":"Cheng, M.-M., Zhang, Z., Lin, W.-Y., & Torr, P. (2014). BING: Binarized normed gradients for objectness estimation at 300fpsn. In CVPR.","DOI":"10.1109\/CVPR.2014.414"},{"key":"794_CR8","doi-asserted-by":"crossref","unstructured":"Clarkson, B., & Pentland, A. (1999). Unsupervised clustering of ambulatory audio and video. In ICASSP.","DOI":"10.1109\/ICASSP.1999.757481"},{"key":"794_CR9","doi-asserted-by":"crossref","unstructured":"Doherty, A., & Smeaton, A. (2008). Combining face detection and novelty to identify important events in a visual lifelog. In International Conference on Computer and Information Technology Workshops.","DOI":"10.1109\/CIT.2008.Workshops.31"},{"key":"794_CR10","doi-asserted-by":"crossref","unstructured":"Doherty, A., Byrne, D., Smeaton, A., Jones, G., & Hughes, M. (2008). Investigating keyframe selection methods in the novel domain of passively captured visual lifelogs. In CIVR.","DOI":"10.1145\/1386352.1386389"},{"key":"794_CR11","doi-asserted-by":"crossref","unstructured":"Endres, I., & Hoiem, D. (2010). Category independent object proposals. In ECCV.","DOI":"10.1007\/978-3-642-15555-0_42"},{"key":"794_CR12","doi-asserted-by":"crossref","unstructured":"Fathi, A., Farhadi, A., & Rehg, J. (2011). Understanding egocentric activities. In ICCV.","DOI":"10.1109\/ICCV.2011.6126269"},{"key":"794_CR13","doi-asserted-by":"crossref","unstructured":"Fathi, A., Hodgins, J. K., & Rehg, J. M. (2012). Social interactions: A first-person perspective. In CVPR.","DOI":"10.1109\/CVPR.2012.6247805"},{"key":"794_CR14","doi-asserted-by":"crossref","unstructured":"Felzenszwalb, P., & Huttenlocher, D. (2004). Efficient graph-based image segmentation. IJCV, 59(2), 167\u2013181.","DOI":"10.1023\/B:VISI.0000022288.19776.77"},{"key":"794_CR15","unstructured":"Gao, D., Mahadevan, V., & Vasconcelos, N. (2007). The discriminant center-surround hypothesis for bottom-up saliency. In NIPS."},{"key":"794_CR16","doi-asserted-by":"crossref","unstructured":"Goldman, D., Curless, B., Salesin, D., & Seitz, S. (2006). Schematic storyboarding for video visualization and editing. In SIGGRAPH.","DOI":"10.1145\/1179352.1141967"},{"key":"794_CR17","doi-asserted-by":"crossref","unstructured":"Healey, J., & Picard, R. (1998). Startlecam: A cybernetic wearable camera. In Wearable Computers.","DOI":"10.1109\/ISWC.1998.729528"},{"key":"794_CR18","doi-asserted-by":"crossref","unstructured":"Hodges, S., Williams, L., Berry, E., Izadi, S., Srinivasan, J., Butler, A., Smyth, G., Kapur, N., & Wood, K. (2006). SenseCam: A retrospective memory aid. In UBICOMP.","DOI":"10.1007\/11853565_11"},{"key":"794_CR19","doi-asserted-by":"crossref","unstructured":"Hodges, S., Berry, E., & Wood, K. (2011). Sensecam: A wearable camera which stimulates and rehabilitates autobiographical memory. Memory, 19(7), 685\u2013696.","DOI":"10.1080\/09658211.2011.605591"},{"key":"794_CR20","doi-asserted-by":"crossref","unstructured":"Huynh, T., Fritz, M., & Schiele, B. (2008). Discovery of activity patterns using topic models. In UBICOMP.","DOI":"10.1145\/1409635.1409638"},{"key":"794_CR21","doi-asserted-by":"crossref","unstructured":"Hwang, S. J., & Grauman, K. (2010). Accounting for the relative importance of objects in image retrieval. In BMVC.","DOI":"10.5244\/C.24.58"},{"key":"794_CR22","doi-asserted-by":"crossref","unstructured":"Itti, L., Koch, C., & Niebur, E. (1998). A model of saliency-based visual attention for rapid scene analysis. TPAMI, 20(11), 1254\u20131259.","DOI":"10.1109\/34.730558"},{"key":"794_CR23","unstructured":"Jojic, N., Perina, A., & Murino, V. (2010). Structural epitome: A way to summarize one\u2019s visual experience. In NIPS."},{"key":"794_CR24","doi-asserted-by":"crossref","unstructured":"Jones, M., & Rehg, J. (2002). Statistical color models with application to skin detection. IJCV, 46(1), 81\u201396.","DOI":"10.1023\/A:1013200319198"},{"key":"794_CR25","doi-asserted-by":"crossref","unstructured":"Kitani, K., Okabe, T., Sato, Y., & Sugimoto, A. (2011). Fast Unsupervised Ego-Action Learning for First-Person Sports Video. In CVPR.","DOI":"10.1109\/CVPR.2011.5995406"},{"key":"794_CR26","doi-asserted-by":"crossref","unstructured":"Kolsch, M., & Turk, M. (2004). Robust hand detection. In FG.","DOI":"10.1109\/AFGR.2004.1301601"},{"key":"794_CR27","doi-asserted-by":"crossref","unstructured":"Lee, M., & Dey, A. (2007). Providing good memory cues for people with episodic memory impairment. In ACM SIGACCESS Conference on Computers and Accessibility.","DOI":"10.1145\/1296843.1296867"},{"key":"794_CR28","doi-asserted-by":"crossref","unstructured":"Lee, Y. J., Kim, J., & Grauman, K. (2011). Key-segments for video object segmentation. In ICCV.","DOI":"10.1109\/ICCV.2011.6126471"},{"key":"794_CR29","unstructured":"Lee, Y. J., Ghosh, J., & Grauman, K. (2012). Discovering important people and objects for egocentric video summarization. In CVPR."},{"key":"794_CR30","doi-asserted-by":"crossref","unstructured":"Li, C., & Kitani, K. M. (2013). Pixel-level hand detection for ego-centric videos. In CVPR.","DOI":"10.1109\/CVPR.2013.458"},{"key":"794_CR31","doi-asserted-by":"crossref","unstructured":"Li, Y., Fathi, A. & Rehg, J. M. (2013). Learning to predict gaze in egocentric video. In ICCV.","DOI":"10.1109\/ICCV.2013.399"},{"key":"794_CR32","doi-asserted-by":"crossref","unstructured":"Lin, W., & Hauptmann, A. (2006). Structuring continuous video recordings of everyday life using time-constrained clustering. In IS&T\/SPIE Symposium on Electronic Imaging.","DOI":"10.1117\/12.642009"},{"key":"794_CR33","doi-asserted-by":"crossref","unstructured":"Liu, T., & Kender, J. R. (2002). Optimization algorithms for the selection of key frame sequences of variable length. In ECCV.","DOI":"10.1007\/3-540-47979-1_27"},{"key":"794_CR34","doi-asserted-by":"crossref","unstructured":"Liu, T., Sun, J., Zheng, N., Tang, X., & Shum, H. (2007). Learning to detect a salient object. In CVPR.","DOI":"10.1109\/CVPR.2007.383047"},{"key":"794_CR35","unstructured":"Liu, D., Hua, G., & Chen, T. (2009). A hierarchical visual model for video object summarization. In TPAMI."},{"key":"794_CR36","doi-asserted-by":"crossref","unstructured":"Lowe, D. (2004). Distinctive image features from scale-invariant keypoints. IJCV, 60(2), 91\u2013110.","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"794_CR37","doi-asserted-by":"crossref","unstructured":"Lu, Z., & Grauman, K. (2013). Story-driven summarization for egocentric video. In CVPR.","DOI":"10.1109\/CVPR.2013.350"},{"key":"794_CR38","doi-asserted-by":"crossref","unstructured":"Mann, S. (1998). Wearcam (the wearable camera): Personal imaging systems for long term use in wearable tetherless computer mediated reality and personal photo\/videographic memory prosthesis. In Wearable Computers.","DOI":"10.1109\/ISWC.1998.729538"},{"issue":"2","key":"794_CR39","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1016\/j.jvcir.2007.04.002","volume":"19","author":"A Money","year":"2008","unstructured":"Money, A., & Agius, H. (2008). Video summarisation: A conceptual framework and survey of the state of the art. Journal of Visual Communication and Image Representation, 19(2), 121\u2013143.","journal-title":"Journal of Visual Communication and Image Representation"},{"key":"794_CR40","unstructured":"Ng, H. W., Sawahata, Y., & Aizawa, K. (2002). Summarizing wearable videos using support vector machine. In ICME."},{"key":"794_CR41","doi-asserted-by":"crossref","unstructured":"Perona, P., & Freeman, W. (1998). A factorization approach to grouping. In ECCV.","DOI":"10.1007\/BFb0055696"},{"key":"794_CR42","doi-asserted-by":"crossref","unstructured":"Pirsiavash, H., & Ramanan, D. (2012). Detecting activities of daily living in first-person camera views. In CVPR.","DOI":"10.1109\/CVPR.2012.6248010"},{"key":"794_CR43","doi-asserted-by":"crossref","unstructured":"Pritch, Y., Rav-Acha, A., Gutman, A., & Peleg, S. (2007). Webcam synopsis: Peeking around the world. In ICCV.","DOI":"10.1109\/ICCV.2007.4408934"},{"key":"794_CR44","unstructured":"Rav-Acha, A., Pritch, Y., & Peleg, S. (2006). Making a long video short. In CVPR."},{"key":"794_CR45","doi-asserted-by":"crossref","unstructured":"Ren, X., & Gu, C. (2010). Figure-ground segmentation improves handled object recognition in egocentric video. In CVPR.","DOI":"10.1109\/CVPR.2010.5540074"},{"key":"794_CR46","doi-asserted-by":"crossref","unstructured":"Ryoo, M. S., & Matthies, L. (2013). First-person activity recognition: What are they doing to me? In CVPR.","DOI":"10.1109\/CVPR.2013.352"},{"key":"794_CR47","doi-asserted-by":"crossref","unstructured":"Simakov, D., Caspi, Y., Shechtman, E., & Irani, M. (2008). Summarizing visual data using bidirectional similarity. In CVPR.","DOI":"10.1109\/CVPR.2008.4587842"},{"key":"794_CR48","doi-asserted-by":"crossref","unstructured":"Spain, M., & Perona, P. (2008). Some objects are more equal than others: Measuring and predicting importance. In ECCV.","DOI":"10.1007\/978-3-540-88682-2_40"},{"key":"794_CR49","doi-asserted-by":"crossref","unstructured":"Spriggs, E., la Torre, F. D., & Hebert, M. (2009). Temporal segmentation and activity classification from first-person sensing. In CVPR Workshop on Egocentric Vision.","DOI":"10.1109\/CVPRW.2009.5204354"},{"key":"794_CR50","doi-asserted-by":"crossref","unstructured":"Starner, T., Schiele, B., & Pentland, A. (1998a). Visual contextual awareness in wearable computing. In ISWC.","DOI":"10.1109\/ISWC.1998.729529"},{"key":"794_CR51","doi-asserted-by":"crossref","unstructured":"Starner, T., Weaver, J., & Pentland, A. (1998b). Real-time american sign language recognition using desk and wearable computer based video. PAMI, 20(12), 1371\u20131375.","DOI":"10.1109\/34.735811"},{"key":"794_CR52","doi-asserted-by":"crossref","unstructured":"Viola, P., & Jones, M. (2001). Rapid object detection using a boosted cascade of simple features. In CVPR.","DOI":"10.1109\/CVPR.2001.990517"},{"key":"794_CR53","doi-asserted-by":"crossref","first-page":"1395","DOI":"10.1016\/j.neunet.2006.10.001","volume":"19","author":"D Walther","year":"2006","unstructured":"Walther, D., & Koch, C. (2006). Modeling attention to salient proto-objects. Neural Networks, 19, 1395\u20131407.","journal-title":"Neural Networks"},{"key":"794_CR54","doi-asserted-by":"crossref","unstructured":"Weng, F., & Merialdo, B. (2009). Multi-document video summarization. In ICME.","DOI":"10.1109\/ICME.2009.5202747"},{"key":"794_CR55","unstructured":"Wolf, W. (1996). Keyframe selection by motion analysis. In ICASSP."},{"key":"794_CR56","doi-asserted-by":"crossref","unstructured":"Zhang, H. J., Wu, J., Zhong, D., & Smoliar, S. (1997). An integrated system for content-based video retrieval and browsing. In Pattern Recognition.","DOI":"10.1016\/S0031-3203(96)00109-4"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-014-0794-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-014-0794-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-014-0794-5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,19]],"date-time":"2019-08-19T04:18:10Z","timestamp":1566188290000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-014-0794-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,1,7]]},"references-count":56,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2015,8]]}},"alternative-id":["794"],"URL":"https:\/\/doi.org\/10.1007\/s11263-014-0794-5","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,1,7]]}}}