{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,13]],"date-time":"2025-11-13T07:08:06Z","timestamp":1763017686287,"version":"3.40.3"},"publisher-location":"Cham","reference-count":51,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319464534"},{"type":"electronic","value":"9783319464541"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-46454-1_27","type":"book-chapter","created":{"date-parts":[[2016,9,15]],"date-time":"2016-09-15T09:15:09Z","timestamp":1473930909000},"page":"437-453","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":55,"title":["Spot On: Action Localization from Pointly-Supervised Proposals"],"prefix":"10.1007","author":[{"given":"Pascal","family":"Mettes","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jan C.","family":"van Gemert","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cees G. M.","family":"Snoek","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,9,16]]},"reference":[{"key":"27_CR1","doi-asserted-by":"crossref","unstructured":"Tian, Y., Sukthankar, R., Shah, M.: Spatiotemporal deformable part models for action detection. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.341"},{"key":"27_CR2","doi-asserted-by":"crossref","unstructured":"Jain, M., Van Gemert, J., J\u00e9gou, H., Bouthemy, P., Snoek, C.G.M.: Action localization with tubelets from motion. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.100"},{"key":"27_CR3","doi-asserted-by":"crossref","unstructured":"Yu, G., Yuan, J.: Fast action proposals for human action detection and search. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298735"},{"key":"27_CR4","doi-asserted-by":"crossref","unstructured":"van Gemert, J.C., Jain, M., Gati, E., Snoek, C.G.M.: APT: action localization proposals from dense trajectories. In: BMVC (2015)","DOI":"10.5244\/C.29.177"},{"key":"27_CR5","doi-asserted-by":"crossref","unstructured":"Soomro, K., Idrees, H., Shah, M.: Action localization in videos through context walk. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.375"},{"key":"27_CR6","unstructured":"Kim, G., Torralba, A.: Unsupervised detection of regions of interest using iterative link analysis. In: NIPS (2009)"},{"key":"27_CR7","series-title":"Lecture Notes in Computer Science","first-page":"1","volume-title":"Computer Vision \u2013 ECCV 2012","author":"O Russakovsky","year":"2012","unstructured":"Russakovsky, O., Lin, Y., Yu, K., Fei-Fei, L.: Object-centric spatial pooling for image classification. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012, Part II. LNCS, vol. 7573, pp. 1\u201315. Springer, Heidelberg (2012)"},{"key":"27_CR8","doi-asserted-by":"crossref","unstructured":"Cinbis, R.G., Verbeek, J., Schmid, C.: Multi-fold MIL training for weakly supervised object localization. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.309"},{"key":"27_CR9","doi-asserted-by":"crossref","unstructured":"Nguyen, M., Torresani, L., de la Torre, F., Rother, C.: Weakly supervised discriminative localization and classification: a joint learning process. In: ICCV (2009)","DOI":"10.21236\/ADA507101"},{"key":"27_CR10","unstructured":"Andrews, S., Tsochantaridis, I., Hofmann, T.: Support vector machines for multiple-instance learning. In: NIPS (2002)"},{"key":"27_CR11","doi-asserted-by":"crossref","unstructured":"Xu, J., Schwing, A.G., Urtasun, R.: Learning to segment under various forms of weak supervision. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7299002"},{"key":"27_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"549","DOI":"10.1007\/978-3-319-46478-7_34","volume-title":"ECCV 2016, Part VII","author":"A Bearman","year":"2016","unstructured":"Bearman, A., Russakovsky, O., Ferrari, V., Fei-Fei, L.: What\u2019s the point: semantic segmentation with point supervision. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016, Part VII. LNCS, vol. 9909, pp. 549\u2013565. Springer, Heidelberg (2016)"},{"key":"27_CR13","doi-asserted-by":"crossref","unstructured":"Marsza\u0142ek, M., Laptev, I., Schmid, C.: Actions in context. In: CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206557"},{"key":"27_CR14","unstructured":"Lan, T., Wang, Y., Mori, G.: Discriminative figure-centric models for joint action localization and recognition. In: ICCV (2011)"},{"key":"27_CR15","doi-asserted-by":"crossref","unstructured":"Gkioxari, G., Malik, J.: Finding action tubes. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298676"},{"key":"27_CR16","doi-asserted-by":"crossref","unstructured":"Weinzaepfel, P., Harchaoui, Z., Schmid, C.: Learning to track for spatio-temporal action localization. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.362"},{"key":"27_CR17","unstructured":"Lu, J., Xu, R., Corso, J.J.: Human action segmentation with hierarchical supervoxel consistency. In: CVPR (2015)"},{"key":"27_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"565","DOI":"10.1007\/978-3-319-10602-1_37","volume-title":"Computer Vision \u2013 ECCV 2014","author":"L Wang","year":"2014","unstructured":"Wang, L., Qiao, Y., Tang, X.: Video action detection with relational dynamic-poselets. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014, Part V. LNCS, vol. 8693, pp. 565\u2013580. Springer, Heidelberg (2014)"},{"key":"27_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"737","DOI":"10.1007\/978-3-319-10578-9_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"D Oneata","year":"2014","unstructured":"Oneata, D., Revaud, J., Verbeek, J., Schmid, C.: Spatio-temporal object detection proposals. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014, Part III. LNCS, vol. 8691, pp. 737\u2013752. Springer, Heidelberg (2014)"},{"key":"27_CR20","doi-asserted-by":"crossref","unstructured":"Chen, W., Corso, J.J.: Action detection by implicit intentional motion clustering. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.377"},{"key":"27_CR21","doi-asserted-by":"crossref","unstructured":"Marian Puscas, M., Sangineto, E., Culibrk, D., Sebe, N.: Unsupervised tube extraction using transductive learning and dense trajectories. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.193"},{"key":"27_CR22","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1007\/978-3-319-09396-3_9","volume-title":"Computer Vision in Sports","author":"Khurram Soomro","year":"2014","unstructured":"Soomro, K., Zamir, A.R.: Action recognition in realistic sports videos. In: Moeslund, T.B., Thomas, G., Hilton, A. (eds.) Computer Vision in Sports, pp 181-208. Springer, Heidelberg (2014)"},{"key":"27_CR23","doi-asserted-by":"crossref","unstructured":"Raptis, M., Kokkinos, I., Soatto, S.: Discovering discriminative action parts from mid-level video representations. In: CVPR (2012)","DOI":"10.1109\/CVPR.2012.6247807"},{"key":"27_CR24","doi-asserted-by":"crossref","unstructured":"Cao, L., Liu, Z., Huang, T.S.: Cross-dataset action detection. In: CVPR (2010)","DOI":"10.1109\/CVPR.2010.5539875"},{"key":"27_CR25","unstructured":"Soomro, K., Zamir, A.R., Shah, M.: Ucf101: A dataset of 101 human actions classes from videos in the wild (2012). \n                      arXiv:1212.0402"},{"key":"27_CR26","doi-asserted-by":"crossref","unstructured":"Zhang, W., Zhu, M., Derpanis, K.: From actemes to action: a strongly-supervised representation for detailed action understanding. In: ICCV (2013)","DOI":"10.1109\/ICCV.2013.280"},{"key":"27_CR27","doi-asserted-by":"crossref","unstructured":"Jhuang, H., Gall, J., Zuffi, S., Schmid, C., Black, M.: Towards understanding action recognition. In: ICCV (2013)","DOI":"10.1109\/ICCV.2013.396"},{"key":"27_CR28","unstructured":"Gorban, A., Idrees, H., Jiang, Y., Zamir, A.R., Laptev, I., Shah, M., Sukthankar, R.: Thumos challenge: action recognition with a large number of classes. In: CVPR Workshop (2015)"},{"key":"27_CR29","doi-asserted-by":"crossref","unstructured":"Karpathy, A., Toderici, G., Shetty, S., Leung, T., Sukthankar, R., Fei-Fei, L.: Large-scale video classification with convolutional neural networks. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.223"},{"key":"27_CR30","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Jhuang, H., Garrote, E., Poggio, T., Serre, T.: HMDB: a large video database for human motion recognition. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"27_CR31","unstructured":"Mihalcik, D., Doermann, D.: The design and implementation of viper. Technical report (2003)"},{"issue":"1","key":"27_CR32","doi-asserted-by":"publisher","first-page":"184","DOI":"10.1007\/s11263-012-0564-1","volume":"101","author":"C Vondrick","year":"2013","unstructured":"Vondrick, C., Patterson, D., Ramanan, D.: Efficiently scaling up crowdsourced video annotation. IJCV 101(1), 184\u2013204 (2013)","journal-title":"IJCV"},{"key":"27_CR33","doi-asserted-by":"crossref","unstructured":"Yuen, J., Russell, B., Liu, C., Torralba, A.: Labelme video: building a video database with human annotations. In: ICCV (2009)","DOI":"10.1109\/ICCV.2009.5459289"},{"key":"27_CR34","unstructured":"Settles, B.: Active Learning Literature Survey, vol. 52, pp. 55\u201366. University of Wisconsin, Madison (2010)"},{"key":"27_CR35","unstructured":"Vondrick, C., Ramanan, D.: Video annotation and tracking with active learning. In: NIPS (2011)"},{"key":"27_CR36","first-page":"88","volume":"131","author":"S Bianco","year":"2015","unstructured":"Bianco, S., Ciocca, G., Napoletano, P., Schettini, R.: An interactive tool for manual, semi-automatic and automatic video annotation. CVIU 131, 88\u201399 (2015)","journal-title":"CVIU"},{"key":"27_CR37","doi-asserted-by":"crossref","unstructured":"Bilen, H., Pedersoli, M., Tuytelaars, T.: Weakly supervised object detection with convex clustering. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298711"},{"key":"27_CR38","doi-asserted-by":"crossref","unstructured":"Oquab, M., Bottou, L., Laptev, I., Sivic, J.: Is object localization for free? - weakly-supervised learning with convolutional neural networks. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298668"},{"key":"27_CR39","doi-asserted-by":"crossref","unstructured":"Cho, M., Kwak, S., Schmid, C., Ponce, J.: Unsupervised object discovery and localization in the wild: part-based matching with bottom-up region proposals. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298724"},{"key":"27_CR40","unstructured":"Ali, K., Hasler, D., Fleuret, F.: Flowboost - appearance learning from sparsely annotated video. In: CVPR (2011)"},{"key":"27_CR41","doi-asserted-by":"crossref","unstructured":"Misra, I., Shrivastava, A., Hebert, M.: Watch and learn: semi-supervised learning for object detectors from video. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298982"},{"key":"27_CR42","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"640","DOI":"10.1007\/978-3-319-10593-2_42","volume-title":"Computer Vision \u2013 ECCV 2014","author":"L Wang","year":"2014","unstructured":"Wang, L., Hua, G., Sukthankar, R., Xue, J., Zheng, N.: Video object discovery and co-segmentation with extremely weak supervision. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014, Part IV. LNCS, vol. 8692, pp. 640\u2013655. Springer, Heidelberg (2014)"},{"key":"27_CR43","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"594","DOI":"10.1007\/978-3-642-33712-3_43","volume-title":"Computer Vision \u2013 ECCV 2012","author":"P Siva","year":"2012","unstructured":"Siva, P., Russell, C., Xiang, T.: In defence of negative mining for annotating weakly labelled data. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012, Part III. LNCS, vol. 7574, pp. 594\u2013608. Springer, Heidelberg (2012)"},{"key":"27_CR44","doi-asserted-by":"crossref","unstructured":"Kwak, S., Cho, M., Laptev, I., Ponce, J., Schmid, C.: Unsupervised object discovery and tracking in video collections. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.363"},{"key":"27_CR45","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1007\/978-3-319-16814-2_16","volume-title":"Computer Vision \u2013 ACCV 2014","author":"E Adeli Mosabbeb","year":"2015","unstructured":"Adeli Mosabbeb, E., Cabral, R., De la Torre, F., Fathy, M.: Multi-label discriminative weakly-supervised human activity recognition and localization. In: Cremers, D., Reid, I., Saito, H., Yang, M.-H. (eds.) ACCV 2014. LNCS, vol. 9007, pp. 241\u2013258. Springer, Heidelberg (2015)"},{"key":"27_CR46","doi-asserted-by":"crossref","unstructured":"Siva, P., Xiang, T.: Weakly supervised action detection. In: BMVC (2011)","DOI":"10.5244\/C.25.65"},{"key":"27_CR47","doi-asserted-by":"crossref","unstructured":"Jain, M., van Gemert, J.C., Mensink, T., Snoek, C.G.M.: Objects2action: Classifying and localizing actions without any video example. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.521"},{"issue":"7","key":"27_CR48","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1167\/9.7.4","volume":"9","author":"PH Tseng","year":"2009","unstructured":"Tseng, P.H., Carmi, R., Cameron, I.G., Munoz, D.P., Itti, L.: Quantifying center bias of observers in free viewing of dynamic natural scenes. JoV 9(7), 4 (2009)","journal-title":"JoV"},{"key":"27_CR49","doi-asserted-by":"crossref","unstructured":"Rodriguez, M.D., Ahmed, J., Shah, M.: Action MACH: a spatio-temporal maximum average correlation height filter for action recognition. In: CVPR (2008)","DOI":"10.1109\/CVPR.2008.4587727"},{"key":"27_CR50","doi-asserted-by":"crossref","unstructured":"Wang, H., Schmid, C.: Action recognition with improved trajectories. In: ICCV (2013)","DOI":"10.1109\/ICCV.2013.441"},{"issue":"3","key":"27_CR51","doi-asserted-by":"publisher","first-page":"222","DOI":"10.1007\/s11263-013-0636-x","volume":"105","author":"J S\u00e1nchez","year":"2013","unstructured":"S\u00e1nchez, J., Perronnin, F., Mensink, T., Verbeek, J.: Image classification with the fisher vector: theory and practice. IJCV 105(3), 222\u2013245 (2013)","journal-title":"IJCV"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2016"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-46454-1_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,10,10]],"date-time":"2020-10-10T01:43:59Z","timestamp":1602294239000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-46454-1_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319464534","9783319464541"],"references-count":51,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-46454-1_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2016]]},"assertion":[{"value":"16 September 2016","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Amsterdam","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The Netherlands","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2016","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 October 2016","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 October 2016","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2016","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.eccv2016.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}