{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T15:31:19Z","timestamp":1774539079968,"version":"3.50.1"},"publisher-location":"Cham","reference-count":44,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030585471","type":"print"},{"value":"9783030585488","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58548-8_25","type":"book-chapter","created":{"date-parts":[[2020,10,28]],"date-time":"2020-10-28T23:02:42Z","timestamp":1603926162000},"page":"420-437","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":108,"title":["SF-Net: Single-Frame Supervision for Temporal Action Localization"],"prefix":"10.1007","author":[{"given":"Fan","family":"Ma","sequence":"first","affiliation":[]},{"given":"Linchao","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Yi","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Shengxin","family":"Zha","sequence":"additional","affiliation":[]},{"given":"Gourab","family":"Kundu","sequence":"additional","affiliation":[]},{"given":"Matt","family":"Feiszli","sequence":"additional","affiliation":[]},{"given":"Zheng","family":"Shou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,10,29]]},"reference":[{"key":"25_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1007\/978-3-030-01240-3_16","volume-title":"Computer Vision \u2013 ECCV 2018","author":"H Alwassel","year":"2018","unstructured":"Alwassel, H., Caba Heilbron, F., Ghanem, B.: Action search: spotting actions in videos and its application to temporal action localization. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11213, pp. 253\u2013269. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01240-3_16"},{"key":"25_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"549","DOI":"10.1007\/978-3-319-46478-7_34","volume-title":"Computer Vision \u2013 ECCV 2016","author":"A Bearman","year":"2016","unstructured":"Bearman, A., Russakovsky, O., Ferrari, V., Fei-Fei, L.: What\u2019s the point: semantic segmentation with point supervision. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9911, pp. 549\u2013565. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46478-7_34"},{"key":"25_CR3","doi-asserted-by":"crossref","unstructured":"Caba Heilbron, F., Escorcia, V., Ghanem, B., Carlos Niebles, J.: ActivityNet: a large-scale video benchmark for human activity understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 961\u2013970 (2015)","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"25_CR4","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), July 2017","DOI":"10.1109\/CVPR.2017.502"},{"key":"25_CR5","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"25_CR6","doi-asserted-by":"crossref","unstructured":"Chao, Y.W., Vijayanarasimhan, S., Seybold, B., Ross, D.A., Deng, J., Sukthankar, R.: Rethinking the faster R-CNN architecture for temporal action localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1130\u20131139 (2018)","DOI":"10.1109\/CVPR.2018.00124"},{"key":"25_CR7","unstructured":"Ch\u00e9ron, G., Alayrac, J.B., Laptev, I., Schmid, C.: A flexible model for training action localization with varying levels of supervision. In: Advances in Neural Information Processing Systems, pp. 942\u2013953 (2018)"},{"key":"25_CR8","doi-asserted-by":"crossref","unstructured":"Dai, X., Singh, B., Zhang, G., Davis, L.S., Qiu Chen, Y.: Temporal context network for activity localization in videos. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5793\u20135802 (2017)","DOI":"10.1109\/ICCV.2017.610"},{"key":"25_CR9","doi-asserted-by":"crossref","unstructured":"Damen, D., Leelasawassuk, T., Haines, O., Calway, A., Mayol-Cuevas, W.W.: You-Do, I-Learn: discovering task relevant objects and their modes of interaction from multi-user egocentric video. In: BMVC, vol. 2, p. 3 (2014)","DOI":"10.5244\/C.28.30"},{"key":"25_CR10","unstructured":"Ding, L., Xu, C.: Weakly-supervised action segmentation with iterative soft boundary assignment. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6508\u20136516 (2018)"},{"key":"25_CR11","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: SlowFast networks for video recognition. In: The IEEE International Conference on Computer Vision (ICCV), October 2019","DOI":"10.1109\/ICCV.2019.00630"},{"key":"25_CR12","doi-asserted-by":"crossref","unstructured":"Gao, J., Yang, Z., Nevatia, R.: Cascaded boundary regression for temporal action detection. arXiv preprint arXiv:1705.01180 (2017)","DOI":"10.5244\/C.31.52"},{"key":"25_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.cviu.2016.10.018","volume":"155","author":"H Idrees","year":"2017","unstructured":"Idrees, H., et al.: The THUMOS challenge on action recognition for videos \u201cin the wild\u201d. Comput. Vis. Image Underst. 155, 1\u201323 (2017)","journal-title":"Comput. Vis. Image Underst."},{"key":"25_CR14","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"25_CR15","doi-asserted-by":"crossref","unstructured":"Lei, P., Todorovic, S.: Temporal deformable residual networks for action segmentation in videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6742\u20136751 (2018)","DOI":"10.1109\/CVPR.2018.00705"},{"key":"25_CR16","doi-asserted-by":"crossref","unstructured":"Lin, T., Liu, X., Li, X., Ding, E., Wen, S.: BMN: boundary-matching network for temporal action proposal generation. In: The IEEE International Conference on Computer Vision (ICCV), October 2019","DOI":"10.1109\/ICCV.2019.00399"},{"key":"25_CR17","doi-asserted-by":"crossref","unstructured":"Lin, T., Zhao, X., Shou, Z.: Single shot temporal action detection. In: Proceedings of the 25th ACM International Conference on Multimedia, pp. 988\u2013996. ACM (2017)","DOI":"10.1145\/3123266.3123343"},{"key":"25_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-030-01225-0_1","volume-title":"Computer Vision \u2013 ECCV 2018","author":"T Lin","year":"2018","unstructured":"Lin, T., Zhao, X., Su, H., Wang, C., Yang, M.: BSN: boundary sensitive network for temporal action proposal generation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11208, pp. 3\u201321. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01225-0_1"},{"key":"25_CR19","doi-asserted-by":"crossref","unstructured":"Liu, D., Jiang, T., Wang, Y.: Completeness modeling and context separation for weakly supervised temporal action localization. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2019","DOI":"10.1109\/CVPR.2019.00139"},{"key":"25_CR20","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Weakly supervised temporal action localization through contrast based evaluation networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3899\u20133908 (2019)","DOI":"10.1109\/ICCV.2019.00400"},{"key":"25_CR21","doi-asserted-by":"crossref","unstructured":"Long, F., Yao, T., Qiu, Z., Tian, X., Luo, J., Mei, T.: Gaussian temporal awareness networks for action localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 344\u2013353 (2019)","DOI":"10.1109\/CVPR.2019.00043"},{"key":"25_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"437","DOI":"10.1007\/978-3-319-46454-1_27","volume-title":"Computer Vision \u2013 ECCV 2016","author":"P Mettes","year":"2016","unstructured":"Mettes, P., van Gemert, J.C., Snoek, C.G.M.: Spot on: action localization from pointly-supervised proposals. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9909, pp. 437\u2013453. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46454-1_27"},{"key":"25_CR23","doi-asserted-by":"crossref","unstructured":"Moltisanti, D., Fidler, S., Damen, D.: Action recognition from single timestamp supervision in untrimmed videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 9915\u20139924 (2019)","DOI":"10.1109\/CVPR.2019.01015"},{"key":"25_CR24","doi-asserted-by":"crossref","unstructured":"Narayan, S., Cholakkal, H., Khan, F.S., Shao, L.: 3C-Net: category count and center loss for weakly-supervised action localization. In: The IEEE International Conference on Computer Vision (ICCV), October 2019","DOI":"10.1109\/ICCV.2019.00877"},{"key":"25_CR25","doi-asserted-by":"crossref","unstructured":"Nguyen, P., Liu, T., Prasad, G., Han, B.: Weakly supervised action localization by sparse temporal pooling network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6752\u20136761 (2018)","DOI":"10.1109\/CVPR.2018.00706"},{"key":"25_CR26","doi-asserted-by":"crossref","unstructured":"Nguyen, P.X., Ramanan, D., Fowlkes, C.C.: Weakly-supervised action localization with background modeling. In: The IEEE International Conference on Computer Vision (ICCV), October 2019","DOI":"10.1109\/ICCV.2019.00560"},{"key":"25_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"588","DOI":"10.1007\/978-3-030-01225-0_35","volume-title":"Computer Vision \u2013 ECCV 2018","author":"S Paul","year":"2018","unstructured":"Paul, S., Roy, S., Roy-Chowdhury, A.K.: W-TALC: weakly-supervised temporal activity localization and classification. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11208, pp. 588\u2013607. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01225-0_35"},{"key":"25_CR28","doi-asserted-by":"crossref","unstructured":"Shou, Z., Chan, J., Zareian, A., Miyazawa, K., Chang, S.F.: CDC: convolutional-de-convolutional networks for precise temporal action localization in untrimmed videos. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), July 2017","DOI":"10.1109\/CVPR.2017.155"},{"key":"25_CR29","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"162","DOI":"10.1007\/978-3-030-01270-0_10","volume-title":"Computer Vision \u2013 ECCV 2018","author":"Z Shou","year":"2018","unstructured":"Shou, Z., Gao, H., Zhang, L., Miyazawa, K., Chang, S.-F.: AutoLoc: weakly-supervised temporal action localization in untrimmed videos. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11220, pp. 162\u2013179. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01270-0_10"},{"key":"25_CR30","doi-asserted-by":"crossref","unstructured":"Shou, Z., Wang, D., Chang, S.F.: Temporal action localization in untrimmed videos via multi-stage CNNs. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2016","DOI":"10.1109\/CVPR.2016.119"},{"key":"25_CR31","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. In: Advances in Neural Information Processing Systems, pp. 568\u2013576 (2014)"},{"key":"25_CR32","doi-asserted-by":"crossref","unstructured":"Singh, K.K., Lee, Y.J.: Hide-and-seek: forcing a network to be meticulous for weakly-supervised object and action localization. In: 2017 IEEE International Conference on Computer Vision (ICCV), pp. 3544\u20133553. IEEE (2017)","DOI":"10.1109\/ICCV.2017.381"},{"key":"25_CR33","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3D convolutional networks. In: The IEEE International Conference on Computer Vision (ICCV), December 2015","DOI":"10.1109\/ICCV.2015.510"},{"key":"25_CR34","doi-asserted-by":"crossref","unstructured":"Wang, H., Schmid, C.: Action recognition with improved trajectories. In: The IEEE International Conference on Computer Vision (ICCV), December 2013","DOI":"10.1109\/ICCV.2013.441"},{"key":"25_CR35","doi-asserted-by":"crossref","unstructured":"Wang, L., Xiong, Y., Lin, D., Van Gool, L.: Untrimmednets for weakly supervised action recognition and detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4325\u20134334 (2017)","DOI":"10.1109\/CVPR.2017.678"},{"key":"25_CR36","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1007\/978-3-319-46484-8_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"L Wang","year":"2016","unstructured":"Wang, L., et al.: Temporal segment networks: towards good practices for deep action recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 20\u201336. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_2"},{"key":"25_CR37","doi-asserted-by":"crossref","unstructured":"Xu, H., Das, A., Saenko, K.: R-C3D: region convolutional 3D network for temporal activity detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5783\u20135792 (2017)","DOI":"10.1109\/ICCV.2017.617"},{"key":"25_CR38","doi-asserted-by":"crossref","unstructured":"Yuan, Z., Stroud, J.C., Lu, T., Deng, J.: Temporal action localization by structured maximal sums. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3684\u20133692 (2017)","DOI":"10.1109\/CVPR.2017.342"},{"key":"25_CR39","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"214","DOI":"10.1007\/978-3-540-74936-3_22","volume-title":"Pattern Recognition","author":"C Zach","year":"2007","unstructured":"Zach, C., Pock, T., Bischof, H.: A duality based approach for realtime TV-L$$^1$$ optical flow. In: Hamprecht, F.A., Schn\u00f6rr, C., J\u00e4hne, B. (eds.) DAGM 2007. LNCS, vol. 4713, pp. 214\u2013223. Springer, Heidelberg (2007). https:\/\/doi.org\/10.1007\/978-3-540-74936-3_22"},{"key":"25_CR40","doi-asserted-by":"crossref","unstructured":"Zeng, R., et al.: Graph convolutional networks for temporal action localization. In: The IEEE International Conference on Computer Vision (ICCV), October 2019","DOI":"10.1109\/ICCV.2019.00719"},{"key":"25_CR41","doi-asserted-by":"crossref","unstructured":"Zhao, H., Torralba, A., Torresani, L., Yan, Z.: HACS: human action clips and segments dataset for recognition and temporal localization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 8668\u20138678 (2019)","DOI":"10.1109\/ICCV.2019.00876"},{"key":"25_CR42","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Xiong, Y., Wang, L., Wu, Z., Tang, X., Lin, D.: Temporal action detection with structured segment networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2914\u20132923 (2017)","DOI":"10.1109\/ICCV.2017.317"},{"key":"25_CR43","doi-asserted-by":"crossref","unstructured":"Zhu, L., Yang, Y.: ActBERT: learning global-local video-text representations. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.00877"},{"key":"25_CR44","doi-asserted-by":"publisher","unstructured":"Zhu, L., Yang, Y.: Label independent memory for semi-supervised few-shot video classification. IEEE Trans. Pattern Anal. Mach. Intell. (2020). https:\/\/doi.org\/10.1109\/TPAMI.2020.3007511","DOI":"10.1109\/TPAMI.2020.3007511"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58548-8_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:10:44Z","timestamp":1730160644000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58548-8_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585471","9783030585488"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58548-8_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"29 October 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}