{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T16:18:28Z","timestamp":1775578708049,"version":"3.50.1"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030585259","type":"print"},{"value":"9783030585266","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58526-6_43","type":"book-chapter","created":{"date-parts":[[2020,10,6]],"date-time":"2020-10-06T21:03:07Z","timestamp":1602018187000},"page":"729-745","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":91,"title":["Weakly-Supervised Action Localization with Expectation-Maximization Multi-Instance Learning"],"prefix":"10.1007","author":[{"given":"Zhekun","family":"Luo","sequence":"first","affiliation":[]},{"given":"Devin","family":"Guillory","sequence":"additional","affiliation":[]},{"given":"Baifeng","family":"Shi","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Ke","sequence":"additional","affiliation":[]},{"given":"Fang","family":"Wan","sequence":"additional","affiliation":[]},{"given":"Trevor","family":"Darrell","sequence":"additional","affiliation":[]},{"given":"Huijuan","family":"Xu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,10,7]]},"reference":[{"key":"43_CR1","unstructured":"Alwassel, H., Heilbron, F.C., Thabet, A., Ghanem, B.: Refineloc: iterative refinement for weakly-supervised action localization. arXiv preprint arXiv:1904.00227 (2019)"},{"key":"43_CR2","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1016\/j.patcog.2017.10.009","volume":"77","author":"MA Carbonneau","year":"2018","unstructured":"Carbonneau, M.A., Cheplygina, V., Granger, E., Gagnon, G.: Multiple instance learning: a survey of problem characteristics and applications. Pattern Recogn. 77, 329\u2013353 (2018)","journal-title":"Pattern Recogn."},{"key":"43_CR3","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? a new model and the kinetics dataset. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4724\u20134733 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"43_CR4","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1016\/S0004-3702(96)00034-3","volume":"89","author":"T Dietterich","year":"1997","unstructured":"Dietterich, T., Lathrop, R., Lozano-Perez, T.: Solving the multiple instance problem with axis-parallel rectangles. Artif. Intell. 89, 31\u201371 (1997)","journal-title":"Artif. Intell."},{"key":"43_CR5","first-page":"3","volume":"3","author":"DR Dooly","year":"2001","unstructured":"Dooly, D.R., Zhang, Q., Goldman, S.A., Amar, R.A., Brodley, E., Danyluk, A.: Multiple-instance learning of real-valued data. J. Mach. Learn. Res. 3, 3\u201310 (2001)","journal-title":"J. Mach. Learn. Res."},{"key":"43_CR6","doi-asserted-by":"crossref","unstructured":"Gao, J., Yang, Z., Nevatia, R.: Cascaded boundary regression for temporal action detection. arXiv preprint arXiv:1705.01180 (2017)","DOI":"10.5244\/C.31.52"},{"key":"43_CR7","doi-asserted-by":"crossref","unstructured":"Heilbron, F.C., Escorcia, V., Ghanem, B., Niebles, J.C.: ActivityNet: a large-scale video benchmark for human activity understanding. In: IEEE Conference on Computer Vision and Pattern Recognition, pp. 961\u2013970 (2015)","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"43_CR8","unstructured":"Ilse, M., Tomczak, J.M., Welling, M.: Attention-based deep multiple instance learning. arXiv preprint arXiv:1802.04712 (2018)"},{"key":"43_CR9","unstructured":"Jiang, Y.G., et al.: THUMOS challenge: action recognition with a large number of classes. http:\/\/crcv.ucf.edu\/THUMOS14\/ (2014)"},{"key":"43_CR10","unstructured":"Keeler, J.D., Rumelhart, D.E., Leow, W.K.: Integrated segmentation and recognition of hand-printed numerals. In: Lippmann, R.P., Moody, J.E., Touretzky, D.S. (eds.) Advances in Neural Information Processing Systems 3, pp. 557\u2013563. Morgan-Kaufmann (1991)"},{"key":"43_CR11","doi-asserted-by":"crossref","unstructured":"Li, X., Kan, M., Shan, S., Chen, X.: Weakly supervised object detection with segmentation collaboration. In: The IEEE International Conference on Computer Vision (ICCV), October 2019","DOI":"10.1109\/ICCV.2019.00983"},{"key":"43_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-030-01225-0_1","volume-title":"Computer Vision \u2013 ECCV 2018","author":"T Lin","year":"2018","unstructured":"Lin, T., Zhao, X., Su, H., Wang, C., Yang, M.: BSN: boundary sensitive network for temporal action proposal generation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11208, pp. 3\u201321. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01225-0_1"},{"key":"43_CR13","doi-asserted-by":"crossref","unstructured":"Liu, D., Jiang, T., Wang, Y.: Completeness modeling and context separation for weakly supervised temporal action localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1298\u20131307 (2019)","DOI":"10.1109\/CVPR.2019.00139"},{"key":"43_CR14","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Weakly supervised temporal action localization through contrast based evaluation networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3899\u20133908 (2019)","DOI":"10.1109\/ICCV.2019.00400"},{"key":"43_CR15","unstructured":"Maron, O., Lozano-P\u00e9rez, T.: A framework for multiple-instance learning. In: Jordan, M.I., Kearns, M.J., Solla, S.A. (eds.) Advances in Neural Information Processing Systems 10, pp. 570\u2013576. MIT Press (1998)"},{"key":"43_CR16","doi-asserted-by":"crossref","unstructured":"Narayan, S., Cholakkal, H., Khan, F.S., Shao, L.: 3c-net: category count and center loss for weakly-supervised action localization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 8679\u20138687 (2019)","DOI":"10.1109\/ICCV.2019.00877"},{"key":"43_CR17","doi-asserted-by":"crossref","unstructured":"Nguyen, P., Liu, T., Prasad, G., Han, B.: Weakly supervised action localization by sparse temporal pooling network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6752\u20136761 (2018)","DOI":"10.1109\/CVPR.2018.00706"},{"key":"43_CR18","doi-asserted-by":"crossref","unstructured":"Nguyen, P.X., Ramanan, D., Fowlkes, C.C.: Weakly-supervised action localization with background modeling. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5502\u20135511 (2019)","DOI":"10.1109\/ICCV.2019.00560"},{"key":"43_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"588","DOI":"10.1007\/978-3-030-01225-0_35","volume-title":"Computer Vision \u2013 ECCV 2018","author":"S Paul","year":"2018","unstructured":"Paul, S., Roy, S., Roy-Chowdhury, A.K.: W-TALC: weakly-supervised temporal activity localization and classification. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11208, pp. 588\u2013607. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01225-0_35"},{"key":"43_CR20","doi-asserted-by":"crossref","unstructured":"Shou, Z., Chan, J., Zareian, A., Miyazawa, K., Chang, S.F.: CDC: convolutional-de-convolutional networks for precise temporal action localization in untrimmed videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5734\u20135743 (2017)","DOI":"10.1109\/CVPR.2017.155"},{"key":"43_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"162","DOI":"10.1007\/978-3-030-01270-0_10","volume-title":"Computer Vision \u2013 ECCV 2018","author":"Z Shou","year":"2018","unstructured":"Shou, Z., Gao, H., Zhang, L., Miyazawa, K., Chang, S.-F.: AutoLoc: weakly-supervised temporal action localization in untrimmed videos. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11220, pp. 162\u2013179. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01270-0_10"},{"key":"43_CR22","doi-asserted-by":"crossref","unstructured":"Singh, K.K., Lee, Y.J.: Hide-and-seek: forcing a network to be meticulous for weakly-supervised object and action localization. In: 2017 IEEE International Conference on Computer Vision (ICCV), pp. 3544\u20133553. IEEE (2017)","DOI":"10.1109\/ICCV.2017.381"},{"key":"43_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"558","DOI":"10.1007\/978-3-030-20890-5_36","volume-title":"Computer Vision \u2013 ACCV 2018","author":"H Su","year":"2019","unstructured":"Su, H., Zhao, X., Lin, T.: Cascaded pyramid mining network for weakly supervised temporal action localization. In: Jawahar, C.V., Li, H., Mori, G., Schindler, K. (eds.) ACCV 2018. LNCS, vol. 11362, pp. 558\u2013574. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-20890-5_36"},{"key":"43_CR24","doi-asserted-by":"crossref","unstructured":"Tang, P., Wang, X., Bai, X., Liu, W.: Multiple instance detection network with online instance classifier refinement. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2843\u20132851 (2017)","DOI":"10.1109\/CVPR.2017.326"},{"key":"43_CR25","doi-asserted-by":"crossref","unstructured":"Wan, F., Liu, C., Ke, W., Ji, X., Jiao, J., Ye, Q.: C-MIL: continuation multiple instance learning for weakly supervised object detection. In: CVPR, pp. 2199\u20132208 (2019)","DOI":"10.1109\/CVPR.2019.00230"},{"key":"43_CR26","doi-asserted-by":"crossref","unstructured":"Wang, L., Xiong, Y., Lin, D., Van Gool, L.: Untrimmednets for weakly supervised action recognition and detection. In: Proceedings of the IEEE conference on Computer Vision and Pattern Recognition, pp. 4325\u20134334 (2017)","DOI":"10.1109\/CVPR.2017.678"},{"key":"43_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1007\/978-3-319-46484-8_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"L Wang","year":"2016","unstructured":"Wang, L., et al.: Temporal segment networks: towards good practices for deep action recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 20\u201336. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_2"},{"key":"43_CR28","doi-asserted-by":"crossref","unstructured":"Xu, H., Das, A., Saenko, K.: R-C3D: region convolutional 3D network for temporal activity detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5783\u20135792 (2017)","DOI":"10.1109\/ICCV.2017.617"},{"issue":"10","key":"43_CR29","doi-asserted-by":"publisher","first-page":"2319","DOI":"10.1109\/TPAMI.2019.2921539","volume":"41","author":"H Xu","year":"2019","unstructured":"Xu, H., Das, A., Saenko, K.: Two-stream region convolutional 3D network for temporal activity detection. IEEE Trans. Pattern Anal. Mach. Intell. 41(10), 2319\u20132332 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"43_CR30","doi-asserted-by":"crossref","unstructured":"Yu, T., Ren, Z., Li, Y., Yan, E., Xu, N., Yuan, J.: Temporal structure mining for weakly supervised action detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5522\u20135531 (2019)","DOI":"10.1109\/ICCV.2019.00562"},{"key":"43_CR31","unstructured":"Yuan, Y., Lyu, Y., Shen, X., Tsang, I.W., Yeung, D.Y.: Marginalized average attentional network for weakly-supervised learning. arXiv preprint arXiv:1905.08586 (2019)"},{"key":"43_CR32","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"214","DOI":"10.1007\/978-3-540-74936-3_22","volume-title":"Pattern Recognition","author":"C Zach","year":"2007","unstructured":"Zach, C., Pock, T., Bischof, H.: A duality based approach for realtime TV-$${L^{1}}$$ optical flow. In: Hamprecht, F.A., Schn\u00f6rr, C., J\u00e4hne, B. (eds.) DAGM 2007. LNCS, vol. 4713, pp. 214\u2013223. Springer, Heidelberg (2007). https:\/\/doi.org\/10.1007\/978-3-540-74936-3_22"},{"issue":"12","key":"43_CR33","doi-asserted-by":"publisher","first-page":"5797","DOI":"10.1109\/TIP.2019.2922108","volume":"28","author":"R Zeng","year":"2019","unstructured":"Zeng, R., Gan, C., Chen, P., Huang, W., Wu, Q., Tan, M.: Breaking winner-takes-all: iterative-winners-out networks for weakly supervised temporal action localization. IEEE Trans. Image Process. 28(12), 5797\u20135808 (2019)","journal-title":"IEEE Trans. Image Process."},{"key":"43_CR34","unstructured":"Zhang, C., Platt, J.C., Viola, P.A.: Multiple instance boosting for object detection. In: Weiss, Y., Sch\u00f6lkopf, B., Platt, J.C. (eds.) Advances in Neural Information Processing Systems 18, pp. 1417\u20131424. MIT Press (2006)"},{"key":"43_CR35","unstructured":"Zhang, Q., Goldman, S.A.: EM-DD: an improved multiple-instance learning technique. In: Dietterich, T.G., Becker, S., Ghahramani, Z. (eds.) Advances in Neural Information Processing Systems 14, pp. 1073\u20131080. MIT Press (2002)"},{"key":"43_CR36","unstructured":"Zhang, Q., Goldman, S.A.: EM-DD: an improved multiple-instance learning technique. In: Advances in Neural Information Processing Systems, pp. 1073\u20131080 (2002)"},{"key":"43_CR37","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Xiong, Y., Wang, L., Wu, Z., Tang, X., Lin, D.: Temporal action detection with structured segment networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2914\u20132923 (2017)","DOI":"10.1109\/ICCV.2017.317"},{"key":"43_CR38","doi-asserted-by":"crossref","unstructured":"Zhong, J.X., Li, N., Kong, W., Zhang, T., Li, T.H., Li, G.: Step-by-step erasion, one-by-one collection: a weakly supervised temporal action detector. arXiv preprint arXiv:1807.02929 (2018)","DOI":"10.1145\/3240508.3240511"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58526-6_43","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,6]],"date-time":"2024-10-06T00:23:27Z","timestamp":1728174207000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58526-6_43"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585259","9783030585266"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58526-6_43","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"7 October 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic. From the ECCV Workshops 249 full papers, 18 short papers, and 21 further contributions were published out of a total of 467 submissions.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}