{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T15:07:14Z","timestamp":1742915234687,"version":"3.40.3"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030585259"},{"type":"electronic","value":"9783030585266"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58526-6_28","type":"book-chapter","created":{"date-parts":[[2020,10,6]],"date-time":"2020-10-06T21:03:07Z","timestamp":1602018187000},"page":"470-487","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":17,"title":["Learning Actionness via Long-Range Temporal Order Verification"],"prefix":"10.1007","author":[{"given":"Dimitri","family":"Zhukov","sequence":"first","affiliation":[]},{"given":"Jean-Baptiste","family":"Alayrac","sequence":"additional","affiliation":[]},{"given":"Ivan","family":"Laptev","sequence":"additional","affiliation":[]},{"given":"Josef","family":"Sivic","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,10,7]]},"reference":[{"key":"28_CR1","doi-asserted-by":"crossref","unstructured":"Agrawal, P., Carreira, J., Malik, J.: Learning to see by moving. In: The IEEE International Conference on Computer Vision (ICCV), December 2015","DOI":"10.1109\/ICCV.2015.13"},{"key":"28_CR2","doi-asserted-by":"crossref","unstructured":"Alayrac, J.B., Bojanowski, P., Agrawal, N., Laptev, I., Sivic, J., Lacoste Julien, S.: Unsupervised learning from narrated instruction videos. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.495"},{"key":"28_CR3","doi-asserted-by":"crossref","unstructured":"Alayrac, J.B., Sivic, J., Laptev, I., Lacoste-Julien, S.: Joint discovery of object states and manipulation actions. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.234"},{"key":"28_CR4","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo Vadis, action recognition? A new model and the kinetics dataset. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"28_CR5","doi-asserted-by":"crossref","unstructured":"Chen, W., Xiong, C., Xu, R., Corso, J.J.: Actionness ranking with lattice conditional ordinal random fields. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2014","DOI":"10.1109\/CVPR.2014.101"},{"key":"28_CR6","doi-asserted-by":"crossref","unstructured":"Doersch, C., Gupta, A., Efros, A.A.: Unsupervised visual representation learning by context prediction. In: The IEEE International Conference on Computer Vision (ICCV), December 2015","DOI":"10.1109\/ICCV.2015.167"},{"key":"28_CR7","doi-asserted-by":"crossref","unstructured":"Dwibedi, D., Aytar, Y., Tompson, J., Sermanet, P., Zisserman, A.: Temporal cycle-consistency learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), June 2019","DOI":"10.1109\/CVPR.2019.00190"},{"key":"28_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"768","DOI":"10.1007\/978-3-319-46487-9_47","volume-title":"Computer Vision \u2013 ECCV 2016","author":"V Escorcia","year":"2016","unstructured":"Escorcia, V., Caba Heilbron, F., Niebles, J.C., Ghanem, B.: DAPs: deep action proposals for action understanding. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9907, pp. 768\u2013784. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46487-9_47"},{"key":"28_CR9","doi-asserted-by":"crossref","unstructured":"Fernando, B., Bilen, H., Gavves, E., Gould, S.: Self-supervised video representation learning with odd-one-out networks. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), July 2017","DOI":"10.1109\/CVPR.2017.607"},{"key":"28_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1007\/978-3-030-01216-8_5","volume-title":"Computer Vision \u2013 ECCV 2018","author":"J Gao","year":"2018","unstructured":"Gao, J., Chen, K., Nevatia, R.: CTAP: complementary temporal action proposal generation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11206, pp. 70\u201385. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01216-8_5"},{"key":"28_CR11","unstructured":"Gidaris, S., Singh, P., Komodakis, N.: Unsupervised representation learning by predicting image rotations. In: ICLR, April 2018"},{"key":"28_CR12","doi-asserted-by":"crossref","unstructured":"Huang, D.A., Lim, J.J., Fei-Fei, L., Niebles, J.C.: Unsupervised visual-linguistic reference resolution in instructional videos. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.116"},{"key":"28_CR13","doi-asserted-by":"crossref","unstructured":"Huang, D.A., et al.: Finding \u201cit\u201d: weakly-supervised reference-aware visual grounding in instructional video. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00623"},{"key":"28_CR14","doi-asserted-by":"crossref","unstructured":"Jenni, S., Favaro, P.: Self-supervised feature learning by learning to spot artifacts. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2018","DOI":"10.1109\/CVPR.2018.00289"},{"key":"28_CR15","doi-asserted-by":"crossref","unstructured":"Ji, J., Cao, K., Niebles, J.C.: Learning temporal action proposals with fewer labels. In: The IEEE International Conference on Computer Vision (ICCV), October 2019","DOI":"10.1109\/ICCV.2019.00717"},{"key":"28_CR16","unstructured":"Kingma, D., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"28_CR17","doi-asserted-by":"crossref","unstructured":"Larsson, G., Maire, M., Shakhnarovich, G.: Colorization as a proxy task for visual understanding. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), July 2017","DOI":"10.1109\/CVPR.2017.96"},{"key":"28_CR18","doi-asserted-by":"crossref","unstructured":"Lee, H.Y., Huang, J.B., Singh, M., Yang, M.H.: Unsupervised representation learning by sorting sequences. In: The IEEE International Conference on Computer Vision (ICCV), October 2017","DOI":"10.1109\/ICCV.2017.79"},{"key":"28_CR19","doi-asserted-by":"crossref","unstructured":"Lin, T., Liu, X., Li, X., Ding, E., Wen, S.: BMN: boundary-matching network for temporal action proposal generation. In: The IEEE International Conference on Computer Vision (ICCV), October 2019","DOI":"10.1109\/ICCV.2019.00399"},{"key":"28_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-030-01225-0_1","volume-title":"Computer Vision \u2013 ECCV 2018","author":"T Lin","year":"2018","unstructured":"Lin, T., Zhao, X., Su, H., Wang, C., Yang, M.: BSN: boundary sensitive network for temporal action proposal generation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11208, pp. 3\u201321. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01225-0_1"},{"key":"28_CR21","doi-asserted-by":"crossref","unstructured":"Liu, Y., Ma, L., Zhang, Y., Liu, W., Chang, S.F.: Multi-granularity generator for temporal action proposal. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2019","DOI":"10.1109\/CVPR.2019.00372"},{"key":"28_CR22","doi-asserted-by":"crossref","unstructured":"Miech, A., Alayrac, J.B., Smaira, L., Laptev, I., Sivic, J., Zisserman, A.: End-to-end learning of visual representations from uncurated instructional videos (2020)","DOI":"10.1109\/CVPR42600.2020.00990"},{"key":"28_CR23","doi-asserted-by":"crossref","unstructured":"Miech, A., Zhukov, D., Alayrac, J.B., Tapaswi, M., Laptev, I., Sivic, J.: HowTo100M: learning a text-video embedding by watching hundred million narrated video clips. In: The IEEE International Conference on Computer Vision (ICCV) (2019)","DOI":"10.1109\/ICCV.2019.00272"},{"key":"28_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"527","DOI":"10.1007\/978-3-319-46448-0_32","volume-title":"Computer Vision \u2013 ECCV 2016","author":"I Misra","year":"2016","unstructured":"Misra, I., Zitnick, C.L., Hebert, M.: Shuffle and learn: unsupervised learning using temporal order verification. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 527\u2013544. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_32"},{"key":"28_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1007\/978-3-319-46466-4_5","volume-title":"Computer Vision \u2013 ECCV 2016","author":"M Noroozi","year":"2016","unstructured":"Noroozi, M., Favaro, P.: Unsupervised learning of visual representations by solving jigsaw puzzles. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9910, pp. 69\u201384. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46466-4_5"},{"key":"28_CR26","doi-asserted-by":"crossref","unstructured":"Pathak, D., Girshick, R., Dollar, P., Darrell, T., Hariharan, B.: Learning features by watching objects move. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), July 2017","DOI":"10.1109\/CVPR.2017.638"},{"key":"28_CR27","doi-asserted-by":"crossref","unstructured":"Pathak, D., Krahenbuhl, P., Donahue, J., Darrell, T., Efros, A.A.: Context encoders: feature learning by inpainting. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2016","DOI":"10.1109\/CVPR.2016.278"},{"key":"28_CR28","doi-asserted-by":"crossref","unstructured":"Sener, F., Yao, A.: Unsupervised learning and segmentation of complex activities from video. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00873"},{"key":"28_CR29","doi-asserted-by":"crossref","unstructured":"Shan, D., Geng, J., Shu, M., Fouhey, D.F.: Understanding human hands in contact at internet scale. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), June 2020","DOI":"10.1109\/CVPR42600.2020.00989"},{"key":"28_CR30","doi-asserted-by":"crossref","unstructured":"Sun, C., Myers, A., Vondrick, C., Murphy, K., Schmid, C.: VideoBERT: a joint model for video and language representation learning. In: The IEEE International Conference on Computer Vision (ICCV) (2019)","DOI":"10.1109\/ICCV.2019.00756"},{"key":"28_CR31","doi-asserted-by":"crossref","unstructured":"Tang, Y., et al.: COIN: a large-scale dataset for comprehensive instructional video analysis. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00130"},{"key":"28_CR32","doi-asserted-by":"crossref","unstructured":"Wang, L., Qiao, Y., Tang, X., Van Gool, L.: Actionness estimation using hybrid fully convolutional networks. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2016","DOI":"10.1109\/CVPR.2016.296"},{"key":"28_CR33","doi-asserted-by":"crossref","unstructured":"Wang, X., Gupta, A.: Unsupervised learning of visual representations using videos. In: The IEEE International Conference on Computer Vision (ICCV), December 2015","DOI":"10.1109\/ICCV.2015.320"},{"key":"28_CR34","doi-asserted-by":"crossref","unstructured":"Wei, D., Lim, J.J., Zisserman, A., Freeman, W.T.: Learning and using the arrow of time. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2018","DOI":"10.1109\/CVPR.2018.00840"},{"key":"28_CR35","doi-asserted-by":"crossref","unstructured":"Xu, D., Xiao, J., Zhao, Z., Shao, J., Xie, D., Zhuang, Y.: Self-supervised spatiotemporal learning via video clip order prediction. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2019","DOI":"10.1109\/CVPR.2019.01058"},{"key":"28_CR36","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"649","DOI":"10.1007\/978-3-319-46487-9_40","volume-title":"Computer Vision \u2013 ECCV 2016","author":"R Zhang","year":"2016","unstructured":"Zhang, R., Isola, P., Efros, A.A.: Colorful image colorization. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9907, pp. 649\u2013666. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46487-9_40"},{"key":"28_CR37","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A.: Split-brain autoencoders: unsupervised learning by cross-channel prediction. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), July 2017","DOI":"10.1109\/CVPR.2017.76"},{"key":"28_CR38","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Xiong, Y., Wang, L., Wu, Z., Tang, X., Lin, D.: Temporal action detection with structured segment networks. In: The IEEE International Conference on Computer Vision (ICCV), October 2017","DOI":"10.1109\/ICCV.2017.317"},{"key":"28_CR39","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"831","DOI":"10.1007\/978-3-030-01246-5_49","volume-title":"Computer Vision \u2013 ECCV 2018","author":"B Zhou","year":"2018","unstructured":"Zhou, B., Andonian, A., Oliva, A., Torralba, A.: Temporal relational reasoning in videos. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11205, pp. 831\u2013846. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01246-5_49"},{"key":"28_CR40","doi-asserted-by":"crossref","unstructured":"Zhou, L., Chenliang, X., Corso, J.J.: Towards automatic learning of procedures from web instructional videos. In: AAAI (2018)","DOI":"10.1609\/aaai.v32i1.12342"},{"key":"28_CR41","doi-asserted-by":"crossref","unstructured":"Zhukov, D., Alayrac, J.B., Cinbis, R.G., Fouhey, D., Laptev, I., Sivic, J.: Cross-task weakly supervised learning from instructional videos. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2019","DOI":"10.1109\/CVPR.2019.00365"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58526-6_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,6]],"date-time":"2024-10-06T00:19:13Z","timestamp":1728173953000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58526-6_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585259","9783030585266"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58526-6_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"7 October 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic. From the ECCV Workshops 249 full papers, 18 short papers, and 21 further contributions were published out of a total of 467 submissions.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}