{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T16:24:01Z","timestamp":1778084641334,"version":"3.51.4"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030586201","type":"print"},{"value":"9783030586218","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58621-8_20","type":"book-chapter","created":{"date-parts":[[2020,11,26]],"date-time":"2020-11-26T19:03:23Z","timestamp":1606417403000},"page":"334-350","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":51,"title":["Procedure Planning in Instructional Videos"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6508-1161","authenticated-orcid":false,"given":"Chien-Yi","family":"Chang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6945-7768","authenticated-orcid":false,"given":"De-An","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8744-3861","authenticated-orcid":false,"given":"Danfei","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0579-7763","authenticated-orcid":false,"given":"Ehsan","family":"Adeli","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7481-0810","authenticated-orcid":false,"given":"Li","family":"Fei-Fei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8225-9793","authenticated-orcid":false,"given":"Juan Carlos","family":"Niebles","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,11,27]]},"reference":[{"key":"20_CR1","doi-asserted-by":"crossref","unstructured":"Abu Farha, Y., Richard, A., Gall, J.: When will you do what?-anticipating temporal occurrences of activities. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00560"},{"key":"20_CR2","doi-asserted-by":"crossref","unstructured":"Alayrac, J.B., Laptev, I., Sivic, J., Lacoste-Julien, S.: Joint discovery of object states and manipulation actions. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.234"},{"key":"20_CR3","first-page":"679","volume":"6","author":"R Bellman","year":"1957","unstructured":"Bellman, R.: A Markovian decision process. J. Math. Mech. 6, 679\u2013684 (1957)","journal-title":"J. Math. Mech."},{"key":"20_CR4","doi-asserted-by":"crossref","unstructured":"Chang, C.Y., Huang, D.A., Sui, Y., Fei-Fei, L., Niebles, J.C.: D3TW: discriminative differentiable dynamic time warping for weakly supervised action alignment and segmentation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00366"},{"key":"20_CR5","doi-asserted-by":"crossref","unstructured":"Ehsani, K., Bagherinezhad, H., Redmon, J., Mottaghi, R., Farhadi, A.: Who let the dogs out? modeling dog behavior from visual data. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00426"},{"key":"20_CR6","unstructured":"Farha, Y.A., Gall, J.: Uncertainty-aware anticipation of activities. arXiv preprint arXiv:1908.09540 (2019)"},{"key":"20_CR7","doi-asserted-by":"crossref","unstructured":"Finn, C., Levine, S.: Deep visual foresight for planning robot motion. In: ICRA (2017)","DOI":"10.1109\/ICRA.2017.7989324"},{"key":"20_CR8","doi-asserted-by":"crossref","unstructured":"Finn, C., Tan, X.Y., Duan, Y., Darrell, T., Levine, S., Abbeel, P.: Deep spatial autoencoders for visuomotor learning. In: ICRA (2016)","DOI":"10.1109\/ICRA.2016.7487173"},{"key":"20_CR9","doi-asserted-by":"crossref","unstructured":"Furnari, A., Farinella, G.M.: What would you expect? anticipating egocentric actions with rolling-unrolling LSTMs and modality attention. In: ICCV, pp. 6252\u20136261 (2019)","DOI":"10.1109\/ICCV.2019.00635"},{"key":"20_CR10","volume-title":"Automated Planning: Theory and Practice","author":"M Ghallab","year":"2004","unstructured":"Ghallab, M., Nau, D., Traverso, P.: Automated Planning: Theory and Practice. Elsevier, Amsterdam (2004)"},{"key":"20_CR11","unstructured":"Hafner, D., et al.: Learning latent dynamics for planning from pixels. In: ICML (2019)"},{"key":"20_CR12","doi-asserted-by":"crossref","unstructured":"Hayes, B., Scassellati, B.: Autonomously constructing hierarchical task networks for planning and human-robot collaboration. In: ICRA (2016)","DOI":"10.1109\/ICRA.2016.7487760"},{"key":"20_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1007\/978-3-319-46493-0_9","volume-title":"Computer Vision \u2013 ECCV 2016","author":"D-A Huang","year":"2016","unstructured":"Huang, D.-A., Fei-Fei, L., Niebles, J.C.: Connectionist temporal modeling for weakly supervised action labeling. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 137\u2013153. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_9"},{"key":"20_CR14","doi-asserted-by":"crossref","unstructured":"Ke, Q., Fritz, M., Schiele, B.: Time-conditioned action anticipation in one shot. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01016"},{"key":"20_CR15","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1613\/jair.5575","volume":"61","author":"G Konidaris","year":"2018","unstructured":"Konidaris, G., Kaelbling, L.P., Lozano-Perez, T.: From skills to symbols: learning symbolic representations for abstract high-level planning. J. Artif. Intell. Res. 61, 215\u2013289 (2018)","journal-title":"J. Artif. Intell. Res."},{"key":"20_CR16","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Arslan, A., Serre, T.: The language of actions: recovering the syntax and semantics of goal-directed human activities. In: CVPR (2014)","DOI":"10.1109\/CVPR.2014.105"},{"key":"20_CR17","unstructured":"Kurutach, T., Tamar, A., Yang, G., Russell, S.J., Abbeel, P.: Learning plannable representations with causal infogan. In: NeurIPS (2018)"},{"key":"20_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"689","DOI":"10.1007\/978-3-319-10578-9_45","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T Lan","year":"2014","unstructured":"Lan, T., Chen, T.-C., Savarese, S.: A hierarchical representation for future action prediction. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8691, pp. 689\u2013704. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10578-9_45"},{"key":"20_CR19","unstructured":"Lee, A.X., Zhang, R., Ebert, F., Abbeel, P., Finn, C., Levine, S.: Stochastic adversarial video prediction. arXiv preprint arXiv:1804.01523 (2018)"},{"key":"20_CR20","doi-asserted-by":"crossref","unstructured":"Lu, C., Hirsch, M., Scholkopf, B.: Flexible spatio-temporal networks for video prediction. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.230"},{"key":"20_CR21","unstructured":"McDermott, D., et al.: PDDL-the planning domain definition language (1998)"},{"key":"20_CR22","doi-asserted-by":"crossref","unstructured":"Mehrasa, N., Jyothi, A.A., Durand, T., He, J., Sigal, L., Mori, G.: A variational auto-encoder model for stochastic point processes. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00328"},{"key":"20_CR23","doi-asserted-by":"crossref","unstructured":"Miech, A., Zhukov, D., Alayrac, J.B., Tapaswi, M., Laptev, I., Sivic, J.: Howto100m: Learning a text-video embedding by watching hundred million narrated video clips. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00272"},{"key":"20_CR24","unstructured":"Oh, J., Guo, X., Lee, H., Lewis, R.L., Singh, S.: Action-conditional video prediction using deep networks in atari games. In: NeurIPS (2015)"},{"key":"20_CR25","doi-asserted-by":"crossref","unstructured":"Pathak, D., Agrawal, P., Efros, A.A., Darrell, T.: Curiosity-driven exploration by self-supervised prediction. In: CVPR Workshops (2017)","DOI":"10.1109\/CVPRW.2017.70"},{"key":"20_CR26","unstructured":"Ranzato, M., Szlam, A., Bruna, J., Mathieu, M., Collobert, R., Chopra, S.: Video (language) modeling: a baseline for generative models of natural videos. arXiv preprint arXiv:1412.6604 (2014)"},{"key":"20_CR27","doi-asserted-by":"crossref","unstructured":"Rhinehart, N., Kitani, K.M.: First-person activity forecasting with online inverse reinforcement learning. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.399"},{"key":"20_CR28","doi-asserted-by":"crossref","unstructured":"Richard, A., Kuehne, H., Iqbal, A., Gall, J.: Neuralnetwork-viterbi: a framework for weakly supervised video learning. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00771"},{"key":"20_CR29","doi-asserted-by":"crossref","unstructured":"Sener, F., Yao, A.: Zero-shot anticipation for instructional activities. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00095"},{"key":"20_CR30","doi-asserted-by":"crossref","unstructured":"Sermanet, P., et al.: Time-contrastive networks: self-supervised learning from video. In: ICRA (2018)","DOI":"10.1109\/CVPRW.2017.69"},{"key":"20_CR31","unstructured":"Srinivas, A., Jabri, A., Abbeel, P., Levine, S., Finn, C.: Universal planning networks. In: ICML (2018)"},{"key":"20_CR32","doi-asserted-by":"crossref","unstructured":"Sun, C., Myers, A., Vondrick, C., Murphy, K., Schmid, C.: Videobert: a joint model for video and language representation learning. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00756"},{"key":"20_CR33","doi-asserted-by":"crossref","unstructured":"Tang, Y., et al.: Coin: a large-scale dataset for comprehensive instructional video analysis. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00130"},{"key":"20_CR34","doi-asserted-by":"crossref","unstructured":"Vondrick, C., Pirsiavash, H., Torralba, A.: Anticipating visual representations from unlabeled video. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.18"},{"key":"20_CR35","doi-asserted-by":"crossref","unstructured":"Wang, X., Hu, J.F., Lai, J.H., Zhang, J., Zheng, W.S.: Progressive teacher-student learning for early action prediction. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00367"},{"key":"20_CR36","doi-asserted-by":"crossref","unstructured":"Zeng, K.H., Shen, W.B., Huang, D.A., Sun, M., Carlos Niebles, J.: Visual forecasting by imitating dynamics in natural sequences. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.326"},{"key":"20_CR37","doi-asserted-by":"crossref","unstructured":"Zhou, L., Xu, C., Corso, J.J.: Towards automatic learning of procedures from web instructional videos. In: AAAI (2018)","DOI":"10.1609\/aaai.v32i1.12342"},{"key":"20_CR38","doi-asserted-by":"crossref","unstructured":"Zhukov, D., Alayrac, J.B., Cinbis, R.G., Fouhey, D., Laptev, I., Sivic, J.: Cross-task weakly supervised learning from instructional videos. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00365"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58621-8_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T00:07:13Z","timestamp":1732579633000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58621-8_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030586201","9783030586218"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58621-8_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"27 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic. From the ECCV Workshops 249 full papers, 18 short papers, and 21 further contributions were published out of a total of 467 submissions.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}