{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:52:19Z","timestamp":1778082739475,"version":"3.51.4"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031198328","type":"print"},{"value":"9783031198335","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-19833-5_19","type":"book-chapter","created":{"date-parts":[[2022,11,4]],"date-time":"2022-11-04T00:40:30Z","timestamp":1667522430000},"page":"319-335","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":17,"title":["Flow Graph to\u00a0Video Grounding for\u00a0Weakly-Supervised Multi-step Localization"],"prefix":"10.1007","author":[{"given":"Nikita","family":"Dvornik","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Isma","family":"Hadji","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hai","family":"Pham","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dhaivat","family":"Bhatt","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Brais","family":"Martinez","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Afsaneh","family":"Fazly","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Allan D.","family":"Jepson","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,11,4]]},"reference":[{"key":"19_CR1","doi-asserted-by":"crossref","unstructured":"Bi, J., Luo, J., Xu, C.: Procedure planning in instructional videos via contextual modeling and model-based policy learning. In: Proceedings of the International Conference on Computer Vision (ICCV) (2021)","DOI":"10.1109\/ICCV48922.2021.01532"},{"key":"19_CR2","unstructured":"Cai, X., Xu, T., Yi, J., Huang, J., Rajasekaran, S.: DTWNet: A dynamic time warping network. In: Advances in Neural Information Processing Systems (NeurIPS) (2019)"},{"key":"19_CR3","doi-asserted-by":"crossref","unstructured":"Cao, K., Ji, J., Cao, Z., Chang, C., Niebles, J.C.: Few-shot video classification via temporal alignment. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.01063"},{"key":"19_CR4","doi-asserted-by":"crossref","unstructured":"Chang, C., Huang, D., Sui, Y., Fei-Fei, L., Niebles, J.C.: D3TW: Discriminative differentiable dynamic time warping for weakly supervised action alignment and segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00366"},{"key":"19_CR5","doi-asserted-by":"crossref","unstructured":"Chang, C.Y., Huang, D.A., Xu, D., Adeli, E., Fei-Fei, L., Niebles, J.C.: Procedure planning in instructional videos. In: Proceedings of the European Conference on Computer Vision (ECCV) (2020)","DOI":"10.1007\/978-3-030-58621-8_20"},{"key":"19_CR6","doi-asserted-by":"crossref","unstructured":"Chang, X., Tung, F., Mori, G.: Learning discriminative prototypes with dynamic time warping. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2021)","DOI":"10.1109\/CVPR46437.2021.00829"},{"key":"19_CR7","unstructured":"Cuturi, M., Blondel, M.: Soft-DTW: A differentiable loss function for time-series. In: International Conference on Machine Learning (ICML) (2017)"},{"key":"19_CR8","unstructured":"Ding, L., Xu, C.: Weakly-supervised action segmentation with iterative soft boundary assignment. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)"},{"key":"19_CR9","doi-asserted-by":"crossref","unstructured":"Donatelli, L., Schmidt, T., Biswas, D., K\u00f6hn, A., Zhai, F., Koller, A.: Aligning actions across recipe graphs. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.554"},{"key":"19_CR10","unstructured":"Dvornik, N., Hadji, I., Derpanis, K.G., Garg, A., Jepson, A.: Drop-DTW: Aligning common signal between sequences while dropping outliers. In: Advances in Neural Information Processing Systems (NeurIPS) (2021)"},{"key":"19_CR11","doi-asserted-by":"crossref","unstructured":"Dwibedi, D., Aytar, Y., Tompson, J., Sermanet, P., Zisserman, A.: Temporal cycle-consistency learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00190"},{"key":"19_CR12","doi-asserted-by":"crossref","unstructured":"Caba Heilbron, F., Victor Escorcia, B.G., Niebles, J.C.: ActivityNet: A large-scale video benchmark for human activity understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2015)","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"19_CR13","doi-asserted-by":"crossref","unstructured":"Girdhar, R., Grauman, K.: Anticipative Video Transformer. In: Proceedings of the International Conference on Computer Vision (ICCV) (2021)","DOI":"10.1109\/ICCV48922.2021.01325"},{"key":"19_CR14","doi-asserted-by":"crossref","unstructured":"Hadji, I., Derpanis, K.G., Jepson, A.D.: Representation learning via global temporal alignment and cycle-consistency. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2021)","DOI":"10.1109\/CVPR46437.2021.01092"},{"key":"19_CR15","doi-asserted-by":"crossref","unstructured":"Huang, D., Fei-Fei, L., Niebles, J.C.: Connectionist temporal modeling for weakly supervised action labeling. In: Proceedings of the European Conference on Computer Vision (ECCV) (2016)","DOI":"10.1007\/978-3-319-46493-0_9"},{"key":"19_CR16","doi-asserted-by":"crossref","unstructured":"Jain, C., Zhang, H., Gao, Y., Aluru, S.: On the complexity of sequence to graph alignment. J. Comput. Biol. 27(4), 640\u2013654 (2020)","DOI":"10.1089\/cmb.2019.0066"},{"key":"19_CR17","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1089\/cmb.2017.0264","volume":"261","author":"VNS Kavya","year":"2019","unstructured":"Kavya, V.N.S., Tayal, K., Srinivasan, R., Sivadasan, N.: Sequence alignment on directed graphs. J. Comput. Biol.\u202f: J. Comput. Mol. Cell Biol. 261, 53\u201367 (2019)","journal-title":"J. Comput. Biol. : J. Comput. Mol. Cell Biol."},{"key":"19_CR18","doi-asserted-by":"crossref","unstructured":"Kiddon, C., Ponnuraj, G.T., Zettlemoyer, L., Choi, Y.: Mise en place: Unsupervised interpretation of instructional recipes. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP) (2015)","DOI":"10.18653\/v1\/D15-1114"},{"key":"19_CR19","doi-asserted-by":"crossref","unstructured":"Lee, C., Grasso, C., Sharlow, M.F.: Multiple sequence alignment using partial order graphs. Bioinformatics 18(3), 452\u2013464 (2002)","DOI":"10.1093\/bioinformatics\/18.3.452"},{"key":"19_CR20","unstructured":"Luo, H., et al.: UniVL: A unified video and language pre-training model for multimodal understanding and generation. arXiv preprint arXiv:2002.06353 (2020)"},{"key":"19_CR21","doi-asserted-by":"crossref","unstructured":"Ma, M., Fan, H., Kitani, K.M.: Going deeper into first-person activity recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.209"},{"key":"19_CR22","doi-asserted-by":"crossref","unstructured":"Miech, A., Alayrac, J.B., Smaira, L., Laptev, I., Sivic, J., Zisserman, A.: End-to-End Learning of Visual Representations from Uncurated Instructional Videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.00990"},{"key":"19_CR23","doi-asserted-by":"crossref","unstructured":"Miech, A., Zhukov, D., Alayrac, J.B., Tapaswi, M., Laptev, I., Sivic, J.: HowTo100M: Learning a Text-Video Embedding by Watching Hundred Million Narrated Video Clips. In: Proceedings of the International Conference on Computer Vision (ICCV) (2019)","DOI":"10.1109\/ICCV.2019.00272"},{"key":"19_CR24","doi-asserted-by":"publisher","unstructured":"M\u00fcller, M.: Information Retrieval for Music and Motion. Springer-Verlag, Berlin, Heidelberg (2007). https:\/\/doi.org\/10.1007\/978-3-540-74048-3","DOI":"10.1007\/978-3-540-74048-3"},{"key":"19_CR25","doi-asserted-by":"crossref","unstructured":"Nakatsu, N., Kambayashi, Y., Yajima, S.: A longest common subsequence algorithm suitable for similar text strings. Acta Inf. 18(2), 17\u201319 (1982)","DOI":"10.1007\/BF00264437"},{"issue":"1","key":"19_CR26","doi-asserted-by":"publisher","first-page":"455","DOI":"10.1016\/S0304-3975(99)00333-3","volume":"237","author":"G Navarro","year":"2000","unstructured":"Navarro, G.: Improved approximate pattern matching on hypertext. Theoret. Comput. Sci. 237(1), 455\u2013463 (2000)","journal-title":"Theoret. Comput. Sci."},{"key":"19_CR27","doi-asserted-by":"crossref","unstructured":"Needleman, S.B., Wunsch, C.D.: A general method applicable to the search for similarities in the amino acid sequence of two proteins. J. Mol. Biol. 48(3), 443\u2013453 (1970)","DOI":"10.1016\/0022-2836(70)90057-4"},{"issue":"19","key":"19_CR28","doi-asserted-by":"publisher","first-page":"3599","DOI":"10.1093\/bioinformatics\/btz162","volume":"35","author":"M Rautiainen","year":"2019","unstructured":"Rautiainen, M., M\u00e4kinen, V., Marschall, T.: Bit-parallel sequence-to-graph alignment. Bioinformatics 35(19), 3599\u20133607 (2019)","journal-title":"Bioinformatics"},{"key":"19_CR29","doi-asserted-by":"crossref","unstructured":"Richard, A., Kuehne, H., Iqbal, A., Gall, J.: NeuralNetwork-Viterbi: A framework for weakly supervised video learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00771"},{"key":"19_CR30","doi-asserted-by":"crossref","unstructured":"Sakoe, H., Chiba, S.: Dynamic programming algorithm optimization for spoken processing recognition. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), vol.26, pp. 43\u201349 (1978)","DOI":"10.1109\/TASSP.1978.1163055"},{"key":"19_CR31","doi-asserted-by":"crossref","unstructured":"Sakurai, Y., Faloutsos, C., Yamamuro, M.: Stream monitoring under the time warping distance. In: International Conference on Data Engineering (ICDE) (2007)","DOI":"10.1109\/ICDE.2007.368963"},{"key":"19_CR32","doi-asserted-by":"crossref","unstructured":"Schumacher, P., Minor, M., Walter, K., Bergmann, R.: Extraction of procedural knowledge from the web: A comparison of two workflow extraction approaches. In: Proceedings of the 21st International Conference on World Wide Web (2012)","DOI":"10.1145\/2187980.2188194"},{"key":"19_CR33","doi-asserted-by":"crossref","unstructured":"Senner, F., Yao, A.: Zero-shot anticipation for instructional activities (2019)","DOI":"10.1109\/ICCV.2019.00095"},{"key":"19_CR34","doi-asserted-by":"crossref","unstructured":"Sermanet, P., et al.: Time-contrastive networks: Self-supervised learning from video. In: IEEE International Conference on Robotics and Automation (ICRA) (2018)","DOI":"10.1109\/ICRA.2018.8462891"},{"key":"19_CR35","doi-asserted-by":"crossref","unstructured":"Smith, T.F., Waterman, M.S.: Identification of common molecular subsequences. J. Mol. Biol. 147(1), 195\u2013197 (1981)","DOI":"10.1016\/0022-2836(81)90087-5"},{"key":"19_CR36","doi-asserted-by":"crossref","unstructured":"Tang, Y., et al.: COIN: A large-scale dataset for comprehensive instructional video analysis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00130"},{"key":"19_CR37","unstructured":"Wikipedia: Topological sorting \u2013 Wikipedia, the free encyclopedia. https:\/\/en.wikipedia.org\/w\/index.php?title=Topological%20sorting &oldid=1062117596. Accessed 07 Mar 2022"},{"key":"19_CR38","unstructured":"Yamakata, Y., Mori, S., Carroll, J.: English recipe flow graph corpus. In: Proceedings of the 12th Language Resources and Evaluation Conference (2020)"},{"key":"19_CR39","doi-asserted-by":"crossref","unstructured":"Yang, A., Miech, A., Sivic, J., Laptev, I., Schmid, C.: Just Ask: Learning to Answer Questions from Millions of Narrated Videos. In: Proceedings of the International Conference on Computer Vision (ICCV), (2021)","DOI":"10.1109\/ICCV48922.2021.00171"},{"key":"19_CR40","doi-asserted-by":"crossref","unstructured":"Zhou, L., Xu, C., Corso, J.J.: Towards automatic learning of procedures from web instructional videos. In: AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.12342"},{"key":"19_CR41","doi-asserted-by":"crossref","unstructured":"Zhukov, D., Alayrac, J.B., Cinbis, R.G., Fouhey, D., Laptev, I., Sivic, J.: Cross-task weakly supervised learning from instructional videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00365"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-19833-5_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T15:37:35Z","timestamp":1673278655000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-19833-5_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031198328","9783031198335"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-19833-5_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"4 November 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"From the workshops, 367 reviewed full papers have been selected for publication","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}