{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:22:19Z","timestamp":1772907739153,"version":"3.50.1"},"publisher-location":"Cham","reference-count":65,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030585730","type":"print"},{"value":"9783030585747","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58574-7_46","type":"book-chapter","created":{"date-parts":[[2020,11,12]],"date-time":"2020-11-12T16:19:07Z","timestamp":1605197947000},"page":"767-786","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":24,"title":["LEMMA: A Multi-view Dataset for L Earning Multi-agent Multi-task Activities"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4968-3290","authenticated-orcid":false,"given":"Baoxiong","family":"Jia","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8176-0241","authenticated-orcid":false,"given":"Yixin","family":"Chen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1524-7148","authenticated-orcid":false,"given":"Siyuan","family":"Huang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7024-1545","authenticated-orcid":false,"given":"Yixin","family":"Zhu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1925-5973","authenticated-orcid":false,"given":"Song-Chun","family":"Zhu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,11,13]]},"reference":[{"key":"46_CR1","doi-asserted-by":"crossref","unstructured":"Alayrac, J.B., Bojanowski, P., Agrawal, N., Sivic, J., Laptev, I., Lacoste-Julien, S.: Unsupervised learning from narrated instruction videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.495"},{"key":"46_CR2","doi-asserted-by":"crossref","unstructured":"Antol, S., et al.: VQA: visual question answering. In: Proceedings of International Conference on Computer Vision (ICCV) (2015)","DOI":"10.1109\/ICCV.2015.279"},{"key":"46_CR3","unstructured":"Baker, B., et al.: Emergent tool use from multi-agent autocurricula. In: International Conference on Learning Representations (ICLR) (2020)"},{"issue":"4","key":"46_CR4","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1016\/S1364-6613(00)01615-6","volume":"5","author":"DA Baldwin","year":"2001","unstructured":"Baldwin, D.A., Baird, J.A.: Discerning intentions in dynamic human action. Trends in Cognitive Sciences 5(4), 171\u2013178 (2001)","journal-title":"Trends in Cognitive Sciences"},{"issue":"3","key":"46_CR5","doi-asserted-by":"publisher","first-page":"708","DOI":"10.1111\/1467-8624.00310","volume":"72","author":"DA Baldwin","year":"2001","unstructured":"Baldwin, D.A., Baird, J.A., Saylor, M.M., Clark, M.A.: Infants parse dynamic action. Child Dev. 72(3), 708\u2013717 (2001)","journal-title":"Child Dev."},{"key":"46_CR6","unstructured":"Berner, C., et al.: Dota 2 with large scale deep reinforcement learning. arXiv preprint arXiv:1912.06680 (2019)"},{"key":"46_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"628","DOI":"10.1007\/978-3-319-10602-1_41","volume-title":"Computer Vision \u2013 ECCV 2014","author":"P Bojanowski","year":"2014","unstructured":"Bojanowski, P., et al.: Weakly supervised action labeling in videos under ordering constraints. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 628\u2013643. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_41"},{"key":"46_CR8","doi-asserted-by":"crossref","unstructured":"Caba Heilbron, F., Escorcia, V., Ghanem, B., Carlos Niebles, J.: Activitynet: a large-scale video benchmark for human activity understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2015)","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"46_CR9","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"46_CR10","doi-asserted-by":"crossref","unstructured":"Chen, Y., Huang, S., Yuan, T., Qi, S., Zhu, Y., Zhu, S.C.: Holistic++ scene understanding: single-view 3d holistic scene parsing and human pose estimation with human-object interaction and physical commonsense. In: Proceedings of International Conference on Computer Vision (ICCV) (2019)","DOI":"10.1109\/ICCV.2019.00874"},{"key":"46_CR11","unstructured":"Choi, W., Shahid, K., Savarese, S.: What are they doing?: Collective activity classification using spatio-temporal relationship among people. In: International Conference on Computer Vision Workshops (ICCV Workshops) (2009)"},{"key":"46_CR12","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1016\/j.actpsy.2006.09.007","volume":"124","author":"G Csibra","year":"2007","unstructured":"Csibra, G., Gergely, G.: \u2018Obsessed with goals\u2019: functions and mechanisms of teleological interpretation of actions in humans. Acta Psychol. 124, 60\u201378 (2007)","journal-title":"Acta Psychol."},{"key":"46_CR13","doi-asserted-by":"crossref","unstructured":"Damen, D., et al.: Scaling egocentric vision: the epic-kitchens dataset. In: Proceedings of European Conference on Computer Vision (ECCV) (2018)","DOI":"10.1007\/978-3-030-01225-0_44"},{"key":"46_CR14","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: Slowfast networks for video recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"key":"46_CR15","doi-asserted-by":"crossref","unstructured":"Fouhey, D.F., Kuo, W.C., Efros, A.A., Malik, J.: From lifestyle vlogs to everyday interactions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00524"},{"issue":"6873","key":"46_CR16","doi-asserted-by":"publisher","first-page":"755","DOI":"10.1038\/415755a","volume":"415","author":"G Gergely","year":"2002","unstructured":"Gergely, G., Bekkering, H., Kir\u00e1ly, I.: Rational imitation in preverbal infants. Nature 415(6873), 755\u2013755 (2002)","journal-title":"Nature"},{"key":"46_CR17","unstructured":"Girdhar, R., Ramanan, D.: Cater: a diagnostic dataset for compositional actions and temporal reasoning (2020)"},{"issue":"11","key":"46_CR18","doi-asserted-by":"publisher","first-page":"818","DOI":"10.1016\/j.tics.2016.08.005","volume":"20","author":"ND Goodman","year":"2016","unstructured":"Goodman, N.D., Frank, M.C.: Pragmatic language interpretation as probabilistic inference. Trends Cogn. Sci. 20(11), 818\u2013829 (2016)","journal-title":"Trends Cogn. Sci."},{"key":"46_CR19","unstructured":"Goyal, P., et al.: Accurate, large minibatch SGD: Training imagenet in 1 hour. arXiv preprint arXiv:1706.02677 (2017)"},{"key":"46_CR20","doi-asserted-by":"crossref","unstructured":"Goyal, R., et al.: The \u201csomething something\" video database for learning and evaluating visual common sense. In: Proceedings of International Conference on Computer Vision (ICCV) (2017)","DOI":"10.1109\/ICCV.2017.622"},{"key":"46_CR21","doi-asserted-by":"crossref","unstructured":"Grice, H.P.: Logic and conversation. In: Cole, P., Morgan, J.L. (eds.) Speech acts, pp. 41\u201358. Brill, Netherlands (1975)","DOI":"10.1163\/9789004368811_003"},{"key":"46_CR22","doi-asserted-by":"crossref","unstructured":"Gu, C., et al.: Ava: a video dataset of spatio-temporally localized atomic visual actions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00633"},{"key":"46_CR23","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: Proceedings of International Conference on Computer Vision (ICCV) (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"46_CR24","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"46_CR25","doi-asserted-by":"crossref","unstructured":"Huang, S., Qi, S., Zhu, Y., Xiao, Y., Xu, Y., Zhu, S.C.: Holistic 3D scene parsing and reconstruction from a single RGB image. In: Proceedings of European Conference on Computer Vision (ECCV) (2018)","DOI":"10.1007\/978-3-030-01234-2_12"},{"key":"46_CR26","doi-asserted-by":"crossref","unstructured":"Ibrahim, M.S., Muralidharan, S., Deng, Z., Vahdat, A., Mori, G.: A hierarchical deep temporal model for group activity recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.217"},{"key":"46_CR27","doi-asserted-by":"crossref","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3. 6m: large scale datasets and predictive methods for 3D human sensing in natural environments. IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI) 36(7), 1325\u20131339 (2013)","DOI":"10.1109\/TPAMI.2013.248"},{"issue":"2","key":"46_CR28","doi-asserted-by":"publisher","first-page":"201","DOI":"10.3758\/BF03212378","volume":"14","author":"G Johansson","year":"1973","unstructured":"Johansson, G.: Visual perception of biological motion and a model for its analysis. Percept. Psychophysics 14(2), 201\u2013211 (1973). https:\/\/doi.org\/10.3758\/BF03212378","journal-title":"Percept. Psychophysics"},{"key":"46_CR29","doi-asserted-by":"crossref","unstructured":"Karpathy, A., Toderici, G., Shetty, S., Leung, T., Sukthankar, R., Fei-Fei, L.: Large-scale video classification with convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2014)","DOI":"10.1109\/CVPR.2014.223"},{"key":"46_CR30","unstructured":"Kleiman-Weiner, M., Ho, M.K., Austerweil, J.L., Littman, M.L., Tenenbaum, J.B.: Coordinate to cooperate or compete: abstract goals and joint intentions in social interaction. In: Proceedings of the Annual Meeting of the Cognitive Science Society (CogSci) (2016)"},{"issue":"8","key":"46_CR31","doi-asserted-by":"publisher","first-page":"951","DOI":"10.1177\/0278364913478446","volume":"32","author":"HS Koppula","year":"2013","unstructured":"Koppula, H.S., Gupta, R., Saxena, A.: Learning human activities and object affordances from RGB-D videos. Int. J. Robot. Res. (IJRR) 32(8), 951\u2013970 (2013)","journal-title":"Int. J. Robot. Res. (IJRR)"},{"key":"46_CR32","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Arslan, A., Serre, T.: The language of actions: recovering the syntax and semantics of goal-directed human activities. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2014)","DOI":"10.1109\/CVPR.2014.105"},{"issue":"11","key":"46_CR33","doi-asserted-by":"publisher","first-page":"1311","DOI":"10.1068\/p2935","volume":"28","author":"M Land","year":"1999","unstructured":"Land, M., Mennie, N., Rusted, J.: The roles of vision and eye movements in the control of activities of daily living. Perception 28(11), 1311\u20131328 (1999)","journal-title":"Perception"},{"key":"46_CR34","doi-asserted-by":"crossref","unstructured":"Lerner, A., Chrysanthou, Y., Lischinski, D.: Crowds by example. In: Proceedings of Computer Graphics Forum (2007)","DOI":"10.1111\/j.1467-8659.2007.01089.x"},{"key":"46_CR35","doi-asserted-by":"crossref","unstructured":"Li, W., Zhang, Z., Liu, Z.: Action recognition based on a bag of 3d points. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2010)","DOI":"10.1109\/CVPRW.2010.5543273"},{"key":"46_CR36","doi-asserted-by":"crossref","unstructured":"Li, Y., Liu, M., Rehg, J.M.: In the eye of beholder: joint learning of gaze and actions in first person video. In: Proceedings of European Conference on Computer Vision (ECCV) (2018)","DOI":"10.1007\/978-3-030-01228-1_38"},{"key":"46_CR37","doi-asserted-by":"publisher","first-page":"502","DOI":"10.1109\/TPAMI.2019.2901464","volume":"42","author":"M Monfort","year":"2019","unstructured":"Monfort, M., et al.: Moments in time dataset: one million videos for event understanding. IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI) 42, 502\u2013508 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI)"},{"issue":"3","key":"46_CR38","doi-asserted-by":"publisher","first-page":"134","DOI":"10.1016\/S1364-6613(03)00028-7","volume":"7","author":"S Monsell","year":"2003","unstructured":"Monsell, S.: Task switching. Trends Cogn. Sci. 7(3), 134\u2013140 (2003)","journal-title":"Trends Cogn. Sci."},{"key":"46_CR39","unstructured":"Oh, S., et al.: A large-scale benchmark dataset for event recognition in surveillance video. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2011)"},{"key":"46_CR40","doi-asserted-by":"crossref","unstructured":"Pellegrini, S., Ess, A., Schindler, K., Van Gool, L.: You\u2019ll never walk alone: modeling social behavior for multi-target tracking. In: Proceedings of International Conference on Computer Vision (ICCV) (2009)","DOI":"10.1109\/ICCV.2009.5459260"},{"key":"46_CR41","doi-asserted-by":"crossref","unstructured":"Pirsiavash, H., Ramanan, D.: Detecting activities of daily living in first-person camera views. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2012)","DOI":"10.1109\/CVPR.2012.6248010"},{"key":"46_CR42","unstructured":"Qi, S., Jia, B., Huang, S., Wei, P., Zhu, S.C.: A generalized earley parser for human activity parsing and prediction. IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 1 (2020)"},{"key":"46_CR43","doi-asserted-by":"crossref","unstructured":"Ramanathan, V., Huang, J., Abu-El-Haija, S., Gorban, A., Murphy, K., Fei-Fei, L.: Detecting events and key actors in multi-person videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.332"},{"key":"46_CR44","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Proceedings of Advances in Neural Information Processing Systems (NeurIPS) (2015)"},{"key":"46_CR45","doi-asserted-by":"crossref","unstructured":"Rohrbach, M., Amin, S., Andriluka, M., Schiele, B.: A database for fine grained activity detection of cooking activities. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2012)","DOI":"10.1109\/CVPR.2012.6247801"},{"issue":"3","key":"46_CR46","doi-asserted-by":"publisher","first-page":"346","DOI":"10.1007\/s11263-015-0851-8","volume":"119","author":"M Rohrbach","year":"2016","unstructured":"Rohrbach, M., et al.: Recognizing fine-grained and composite activities using hand-centric features and script data. Int. J. Comput. Vis. (IJCV) 119(3), 346\u2013373 (2016). https:\/\/doi.org\/10.1007\/s11263-015-0851-8","journal-title":"Int. J. Comput. Vis. (IJCV)"},{"issue":"4","key":"46_CR47","doi-asserted-by":"publisher","first-page":"763","DOI":"10.1037\/0096-1523.27.4.763","volume":"27","author":"JS Rubinstein","year":"2001","unstructured":"Rubinstein, J.S., Meyer, D.E., Evans, J.E.: Executive control of cognitive processes in task switching. J. Exp. Psychol. Hum. Percept. Perform. 27(4), 763 (2001)","journal-title":"J. Exp. Psychol. Hum. Percept. Perform."},{"key":"46_CR48","doi-asserted-by":"crossref","unstructured":"Ryoo, M.S.: Human activity prediction: early recognition of ongoing activities from streaming videos. In: Proceedings of International Conference on Computer Vision (ICCV) (2011)","DOI":"10.1109\/ICCV.2011.6126349"},{"issue":"4","key":"46_CR49","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2897824.2925867","volume":"35","author":"M Savva","year":"2016","unstructured":"Savva, M., Chang, A.X., Hanrahan, P., Fisher, M., Nie\u00dfner, M.: Pigraphs: learning interaction snapshots from observations. ACM Trans. Graph. (TOG) 35(4), 1\u201312 (2016)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"46_CR50","unstructured":"Shu, T., Xie, D., Rothrock, B., Todorovic, S., Chun Zhu, S.: Joint inference of groups, events and human roles in aerial videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2015)"},{"key":"46_CR51","unstructured":"Sigurdsson, G.A., Gupta, A., Schmid, C., Farhadi, A., Alahari, K.: Charades-ego: a large-scale dataset of paired third and first person videos. arXiv preprint arXiv:1804.09626 (2018)"},{"key":"46_CR52","doi-asserted-by":"crossref","unstructured":"Sigurdsson, G.A., Varol, G., Wang, X., Farhadi, A., Laptev, I., Gupta, A.: Hollywood in homes: crowdsourcing data collection for activity understanding. In: Proceedings of European Conference on Computer Vision (ECCV) (2016)","DOI":"10.1007\/978-3-319-46448-0_31"},{"key":"46_CR53","unstructured":"Soomro, K., Zamir, A.R., Shah, M.: Ucf101: a dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012)"},{"key":"46_CR54","doi-asserted-by":"crossref","unstructured":"Stein, S., McKenna, S.J.: Combining embedded accelerometers with computer vision for recognizing food preparation activities. In: ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies (2013)","DOI":"10.1145\/2493432.2493482"},{"key":"46_CR55","doi-asserted-by":"crossref","unstructured":"Tang, Y., et al.: Coin: a large-scale dataset for comprehensive instructional video analysis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00130"},{"key":"46_CR56","doi-asserted-by":"crossref","unstructured":"Tapaswi, M., Zhu, Y., Stiefelhagen, R., Torralba, A., Urtasun, R., Fidler, S.: Movieqa: understanding stories in movies through question-answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.501"},{"key":"46_CR57","doi-asserted-by":"crossref","unstructured":"Toyer, S., Cherian, A., Han, T., Gould, S.: Human pose forecasting via deep markov models. In: International Conference on Digital Image Computing: Techniques and Applications (DICTA) (2017)","DOI":"10.1109\/DICTA.2017.8227441"},{"issue":"11","key":"46_CR58","first-page":"1473","volume":"18","author":"P Turaga","year":"2008","unstructured":"Turaga, P., Chellappa, R., Subrahmanian, V.S., Udrea, O.: Machine recognition of human activities: a survey. IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI) 18(11), 1473\u20131488 (2008)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI)"},{"issue":"7782","key":"46_CR59","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals, O., et al.: Grandmaster level in starcraft ii using multi-agent reinforcement learning. Nature 575(7782), 350\u2013354 (2019)","journal-title":"Nature"},{"issue":"1","key":"46_CR60","doi-asserted-by":"publisher","first-page":"184","DOI":"10.1007\/978-3-642-15561-1_44","volume":"101","author":"C Vondrick","year":"2013","unstructured":"Vondrick, C., Patterson, D., Ramanan, D.: Efficiently scaling up crowdsourced video annotation. Int. J. Comput. Vis. (IJCV) 101(1), 184\u2013204 (2013). https:\/\/doi.org\/10.1007\/978-3-642-15561-1_44","journal-title":"Int. J. Comput. Vis. (IJCV)"},{"issue":"1","key":"46_CR61","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/S0010-0277(98)00058-4","volume":"69","author":"AL Woodward","year":"1998","unstructured":"Woodward, A.L.: Infants selectively encode the goal object of an actor\u2019s reach. Cognition 69(1), 1\u201334 (1998)","journal-title":"Cognition"},{"key":"46_CR62","doi-asserted-by":"crossref","unstructured":"Wu, C.Y., Feichtenhofer, C., Fan, H., He, K., Krahenbuhl, P., Girshick, R.: Long-term feature banks for detailed video understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00037"},{"key":"46_CR63","doi-asserted-by":"crossref","unstructured":"Wu, C., Zhang, J., Savarese, S., Saxena, A.: Watch-n-patch: unsupervised understanding of actions and relations. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2015)","DOI":"10.1109\/CVPR.2015.7299065"},{"issue":"2\u20134","key":"46_CR64","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1007\/s11263-017-1013-y","volume":"126","author":"S Yeung","year":"2018","unstructured":"Yeung, S., Russakovsky, O., Jin, N., Andriluka, M., Mori, G., Fei-Fei, L.: Every moment counts: dense detailed labeling of actions in complex videos. Int. J. Comput. Vis. (IJCV) 126(2\u20134), 375\u2013389 (2018). https:\/\/doi.org\/10.1007\/s11263-017-1013-y","journal-title":"Int. J. Comput. Vis. (IJCV)"},{"key":"46_CR65","doi-asserted-by":"crossref","unstructured":"Zhou, L., Xu, C., Corso, J.J.: Towards automatic learning of procedures from web instructional videos. In: Proceedings of AAAI Conference on Artificial Intelligence (AAAI) (2018)","DOI":"10.1609\/aaai.v32i1.12342"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58574-7_46","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,12]],"date-time":"2024-11-12T00:14:34Z","timestamp":1731370474000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58574-7_46"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585730","9783030585747"],"references-count":65,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58574-7_46","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"13 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic. From the ECCV Workshops 249 full papers, 18 short papers, and 21 further contributions were published out of a total of 467 submissions.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}