{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T04:09:38Z","timestamp":1773115778229,"version":"3.50.1"},"publisher-location":"Cham","reference-count":70,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031198328","type":"print"},{"value":"9783031198335","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-19833-5_3","type":"book-chapter","created":{"date-parts":[[2022,11,4]],"date-time":"2022-11-04T00:40:30Z","timestamp":1667522430000},"page":"33-51","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":35,"title":["Spotting Temporally Precise, Fine-Grained Events in\u00a0Video"],"prefix":"10.1007","author":[{"given":"James","family":"Hong","sequence":"first","affiliation":[]},{"given":"Haotian","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Micha\u00ebl","family":"Gharbi","sequence":"additional","affiliation":[]},{"given":"Matthew","family":"Fisher","sequence":"additional","affiliation":[]},{"given":"Kayvon","family":"Fatahalian","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,11,4]]},"reference":[{"key":"3_CR1","doi-asserted-by":"crossref","unstructured":"Ahn, H., Lee, D.: Refining action segmentation with hierarchical video representations. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 16302\u201316310, October 2021","DOI":"10.1109\/ICCV48922.2021.01599"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Alwassel, H., Giancola, S., Ghanem, B.: TSP: temporally-sensitive pretraining of video encoders for localization tasks. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) Workshops, pp. 3173\u20133183, October 2021","DOI":"10.1109\/ICCVW54120.2021.00356"},{"key":"3_CR3","doi-asserted-by":"crossref","unstructured":"Buch, S., Escorcia, V., Ghanem, B., Niebles, J.C.: End-to-end, single-stream temporal action detection in untrimmed videos. In: Proceedings of the British Machine Vision Conference (BMVC), September 2017","DOI":"10.5244\/C.31.93"},{"key":"3_CR4","doi-asserted-by":"crossref","unstructured":"Buch, S., Escorcia, V., Shen, C., Ghanem, B., Carlos Niebles, J.: SST: single-stream temporal action proposals. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), July 2017","DOI":"10.1109\/CVPR.2017.675"},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Caba Heilbron, F., Escorcia, V., Ghanem, B., Carlos Niebles, J.: ActivityNet: a large-scale video benchmark for human activity understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2015","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo Vadis, action recognition? A new model and the Kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), July 2017","DOI":"10.1109\/CVPR.2017.502"},{"key":"3_CR7","doi-asserted-by":"crossref","unstructured":"Chen, M.H., Li, B., Bao, Y., AlRegib, G.: Action segmentation with mixed temporal domain adaptation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), March 2020","DOI":"10.1109\/WACV45572.2020.9093535"},{"key":"3_CR8","unstructured":"Chung, J., Gulcehre, C., Cho, K., Bengio, Y.: Empirical evaluation of gated recurrent neural networks on sequence modeling. In: Proceedings of NIPS Deep Learning and Representation Learning Workshop (2014)"},{"key":"3_CR9","doi-asserted-by":"crossref","unstructured":"Cioppa, A., et al.: A context-aware loss function for action spotting in soccer videos. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), June 2020","DOI":"10.1109\/CVPR42600.2020.01314"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Dai, R., Das, S., Minciullo, L., Garattoni, L., Francesca, G., Bremond, F.: PDAN: pyramid dilated attention network for action detection. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 2970\u20132979, January 2021","DOI":"10.1109\/WACV48630.2021.00301"},{"key":"3_CR11","unstructured":"Dai, R., et al.: Toyota smarthome untrimmed: real-world untrimmed videos for activity detection (2020). arXiv:2010.14982"},{"key":"3_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"753","DOI":"10.1007\/978-3-030-01225-0_44","volume-title":"Computer Vision \u2013 ECCV 2018","author":"D Damen","year":"2018","unstructured":"Damen, D., et al.: Scaling egocentric vision: the EPIC-kitchens dataset. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11208, pp. 753\u2013771. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01225-0_44"},{"key":"3_CR13","doi-asserted-by":"crossref","unstructured":"Deliege, A., et al.: SoccerNet-v2: a dataset and benchmarks for holistic understanding of broadcast soccer videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, pp. 4508\u20134519, June 2021","DOI":"10.1109\/CVPRW53098.2021.00508"},{"key":"3_CR14","unstructured":"Deli\u00e8ge, A., et al.: SoccerNet - action spotting (2022). https:\/\/github.com\/SoccerNet\/sn-spotting"},{"key":"3_CR15","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), June 2009","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"3_CR16","unstructured":"Ding, L., Xu, C.: TricorNet: a hybrid temporal convolutional and recurrent network for video action segmentation (2017). arXiv:1705.07818"},{"key":"3_CR17","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (ICLR) (2021)"},{"key":"3_CR18","doi-asserted-by":"crossref","unstructured":"Fan, H., et al.: Multiscale vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 6824\u20136835, October 2021","DOI":"10.1109\/ICCV48922.2021.00675"},{"key":"3_CR19","doi-asserted-by":"crossref","unstructured":"Farha, Y.A., Gall, J.: MS-TCN: multi-stage temporal convolutional network for action segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), June 2019","DOI":"10.1109\/CVPR.2019.00369"},{"key":"3_CR20","doi-asserted-by":"crossref","unstructured":"Fathi, A., Ren, X., Rehg, J.M.: Learning to recognize objects in egocentric activities. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), June 2011","DOI":"10.1109\/CVPR.2011.5995444"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: SlowFast networks for video recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), October 2019","DOI":"10.1109\/ICCV.2019.00630"},{"key":"3_CR22","doi-asserted-by":"crossref","unstructured":"Giancola, S., Ghanem, B.: Temporally-aware feature pooling for action spotting in soccer broadcasts. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, pp. 4490\u20134499, June 2021","DOI":"10.1109\/CVPRW53098.2021.00506"},{"key":"3_CR23","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2014","DOI":"10.1109\/CVPR.2014.81"},{"key":"3_CR24","doi-asserted-by":"crossref","unstructured":"Hao, Y., Zhang, H., Ngo, C.W., Liu, Q., Hu, X.: Compact bilinear augmented query structured attention for sport highlights classification. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 628\u2013636. Association for Computing Machinery, New York (2020)","DOI":"10.1145\/3394171.3413595"},{"key":"3_CR25","doi-asserted-by":"crossref","unstructured":"Hong, J., Fisher, M., Gharbi, M., Fatahalian, K.: Video pose distillation for few-shot, fine-grained sports action recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 9254\u20139263, October 2021","DOI":"10.1109\/ICCV48922.2021.00912"},{"key":"3_CR26","doi-asserted-by":"crossref","unstructured":"Ibrahim, M.S., Muralidharan, S., Deng, Z., Vahdat, A., Mori, G.: A hierarchical deep temporal model for group activity recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2016","DOI":"10.1109\/CVPR.2016.217"},{"key":"3_CR27","doi-asserted-by":"crossref","unstructured":"Ishikawa, Y., Kasai, S., Aoki, Y., Kataoka, H.: Alleviating over-segmentation errors by detecting action boundaries. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 2322\u20132331, January 2021","DOI":"10.1109\/WACV48630.2021.00237"},{"key":"3_CR28","unstructured":"Jiang, Y.G., et al.: THUMOS challenge: action recognition with a large number of classes (2014)"},{"key":"3_CR29","unstructured":"Kay, W., et al.: The Kinetics human action video dataset (2017). arXiv:1705.06950"},{"key":"3_CR30","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Arslan, A., Serre, T.: The language of actions: recovering the syntax and semantics of goal-directed human activities. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2014","DOI":"10.1109\/CVPR.2014.105"},{"key":"3_CR31","unstructured":"Li, A., Thotakuri, M., Ross, D.A., Carreira, J., Vostrikov, A., Zisserman, A.: The AVA-kinetics localized human actions video dataset (2020). arXiv:2005.00214"},{"key":"3_CR32","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"520","DOI":"10.1007\/978-3-030-01231-1_32","volume-title":"Computer Vision \u2013 ECCV 2018","author":"Y Li","year":"2018","unstructured":"Li, Y., Li, Y., Vasconcelos, N.: RESOUND: towards action recognition without representation bias. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11210, pp. 520\u2013535. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01231-1_32"},{"key":"3_CR33","doi-asserted-by":"crossref","unstructured":"Li, Y., Chen, L., He, R., Wang, Z., Wu, G., Wang, L.: MultiSports: a multi-person video dataset of spatio-temporally localized sports actions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 13536\u201313545, October 2021","DOI":"10.1109\/ICCV48922.2021.01328"},{"key":"3_CR34","doi-asserted-by":"crossref","unstructured":"Lin, C., et al.: Learning salient boundary feature for anchor-free temporal action localization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). pp. 3320\u20133329, June 2021","DOI":"10.1109\/CVPR46437.2021.00333"},{"key":"3_CR35","doi-asserted-by":"crossref","unstructured":"Lin, J., Gan, C., Han, S.: TSM: temporal shift module for efficient video understanding. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), October 2019","DOI":"10.1109\/ICCV.2019.00718"},{"key":"3_CR36","doi-asserted-by":"crossref","unstructured":"Lin, T., Liu, X., Li, X., Ding, E., Wen, S.: BMN: boundary-matching network for temporal action proposal generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), October 2019","DOI":"10.1109\/ICCV.2019.00399"},{"key":"3_CR37","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-030-01225-0_1","volume-title":"Computer Vision \u2013 ECCV 2018","author":"T Lin","year":"2018","unstructured":"Lin, T., Zhao, X., Su, H., Wang, C., Yang, M.: BSN: boundary sensitive network for temporal action proposal generation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11208, pp. 3\u201321. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01225-0_1"},{"key":"3_CR38","doi-asserted-by":"crossref","unstructured":"Liu, X., Bai, S., Bai, X.: An empirical study of end-to-end temporal action detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 20010\u201320019, June 2022","DOI":"10.1109\/CVPR52688.2022.01938"},{"key":"3_CR39","doi-asserted-by":"crossref","unstructured":"Liu, X., Hu, Y., Bai, S., Ding, F., Bai, X., Torr, P.H.S.: Multi-shot temporal event localization: a benchmark. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12596\u201312606, June 2021","DOI":"10.1109\/CVPR46437.2021.01241"},{"key":"3_CR40","unstructured":"Loshchilov, I., Hutter, F.: SGDR: stochastic gradient descent with warm restarts. In: Proceedings of the International Conference on Learning Representations (ICLR) (2017)"},{"key":"3_CR41","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: Proceedings of the International Conference on Learning Representations (ICLR) (2019)"},{"key":"3_CR42","unstructured":"Nvidia: Nvidia RTX A5000 data sheet (2021)"},{"key":"3_CR43","doi-asserted-by":"crossref","unstructured":"Radosavovic, I., Kosaraju, R.P., Girshick, R., He, K., Dollar, P.: Designing network design spaces. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), June 2020","DOI":"10.1109\/CVPR42600.2020.01044"},{"key":"3_CR44","doi-asserted-by":"crossref","unstructured":"Shao, D., Zhao, Y., Dai, B., Lin, D.: FineGym: a hierarchical video dataset for fine-grained action understanding. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), June 2020","DOI":"10.1109\/CVPR42600.2020.00269"},{"key":"3_CR45","unstructured":"Sigurdsson, G., Choi, J.: Charades challenge (2017)"},{"key":"3_CR46","doi-asserted-by":"crossref","unstructured":"Sigurdsson, G.A., Divvala, S., Farhadi, A., Gupta, A.: Asynchronous temporal fields for action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), July 2017","DOI":"10.1109\/CVPR.2017.599"},{"key":"3_CR47","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"510","DOI":"10.1007\/978-3-319-46448-0_31","volume-title":"Computer Vision \u2013 ECCV 2016","author":"GA Sigurdsson","year":"2016","unstructured":"Sigurdsson, G.A., Varol, G., Wang, X., Farhadi, A., Laptev, I., Gupta, A.: Hollywood in homes: crowdsourcing data collection for activity understanding. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 510\u2013526. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_31"},{"key":"3_CR48","doi-asserted-by":"crossref","unstructured":"Singh, B., Marks, T.K., Jones, M., Tuzel, O., Shao, M.: A multi-stream bi-directional recurrent neural network for fine-grained action detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2016","DOI":"10.1109\/CVPR.2016.216"},{"key":"3_CR49","unstructured":"Singhania, D., Rahaman, R., Yao, A.: Coarse to fine multi-resolution temporal convolutional network (2021). arXiv:2105.10859"},{"key":"3_CR50","unstructured":"Soares, J.V.B., Shah, A.: Action spotting using dense detection anchors revisited: submission to the SoccerNet Challenge 2022 (2022). arXiv:2206.07846"},{"key":"3_CR51","doi-asserted-by":"crossref","unstructured":"Soares, J.V.B., Shah, A., Biswas, T.: Temporally precise action spotting in soccer videos using dense detection anchors (2022). arXiv:2205.10450","DOI":"10.1109\/ICIP46576.2022.9897256"},{"key":"3_CR52","doi-asserted-by":"crossref","unstructured":"Stein, S., McKenna, S.J.: Combining embedded accelerometers with computer vision for recognizing food preparation activities. In: Proceedings of ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp), pp. 729\u2013738. Association for Computing Machinery, New York (2013)","DOI":"10.1145\/2493432.2493482"},{"key":"3_CR53","doi-asserted-by":"crossref","unstructured":"Sudhakaran, S., Escalera, S., Lanz, O.: Gate-shift networks for video action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), June 2020","DOI":"10.1109\/CVPR42600.2020.00118"},{"key":"3_CR54","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"402","DOI":"10.1007\/978-3-030-58536-5_24","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Z Teed","year":"2020","unstructured":"Teed, Z., Deng, J.: RAFT: recurrent all-pairs field transforms for optical flow. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12347, pp. 402\u2013419. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58536-5_24"},{"key":"3_CR55","doi-asserted-by":"crossref","unstructured":"Tirupattur, P., Duarte, K., Rawat, Y.S., Shah, M.: Modeling multi-label action dependencies for temporal action localization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1460\u20131470, June 2021","DOI":"10.1109\/CVPR46437.2021.00151"},{"key":"3_CR56","doi-asserted-by":"crossref","unstructured":"Tomei, M., Baraldi, L., Calderara, S., Bronzin, S., Cucchiara, R.: RMS-Net: regression and masking for soccer event spotting. In: 2020 25th International Conference on Pattern Recognition (ICPR), pp. 7699\u20137706. IEEE Computer Society, Los Alamitos, January 2021","DOI":"10.1109\/ICPR48806.2021.9412268"},{"key":"3_CR57","doi-asserted-by":"crossref","unstructured":"Tran, D., Wang, H., Torresani, L., Ray, J., LeCun, Y., Paluri, M.: A closer look at spatiotemporal convolutions for action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2018","DOI":"10.1109\/CVPR.2018.00675"},{"key":"3_CR58","unstructured":"TwentyBN: The 20BN-something-something dataset v2"},{"key":"3_CR59","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1007\/978-3-319-46484-8_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"L Wang","year":"2016","unstructured":"Wang, L., et al.: Temporal segment networks: towards good practices for deep action recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 20\u201336. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_2"},{"key":"3_CR60","doi-asserted-by":"crossref","unstructured":"Xu, J., Rao, Y., Yu, X., Chen, G., Zhou, J., Lu, J.: FineDiving: a fine-grained dataset for procedure-aware action quality assessment. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2949\u20132958, June 2022","DOI":"10.1109\/CVPR52688.2022.00296"},{"key":"3_CR61","doi-asserted-by":"crossref","unstructured":"Xu, M., Zhao, C., Rojas, D.S., Thabet, A., Ghanem, B.: G-TAD: sub-graph localization for temporal action detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), June 2020","DOI":"10.1109\/CVPR42600.2020.01017"},{"issue":"2\u20134","key":"3_CR62","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1007\/s11263-017-1013-y","volume":"126","author":"S Yeung","year":"2018","unstructured":"Yeung, S., Russakovsky, O., Jin, N., Andriluka, M., Mori, G., Fei-Fei, L.: Every moment counts: dense detailed labeling of actions in complex videos. Int. J. Comput. Vision 126(2\u20134), 375\u2013389 (2018)","journal-title":"Int. J. Comput. Vision"},{"key":"3_CR63","unstructured":"Yi, F., Wen, H., Jiang, T.: ASFormer: transformer for action segmentation. In: Proceedings of the British Machine Vision Conference (BMVC), November 2021"},{"key":"3_CR64","doi-asserted-by":"crossref","unstructured":"Zeng, R., et al.: Graph convolutional networks for temporal action localization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), October 2019","DOI":"10.1109\/ICCV.2019.00719"},{"key":"3_CR65","doi-asserted-by":"crossref","unstructured":"Zhang, C., Wu, J., Li, Y.: ActionFormer: localizing moments of actions with transformers (2022). arXiv:2202.07925","DOI":"10.1007\/978-3-031-19772-7_29"},{"key":"3_CR66","doi-asserted-by":"crossref","unstructured":"Zhang, H., Sciutto, C., Agrawala, M., Fatahalian, K.: Vid2Player: controllable video sprites that behave and appear like professional tennis players. ACM Trans. Graph. 40(3) (2021)","DOI":"10.1145\/3448978"},{"key":"3_CR67","unstructured":"Zhang, H., Cisse, M., Dauphin, Y.N., Lopez-Paz, D.: mixup: Beyond empirical risk minimization. In: Proceedings of the International Conference on Learning Representations (ICLR) (2018)"},{"key":"3_CR68","doi-asserted-by":"crossref","unstructured":"Zhao, C., Thabet, A.K., Ghanem, B.: Video self-stitching graph network for temporal action localization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 13658\u201313667, October 2021","DOI":"10.1109\/ICCV48922.2021.01340"},{"key":"3_CR69","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"831","DOI":"10.1007\/978-3-030-01246-5_49","volume-title":"Computer Vision \u2013 ECCV 2018","author":"B Zhou","year":"2018","unstructured":"Zhou, B., Andonian, A., Oliva, A., Torralba, A.: Temporal relational reasoning in videos. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11205, pp. 831\u2013846. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01246-5_49"},{"key":"3_CR70","unstructured":"Zhou, X., Kang, L., Cheng, Z., He, B., Xin, J.: Feature combination meets attention: Baidu soccer embeddings and transformer based temporal detection (2021). arXiv:2106.14447"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-19833-5_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T15:29:14Z","timestamp":1673278154000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-19833-5_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031198328","9783031198335"],"references-count":70,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-19833-5_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"4 November 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"From the workshops, 367 reviewed full papers have been selected for publication","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}