{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T03:12:37Z","timestamp":1768014757754,"version":"3.49.0"},"reference-count":56,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,10,1]],"date-time":"2021-10-01T00:00:00Z","timestamp":1633046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,10,1]],"date-time":"2021-10-01T00:00:00Z","timestamp":1633046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,10]]},"DOI":"10.1109\/iccv48922.2021.00792","type":"proceedings-article","created":{"date-parts":[[2022,2,28]],"date-time":"2022-02-28T22:08:02Z","timestamp":1646086082000},"page":"8002-8012","source":"Crossref","is-referenced-by-count":13,"title":["Temporal Action Detection with Multi-level Supervision"],"prefix":"10.1109","author":[{"given":"Baifeng","family":"Shi","sequence":"first","affiliation":[{"name":"UC Berkeley"}]},{"given":"Qi","family":"Dai","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia"}]},{"given":"Judy","family":"Hoffman","sequence":"additional","affiliation":[{"name":"Georgia Tech"}]},{"given":"Kate","family":"Saenko","sequence":"additional","affiliation":[{"name":"Boston University &#x0026; MIT-IBM Watson AI Lab"}]},{"given":"Trevor","family":"Darrell","sequence":"additional","affiliation":[{"name":"UC Berkeley"}]},{"given":"Huijuan","family":"Xu","sequence":"additional","affiliation":[{"name":"Penn State University"}]}],"member":"263","reference":[{"key":"ref39","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.119"},{"key":"ref33","first-page":"91","article-title":"Faster r-cnn: Towards real-time object detection with region proposal networks","author":"ren","year":"2015","journal-title":"Advances in Neural IInformation Processing Systems"},{"key":"ref32","first-page":"779","article-title":"You only look once: Unified, real-time object detection","author":"redmon","year":"2016","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"ref31","first-page":"5533","article-title":"Learning spatio-temporal representation with pseudo-3d residual networks","author":"qiu","year":"2017","journal-title":"Proceedings of the IEEE International Conference on Computer Vision (ICCV)"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.5201\/ipol.2013.26"},{"key":"ref37","first-page":"154","article-title":"Autoloc: Weakly-supervised temporal action localization in untrimmed videos","author":"shou","year":"2018","journal-title":"Proceedings of the European Conference on Computer Vision (ECCV)"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.155"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00109"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58529-7_18"},{"key":"ref28","first-page":"3235","article-title":"Realistic evaluation of deep semi-supervised learning algorithms","author":"oliver","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00560"},{"key":"ref29","first-page":"563","article-title":"W-talc: Weakly-supervised temporal activity localization and classification","author":"paul","year":"2018","journal-title":"Proceedings of the European Conference on Computer Vision (ECCV)"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"ref1","first-page":"5049","article-title":"Mixmatch: A holistic approach to semi-supervised learning","author":"berthelot","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref20","first-page":"21","article-title":"Ssd: Single shot multibox detector","author":"liu","year":"2016","journal-title":"European Conference on Computer Vision (ECCV)"},{"key":"ref22","first-page":"420","article-title":"Sf-net: Single-frame supervision for temporal action localization","author":"ma","year":"2020","journal-title":"European Conference on Computer Vision"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58526-6_43"},{"key":"ref24","first-page":"3","article-title":"Object-centric spatio-temporal pyramids for egocentric activity recognition","volume":"2","author":"mccandless","year":"2013","journal-title":"BMVC"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00113"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00706"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00877"},{"key":"ref50","first-page":"5783","article-title":"R-c3d: Region convolutional 3d network for temporal activity detection","author":"xu","year":"2017","journal-title":"Proceedings of the IEEE International Conference on Computer Vision"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01017"},{"key":"ref56","first-page":"803","article-title":"Temporal relational reasoning in videos","author":"zhou","year":"2018","journal-title":"Proceedings of the European Conference on Computer Vision (ECCV)"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.317"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00719"},{"key":"ref53","article-title":"Marginalized average attentional network for weakly-supervised learning","author":"yuan","year":"2019","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00097"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"ref11","first-page":"529","article-title":"Semi-supervised learning by entropy minimization","author":"grandvalet","year":"2005","journal-title":"Advances in neural information processing systems"},{"key":"ref40","article-title":"Fixmatch: Simplifying semi-supervised learning with consistency and confidence","author":"sohn","year":"2020"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.cviu.2016.10.018","article-title":"The thumos challenge on action recognition for videos &#x201C;in the wild","volume":"155","author":"idrees","year":"2017","journal-title":"Computer Vision and Image Understanding (CVIU)"},{"key":"ref14","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"ioffe","year":"2015"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00717"},{"key":"ref16","article-title":"Temporal ensembling for semi-supervised learning","author":"laine","year":"2016"},{"key":"ref17","article-title":"Pseudo-label: The simple and efficient semi-supervised learning method for deep neural networks","volume":"3","author":"lee","year":"2013","journal-title":"Workshop on Challenges in Representation Learning"},{"key":"ref18","first-page":"3","article-title":"Bsn: Boundary sensitive network for temporal action proposal generation","author":"lin","year":"2018","journal-title":"Proceedings of the European Conference on Computer Vision (ECCV)"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00139"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00124"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref6","article-title":"A flexible model for training action lo-calization with varying levels of supervision","author":"ch\u00e9ron","year":"2018"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.81"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.333"},{"key":"ref7","first-page":"853","article-title":"Why can&#x2019;t i dance in the mall? learning to mitigate scene bias in action recognition","author":"choi","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref49","first-page":"399","article-title":"Videos as space-time region graphs","author":"wang","year":"2018","journal-title":"Proceedings of the European Conference on Computer Vision (ECCV)"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.480"},{"key":"ref48","first-page":"20","article-title":"Temporal segment networks: Towards good practices for deep action recognition","author":"wang","year":"2016","journal-title":"European Conference on Computer Vision"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.678"},{"key":"ref42","first-page":"1195","article-title":"Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results","author":"tarvainen","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref41","first-page":"715","article-title":"Pyramid dilated deeper convlstm for video salient object detection","author":"song","year":"2018","journal-title":"Proceedings of the European Conference on Computer Vision (ECCV)"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.64"},{"key":"ref43","article-title":"The information bottleneck method","author":"tishby","year":"2000"}],"event":{"name":"2021 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Montreal, QC, Canada","start":{"date-parts":[[2021,10,10]]},"end":{"date-parts":[[2021,10,17]]}},"container-title":["2021 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9709627\/9709628\/09711363.pdf?arnumber=9711363","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,12]],"date-time":"2022-07-12T19:37:13Z","timestamp":1657654633000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9711363\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10]]},"references-count":56,"URL":"https:\/\/doi.org\/10.1109\/iccv48922.2021.00792","relation":{},"subject":[],"published":{"date-parts":[[2021,10]]}}}