{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,24]],"date-time":"2025-03-24T08:33:14Z","timestamp":1742805194388,"version":"3.37.3"},"reference-count":30,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/access.2021.3058998","type":"journal-article","created":{"date-parts":[[2021,2,13]],"date-time":"2021-02-13T02:46:38Z","timestamp":1613184398000},"page":"92134-92142","source":"Crossref","is-referenced-by-count":6,"title":["Complete Video-Level Representations for Action Recognition"],"prefix":"10.1109","volume":"9","author":[{"given":"Min","family":"Li","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2917-0877","authenticated-orcid":false,"given":"Ruwen","family":"Bai","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9766-7722","authenticated-orcid":false,"given":"Bo","family":"Meng","sequence":"additional","affiliation":[]},{"given":"Junxing","family":"Ren","sequence":"additional","affiliation":[]},{"given":"Miao","family":"Jiang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5601-7384","authenticated-orcid":false,"given":"Yang","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Linghan","family":"Li","sequence":"additional","affiliation":[]},{"given":"Hong","family":"Du","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.213"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299101"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.590"},{"key":"ref15","article-title":"ConvNet architecture search for spatiotemporal feature learning","author":"tran","year":"2017","journal-title":"arXiv 1708 05038"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICTAI.2019.00250"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00675"},{"key":"ref18","first-page":"33","article-title":"Pyramid methods in image processing","volume":"29","author":"adelson","year":"1984","journal-title":"RCA Eng"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.106"},{"journal-title":"Ultralytics\/Yolov5 V3 1&#x2014;Bug Fixes and Performance Improvements","year":"2020","author":"jocher","key":"ref28"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref27","article-title":"YOLOv4: Optimal speed and accuracy of object detection","author":"bochkovskiy","year":"2020","journal-title":"arXiv 2004 10934"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"ref6","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref29","article-title":"UCF101: A dataset of 101 human actions classes from videos in the wild","author":"soomro","year":"2012","journal-title":"arXiv 1212 0402"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref7","article-title":"Inception-v4, inception-ResNet and the impact of residual connections on learning","author":"szegedy","year":"2016","journal-title":"arXiv 1602 07261"},{"key":"ref2","first-page":"20","article-title":"Temporal segment networks: Towards good practices for deep action recognition","author":"wang","year":"2016","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3001234"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.3390\/s19051005"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2389824"},{"key":"ref22","article-title":"ICAN: Instance-centric attention network for human-object interaction detection","author":"gao","year":"2018","journal-title":"arXiv 1808 10437"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01021"},{"key":"ref24","first-page":"91","article-title":"Faster R-CNN: Towards real-time object detection with region proposal networks","author":"ren","year":"2015","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref26","article-title":"YOLOv3: An incremental improvement","author":"redmon","year":"2018","journal-title":"arXiv 1804 02767"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/9312710\/09353486.pdf?arnumber=9353486","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,12,17]],"date-time":"2021-12-17T19:57:23Z","timestamp":1639771043000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9353486\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/access.2021.3058998","relation":{},"ISSN":["2169-3536"],"issn-type":[{"type":"electronic","value":"2169-3536"}],"subject":[],"published":{"date-parts":[[2021]]}}}