{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T02:03:33Z","timestamp":1773799413348,"version":"3.50.1"},"reference-count":71,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2019,5,1]],"date-time":"2019-05-01T00:00:00Z","timestamp":1556668800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,5,1]],"date-time":"2019-05-01T00:00:00Z","timestamp":1556668800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,5,1]],"date-time":"2019-05-01T00:00:00Z","timestamp":1556668800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Singapore Ministry of Education Academic Research Fund Tier 2","award":["MOE2015-T2-2-114"],"award-info":[{"award-number":["MOE2015-T2-2-114"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61501198"],"award-info":[{"award-number":["61501198"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003819","name":"Natural Science Foundation of Hubei Province","doi-asserted-by":"publisher","award":["2014CFB461"],"award-info":[{"award-number":["2014CFB461"]}],"id":[{"id":"10.13039\/501100003819","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100008209","name":"University at Buffalo","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100008209","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Circuits Syst. Video Technol."],"published-print":{"date-parts":[[2019,5]]},"DOI":"10.1109\/tcsvt.2018.2830102","type":"journal-article","created":{"date-parts":[[2018,4,25]],"date-time":"2018-04-25T19:29:14Z","timestamp":1524684554000},"page":"1423-1437","source":"Crossref","is-referenced-by-count":107,"title":["Semantic Cues Enhanced Multimodality Multistream CNN for Action Recognition"],"prefix":"10.1109","volume":"29","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5003-2260","authenticated-orcid":false,"given":"Zhigang","family":"Tu","sequence":"first","affiliation":[]},{"given":"Wei","family":"Xie","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4390-1568","authenticated-orcid":false,"given":"Justin","family":"Dauwels","sequence":"additional","affiliation":[]},{"given":"Baoxin","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7324-7034","authenticated-orcid":false,"given":"Junsong","family":"Yuan","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2009.5202577"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.273"},{"key":"ref39","first-page":"142","article-title":"Extended Lucas-Kanade tracking","author":"oron","year":"2014","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.223"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2015.2433171"},{"key":"ref32","first-page":"1063","article-title":"Video saliency detection via dynamic consistent spatio-temporal attention modelling","author":"zhong","year":"2013","journal-title":"Proc 27th AAAI Conf Artif Intell"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/1180639.1180824"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/34.868680"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2314663"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298961"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"ref62","first-page":"204","article-title":"Beyond Gaussian pyramid: Multi-skip feature stacking for action recognition","author":"lan","year":"2015","journal-title":"Proc Comput Vis Pattern Recognit"},{"key":"ref61","doi-asserted-by":"crossref","first-page":"109","DOI":"10.1016\/j.cviu.2016.03.013","article-title":"Bag of visual words and fusion methods for action recognition: Comprehensive study and good practice","volume":"150","author":"peng","year":"2016","journal-title":"Comput Vis Image Understand"},{"key":"ref63","first-page":"3165","article-title":"Action-VLAD: Learning spatio-temporal aggregation for action classification","author":"girdhar","year":"2017","journal-title":"Proc Comput Vis Pattern Recognit"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.441"},{"key":"ref64","first-page":"3468","article-title":"Spatiotemporal residual networks for video action recognition","author":"feichtenhofer","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299059"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.151"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.377"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2011.2125450"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6247743"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-011-0512-5"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/2671188.2749406"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2017.07.028"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.209"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-013-0620-5"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298676"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5540039"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.5244\/C.28.6"},{"key":"ref26","first-page":"978","article-title":"Multilayer and multimodal fusion of deep neural networks for video classification","author":"yang","year":"2016","journal-title":"Proc ACM Multimedia Conf"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2964328"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1979.4310076"},{"key":"ref51","first-page":"1","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2015","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.291"},{"key":"ref58","first-page":"1","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"ioffe","year":"2015","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"ref56","first-page":"1","article-title":"UCF101: A dataset of 101 human actions classes from videos in the wild","author":"soomro","year":"2012","journal-title":"CoRR"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.396"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2008.4587727"},{"key":"ref53","first-page":"744","article-title":"Multi-region two-stream R-CNN for action detection","author":"peng","year":"2016","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.135"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"ref11","first-page":"91","article-title":"Faster R-CNN: Towards real-time object detection with region proposal networks","author":"ren","year":"2015","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.423"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.296"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2014.115"},{"key":"ref15","article-title":"Long-term temporal convolutions for action recognition","author":"varol","year":"2017","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2558148"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806216"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.216"},{"key":"ref4","first-page":"20","article-title":"Temporal segment networks: Towards good practices for deep action recognition","author":"wang","year":"2016","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2016.7900180"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.213"},{"key":"ref5","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref8","first-page":"3218","article-title":"P-CNN: Pose-based CNN features for action recognition","author":"ch\u00e9ron","year":"2015","journal-title":"Proc Int Conf Comput Vis"},{"key":"ref7","first-page":"1","article-title":"Two-stream SR-CNNs for action recognition in videos","author":"wang","year":"2016","journal-title":"Proc Brit Mach Vis Conf"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.360"},{"key":"ref9","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2015.7351654"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.352"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.120"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.414"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.179"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298873"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2016.10.027"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2013.11.026"}],"container-title":["IEEE Transactions on Circuits and Systems for Video Technology"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/76\/8705610\/08347006.pdf?arnumber=8347006","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,13]],"date-time":"2022-07-13T20:42:59Z","timestamp":1657744979000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8347006\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,5]]},"references-count":71,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tcsvt.2018.2830102","relation":{},"ISSN":["1051-8215","1558-2205"],"issn-type":[{"value":"1051-8215","type":"print"},{"value":"1558-2205","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,5]]}}}