{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T16:13:07Z","timestamp":1761581587981,"version":"3.37.3"},"reference-count":51,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"11","license":[{"start":{"date-parts":[[2017,11,1]],"date-time":"2017-11-01T00:00:00Z","timestamp":1509494400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100003399","name":"Science and Technology Commission of Shanghai Municipality","doi-asserted-by":"publisher","award":["17ZR1402300"],"award-info":[{"award-number":["17ZR1402300"]}],"id":[{"id":"10.13039\/501100003399","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004543","name":"China Scholarship Council","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004543","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Signal Process. Lett."],"published-print":{"date-parts":[[2017,11]]},"DOI":"10.1109\/lsp.2017.2731952","type":"journal-article","created":{"date-parts":[[2017,7,26]],"date-time":"2017-07-26T18:47:42Z","timestamp":1501094862000},"page":"1666-1670","source":"Crossref","is-referenced-by-count":50,"title":["Joint Human Detection and Head Pose Estimation via Multistream Networks for RGB-D Videos"],"prefix":"10.1109","volume":"24","author":[{"given":"Guyue","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Jun","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Hengduo","family":"Li","sequence":"additional","affiliation":[]},{"given":"Yan Qiu","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Larry S.","family":"Davis","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-014-0733-5"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472028"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"ref31","first-page":"3218","article-title":"P-CNN:\n Pose-based CNN features for action recognition","author":"ch\u00e9ron","year":"0","journal-title":"Proc IEEE Int Conf Comput Vis"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-16808-1_21"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2015.06.014"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2014.09.013"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.236"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2017.2690339"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"ref29","first-page":"1799","article-title":"Joint training of a convolutional\n network and a graphical model for human pose estimation","author":"tompson","year":"0","journal-title":"Proc 27th Int Conf Neural Inf Process Syst"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5539906"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.177"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.257"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2015.7139256"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.120"},{"key":"ref24","first-page":"4694","article-title":"Beyond short snippets: Deep networks for video classification","author":"ng","year":"0","journal-title":"Proc Comput Vis Pattern Recognit"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2017.8019323"},{"key":"ref26","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"0","journal-title":"Proc 27th Int Conf Neural Inf Process Syst"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806222"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2016.7552949"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-08338-4_119"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2014.6907688"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2017.8019303"},{"key":"ref40","article-title":"Indoor semantic segmentation using depth\n information","author":"couprie","year":"2015","journal-title":"ICLRE"},{"key":"ref12","first-page":"304","article-title":"Robust real-time human perception with depth\n camera","volume":"285","author":"zhang","year":"0","journal-title":"Proc Eur Conf Artif Intell"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299059"},{"key":"ref14","first-page":"1110","article-title":"Hierarchical recurrent\n neural network for skeleton based action recognition","author":"du","year":"0","journal-title":"Proc Comput Vis Pattern Recognit"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46487-9_50"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-48881-3_28"},{"key":"ref17","first-page":"1647","article-title":"Global context-aware attention LSTM networks for 3D action recognition","author":"liu","year":"0","journal-title":"Proc Comput Vis Pattern Recognit"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.214"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.222"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248017"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2011.155"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2300479"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.465"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2011.6095074"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2011.5981811"},{"key":"ref49","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2013","journal-title":"ICLRE"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.248"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.135"},{"key":"ref45","first-page":"4703","article-title":"segDeepM: Exploiting segmentation and context in\n deep neural networks for object detection","author":"zhu","year":"0","journal-title":"Proc Comput Vis Pattern Recognit"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"ref47","first-page":"345","article-title":"Learning rich features from\n RGB-D images for object detection and segmentation","author":"gupta","year":"0","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2015.7353446"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2015.7139363"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2014.12.007"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.175"}],"container-title":["IEEE Signal Processing Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/97\/8048073\/07993051.pdf?arnumber=7993051","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T16:03:04Z","timestamp":1642003384000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7993051\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,11]]},"references-count":51,"journal-issue":{"issue":"11"},"URL":"https:\/\/doi.org\/10.1109\/lsp.2017.2731952","relation":{},"ISSN":["1070-9908","1558-2361"],"issn-type":[{"type":"print","value":"1070-9908"},{"type":"electronic","value":"1558-2361"}],"subject":[],"published":{"date-parts":[[2017,11]]}}}