{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T15:42:19Z","timestamp":1774366939900,"version":"3.50.1"},"reference-count":60,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2018,10,1]],"date-time":"2018-10-01T00:00:00Z","timestamp":1538352000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61532009"],"award-info":[{"award-number":["61532009"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61772244"],"award-info":[{"award-number":["61772244"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61572498"],"award-info":[{"award-number":["61572498"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Key Research Program of Frontier Sciences, CAS","award":["QYZDJ-SSW-JSC039"],"award-info":[{"award-number":["QYZDJ-SSW-JSC039"]}]},{"name":"Postgraduate Research & Practice Innovation Program of Jiangsu Province","award":["SJCX17_0599"],"award-info":[{"award-number":["SJCX17_0599"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Multimedia"],"published-print":{"date-parts":[[2018,10]]},"DOI":"10.1109\/tmm.2018.2815998","type":"journal-article","created":{"date-parts":[[2018,3,15]],"date-time":"2018-03-15T20:42:42Z","timestamp":1521146562000},"page":"2693-2705","source":"Crossref","is-referenced-by-count":56,"title":["Three-Dimensional Attention-Based Deep Ranking Model for Video Highlight Detection"],"prefix":"10.1109","volume":"20","author":[{"given":"Yifan","family":"Jiao","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7804-0286","authenticated-orcid":false,"given":"Zhetao","family":"Li","sequence":"additional","affiliation":[]},{"given":"Shucheng","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Xiaoshan","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Bin","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1856-9564","authenticated-orcid":false,"given":"Tianzhu","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/S0031-3203(96)00109-4"},{"key":"ref38","first-page":"1228","article-title":"Key frame selection by motion analysis","author":"wolf","year":"0","journal-title":"Proc IEEE Int Conf Acoust Speech Signal Process"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2015.2443559"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2745109"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/2602633"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2567393"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2017.2654445"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2781304"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2012.2185041"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2010.31"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2016.2602938"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2011.2133090"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2016.2515640"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2694222"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2012.2191944"},{"key":"ref1","first-page":"787","article-title":"Ranking domain-specific\n highlights by analyzing edited videos","author":"sun","year":"0","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref20","first-page":"45","article-title":"Keyframe extraction for\n video tagging & summarization","author":"borth","year":"0","journal-title":"Proc Informatiktage 2008 Fachwissenschaftlicher Informatik-Kongress 14 und 15 M&#x00E4;rz 2008"},{"key":"ref22","first-page":"199","article-title":"Video summarization based on shot boundary detection with penalized contrasts","author":"medentzidou","year":"0","journal-title":"Proc 9th Int Symp Image Signal Process Anal Zagreb Croatia"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.4304\/jsw.8.7.1751-1758"},{"key":"ref24","first-page":"443","article-title":"Summarizing while\n recording: Context-based highlight detection for egocentric videos","author":"lin","year":"0","journal-title":"Proc 2015 IEEE Int Conf Comput Vis Workshop Santiago Chile"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3089249"},{"key":"ref26","doi-asserted-by":"crossref","first-page":"1167","DOI":"10.1109\/TMM.2007.902847","article-title":"Human behavior analysis for highlight ranking in broadcast\n racket sports video","volume":"9","author":"zhu","year":"2007","journal-title":"IEEE Trans Multimedia"},{"key":"ref25","first-page":"982","article-title":"Highlight detection with\n pairwise deep ranking for first-person video summarization","author":"yao","year":"0","journal-title":"Proc 2016 IEEE Conf Comput Vis Pattern Recognit Las Vegas NV USA"},{"key":"ref50","first-page":"379","article-title":"A neural\n attention model for abstractive sentence summarization","author":"rush","year":"0","journal-title":"Proc 2015 Conf Empirical Methods Natural Lang Process Lisbon Portugal"},{"key":"ref51","first-page":"1","article-title":"Reasoning about entailment with neural attention","author":"rockt\u00e4schel","year":"0","journal-title":"Proc ICLR"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2014.2375793"},{"key":"ref58","article-title":"UCF101: A\n dataset of 101 human actions classes from videos in the wild","author":"soomro","year":"2012","journal-title":"CRCV-TR-12-01"},{"key":"ref57","first-page":"505","article-title":"Creating summaries from user\n videos","author":"gygli","year":"0","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2016.2582379"},{"key":"ref55","first-page":"1106","article-title":"ImageNet\n classification with deep convolutional neural networks","author":"krizhevsky","year":"0","journal-title":"Proc 25th Int Conf Neural Inf Process Syst"},{"key":"ref54","first-page":"675","article-title":"Caffe: Convolutional architecture for fast feature embedding","author":"jia","year":"0","journal-title":"Proc 22nd ACM Int Conf Multimedia"},{"key":"ref53","first-page":"4507","article-title":"Describing videos by exploiting temporal structure","author":"yao","year":"0","journal-title":"Proc 2015 IEEE Int Conf Comput Vis Santiago Chile"},{"key":"ref52","first-page":"2048","article-title":"Show, attend and tell: Neural image caption generation with\n visual attention","author":"xu","year":"0","journal-title":"Proc 32nd Int Conf Mach Learn Lille France"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/1352012.1352015"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2658957"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2013.2285526"},{"key":"ref12","first-page":"921","article-title":"Event on demand with MPEG-21 video adaptation system","author":"xu","year":"0","journal-title":"Proc 14th ACM Int'l Conf Multimedia"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/354384.354443"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/500178.500181"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"112","DOI":"10.1109\/TCE.2005.1405707","article-title":"A highlight scene detection and video summarization system using\n audio feature for a personal video recorder","volume":"51","author":"otsuka","year":"0","journal-title":"in Proc IEEE Dig Tech Paper Int Conf Trans Consum Electron"},{"key":"ref16","first-page":"519","article-title":"Highlight ranking for sports video browsing","author":"tong","year":"0","journal-title":"Proc 13th ACM Int Conf Multimedia Singapore"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/11735106_44"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2004.841694"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s00799-005-0129-9"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2015.2393635"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2012.2237023"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2013.2285526"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2015.2415497"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-017-1033-7"},{"key":"ref7","article-title":"Learning\n multi-task correlation particle filters for visual tracking","author":"zhang","year":"2018","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref49","first-page":"1","article-title":"Multiple object\n recognition with visual attention","author":"ba","year":"0","journal-title":"Proc ICLR"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2016.2614132"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2011.170"},{"key":"ref45","first-page":"540","article-title":"Category-specific video summarization","author":"potapov","year":"0","journal-title":"Proc 13th Eur Conf Comput Vis Zurich Switzerland"},{"key":"ref48","first-page":"1","article-title":"Neural\n machine translation by jointly learning to align and translate","author":"bahdanau","year":"0","journal-title":"Proc ICLR"},{"key":"ref47","first-page":"4633","article-title":"Unsupervised extraction of video highlights\n via robust recurrent auto-encoders","author":"yang","year":"0","journal-title":"Proc IEEE Int Conf Comput Vis Santiago Chile"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-012-1046-8"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2011.2174780"},{"key":"ref44","first-page":"505","article-title":"Creating summaries from user\n videos","author":"gygli","year":"0","journal-title":"Proc 13th Eur Conf Comput Vis"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.sigpro.2012.06.026"}],"container-title":["IEEE Transactions on Multimedia"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6046\/8466674\/08316891.pdf?arnumber=8316891","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,26]],"date-time":"2022-01-26T09:05:12Z","timestamp":1643187912000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8316891\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,10]]},"references-count":60,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tmm.2018.2815998","relation":{},"ISSN":["1520-9210","1941-0077"],"issn-type":[{"value":"1520-9210","type":"print"},{"value":"1941-0077","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,10]]}}}