{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T18:47:22Z","timestamp":1775069242697,"version":"3.50.1"},"reference-count":67,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2019,5,1]],"date-time":"2019-05-01T00:00:00Z","timestamp":1556668800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,5,1]],"date-time":"2019-05-01T00:00:00Z","timestamp":1556668800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,5,1]],"date-time":"2019-05-01T00:00:00Z","timestamp":1556668800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61333018"],"award-info":[{"award-number":["61333018"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61673380"],"award-info":[{"award-number":["61673380"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Knowl. Data Eng."],"published-print":{"date-parts":[[2019,5,1]]},"DOI":"10.1109\/tkde.2018.2848260","type":"journal-article","created":{"date-parts":[[2018,6,22]],"date-time":"2018-06-22T22:05:56Z","timestamp":1529705156000},"page":"996-1009","source":"Crossref","is-referenced-by-count":56,"title":["Read, Watch, Listen, and Summarize: Multi-Modal Summarization for Asynchronous Text, Image, Audio and Video"],"prefix":"10.1109","volume":"31","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2368-7541","authenticated-orcid":false,"given":"Haoran","family":"Li","sequence":"first","affiliation":[]},{"given":"Junnan","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Cong","family":"Ma","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5293-7434","authenticated-orcid":false,"given":"Jiajun","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Chengqing","family":"Zong","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2016.2624140"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2400461"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1523"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/MMSP.2001.962717"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/274644.274670"},{"key":"ref30","first-page":"111","article-title":"Summarisation of spoken audio through information extraction","author":"valenza","year":"1999","journal-title":"Proc ESCA Tut Res Workshop Accessing Inf Spoken Audio"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2014.2344015"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123326"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2017.2701825"},{"key":"ref34","first-page":"142","article-title":"Monolingual machine translation for paraphrase generation","author":"quirk","year":"2004","journal-title":"Proc Conf Empirical Methods Natural Language Process"},{"key":"ref60","first-page":"912","article-title":"Multi-document summarization via budgeted maximization of submodular functions","author":"lin","year":"2010","journal-title":"Proc Annu Conf North Amer Chapter Assoc Comput Linguistics"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2587640"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1232"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.3115\/1075096.1075150"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/2324796.2324832"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1041"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/S0169-7552(98)00110-X"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.3115\/1073445.1073465"},{"key":"ref66","first-page":"1","article-title":"An assessment of the accuracy of automatic evaluation in summarization","author":"owczarzak","year":"2012","journal-title":"Proceedings of Workshop on Evaluation Metrics and System Comparison for Automatic Summarization"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/2822907"},{"key":"ref67","article-title":"Google&#x2019;s neural machine translation system: Bridging the gap between human and machine translation","author":"wu","year":"2016","journal-title":"arXiv 1609 08144"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2003.1221239"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1114"},{"key":"ref20","article-title":"TextRank: Bringing order into texts","author":"mihalcea","year":"2004","journal-title":"EMNLP"},{"key":"ref22","article-title":"LexRank: Graph-based lexical centrality as salience in text summarization","volume":"22","author":"erkan","year":"2011","journal-title":"J Journal of Qiqihar Junior Teachers'College"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.3115\/1614049.1614095"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2012.42"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2016.2541148"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-50496-4_54"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2012.114"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2012.114"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2014.2345379"},{"key":"ref59","first-page":"525","article-title":"A comparison of document clustering techniques","author":"steinbach","year":"2000","journal-title":"Proc KDD Workshop Text Mining"},{"key":"ref58","first-page":"510","article-title":"A class of submodular functions for document summarization","author":"lin","year":"2011","journal-title":"Proc of the 3rd Conf on Applied Natural Language Processing"},{"key":"ref57","author":"manning","year":"1999","journal-title":"Foundations of Statistical Natural Language Processing"},{"key":"ref56","first-page":"479","article-title":"Identifying word translations from comparable corpora using latent topic models","author":"vuli?","year":"2011","journal-title":"Proc of the 3rd Conf on Applied Natural Language Processing"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.4324\/9780203936399.ch21"},{"key":"ref54","first-page":"340","article-title":"Letter to the editor: The kullback-leibler distance","volume":"41","author":"kullback","year":"1987"},{"key":"ref53","first-page":"2210","article-title":"A novel neural topic model and its supervised extension","author":"cao","year":"2015","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref52","first-page":"993","article-title":"Latent dirichlet allocation","volume":"3","author":"blei","year":"2003","journal-title":"J Mach Learn Res"},{"key":"ref10","first-page":"119","article-title":"Summarization of real-life events based on community-contributed content","author":"del fabro","year":"2012","journal-title":"Proc 4th Int Conf Multimedia"},{"key":"ref11","first-page":"1807","article-title":"Multimedia summarization for trending topics in microblogs","author":"bian","year":"2013","journal-title":"Proc ACM Conf Inf Knowl Manage"},{"key":"ref40","first-page":"866","article-title":"Adaptive key frame extraction using unsupervised clustering","author":"zhuang","year":"1998","journal-title":"Proc Int Conf Image Process"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2809933"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2014.2384912"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2809932"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2016.05.022"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/S0020-0190(99)00031-9"},{"key":"ref17","first-page":"46","article-title":"Sentence position revisited: A robust light-weight update summarization &#x2018;baseline&#x2019; algorithm","author":"varma","year":"2009","journal-title":"Proc 3rd Int Workshop Cross Lingual Inf Access Addressing Inf Need Multilingual Soc"},{"key":"ref18","first-page":"919","article-title":"A study on position information in document summarization","author":"ouyang","year":"2010","journal-title":"Proc 23rd Int Conf Comput Linguistics Posters"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2003.10.006"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2011.5711541"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2000.871074"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2013.2267205"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1186\/1687-6180-2013-173"},{"key":"ref8","first-page":"683","article-title":"Generating pictorial storylines via minimum-weight connected dominating set approximation in multi-view graphs","author":"wang","year":"2012","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2016.2615289"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ESEM.2013.13"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-1008"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1162\/089120102760275983"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1162\/0891201053630264"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P15-2036"},{"key":"ref42","first-page":"740","article-title":"Microsoft COCO: Common objects in context","author":"lin","year":"2014","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"key":"ref44","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2014","journal-title":"ICLRE"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.541"}],"container-title":["IEEE Transactions on Knowledge and Data Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/69\/8680720\/08387512.pdf?arnumber=8387512","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,13]],"date-time":"2022-07-13T20:47:13Z","timestamp":1657745233000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8387512\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,5,1]]},"references-count":67,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tkde.2018.2848260","relation":{},"ISSN":["1041-4347","1558-2191","2326-3865"],"issn-type":[{"value":"1041-4347","type":"print"},{"value":"1558-2191","type":"electronic"},{"value":"2326-3865","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,5,1]]}}}