{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:38:47Z","timestamp":1778081927827,"version":"3.51.4"},"reference-count":195,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2016,6,1]],"date-time":"2016-06-01T00:00:00Z","timestamp":1464739200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61411136002"],"award-info":[{"award-number":["61411136002"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61473036"],"award-info":[{"award-number":["61473036"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. on Image Process."],"published-print":{"date-parts":[[2016,6]]},"DOI":"10.1109\/tip.2016.2554321","type":"journal-article","created":{"date-parts":[[2016,4,14]],"date-time":"2016-04-14T18:09:48Z","timestamp":1460657388000},"page":"2752-2773","source":"Crossref","is-referenced-by-count":174,"title":["Text Detection, Tracking and Recognition in Video: A Comprehensive Survey"],"prefix":"10.1109","volume":"25","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0023-0220","authenticated-orcid":false,"given":"Xu-Cheng","family":"Yin","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ze-Yu","family":"Zuo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shu","family":"Tian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cheng-Lin","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref170","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2011.5711546"},{"key":"ref172","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2005.1570797"},{"key":"ref171","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2001.990990"},{"key":"ref174","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.1997.609372"},{"key":"ref173","doi-asserted-by":"publisher","DOI":"10.1109\/ISWC.1999.806642"},{"key":"ref176","doi-asserted-by":"publisher","DOI":"10.1145\/1027527.1027724"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.1049\/el:19990977"},{"key":"ref178","doi-asserted-by":"publisher","DOI":"10.5220\/0005273501300138"},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2005.858619"},{"key":"ref168","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2004.1334351"},{"key":"ref169","article-title":"Read it project report: A portable text reading system for the blind people","author":"chmiel","year":"2005"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2009.83"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2005.01.004"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2013.2255396"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2012.2198129"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/DAS.2012.6"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2011.226"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-8655(03)00105-3"},{"key":"ref36","doi-asserted-by":"crossref","first-page":"1631","DOI":"10.1109\/TPAMI.2003.1251157","article-title":"Texture-based approach for text detection in images using support vector machines and continuously adaptive mean shift algorithm","volume":"25","author":"kim","year":"2003","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref35","doi-asserted-by":"crossref","first-page":"1015","DOI":"10.1109\/ICPR.2002.1048210","article-title":"Hybrid Chinese\/English text detection in images and video frames","volume":"3","author":"mao","year":"2002","journal-title":"Proc 16th Int Conf Pattern Recognit"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/83.817607"},{"key":"ref181","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0126200"},{"key":"ref180","doi-asserted-by":"publisher","DOI":"10.1155\/S1110865704408142"},{"key":"ref185","doi-asserted-by":"publisher","DOI":"10.1016\/S0031-3203(03)00132-8"},{"key":"ref184","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2014.07.008"},{"key":"ref183","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2011.36"},{"key":"ref182","doi-asserted-by":"crossref","first-page":"1442","DOI":"10.1109\/TPAMI.2013.230","article-title":"Visual tracking: An experimental survey","volume":"36","author":"smeulders","year":"2014","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref189","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2007.4376980"},{"key":"ref188","doi-asserted-by":"publisher","DOI":"10.1007\/s10032-004-0129-0"},{"key":"ref187","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2001.990498"},{"key":"ref186","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2005.01.003"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/1815330.1815395"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2009.153"},{"key":"ref179","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2003.1249282"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2010.03.004"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2011.6130221"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/DAS.2008.72"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2004.841653"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2009.5202546"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/DAS.2008.17"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2008.4761415"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2009.85"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5540041"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2012.2199327"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2010.790"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2013.221"},{"key":"ref156","first-page":"1","article-title":"The TREC-2002 video track report","author":"smeaton","year":"2002","journal-title":"Proc 17th Text Retrieval Conf (TREC)"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2003.1227749"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2004.825538"},{"key":"ref152","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2008.57"},{"key":"ref151","doi-asserted-by":"publisher","DOI":"10.1007\/11669487_51"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1007\/s005300050140"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2009.58"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2001.953954"},{"key":"ref149","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2014.2363167"},{"key":"ref59","first-page":"282","article-title":"Conditional random fields: Probabilistic models for segmenting and labeling sequence data","author":"lafferty","year":"2001","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref58","doi-asserted-by":"crossref","first-page":"800","DOI":"10.1109\/TIP.2010.2070803","article-title":"A hybrid approach to detect and localize texts in natural scene images","volume":"20","author":"pan","year":"2011","journal-title":"IEEE Trans Image Process"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298871"},{"key":"ref56","first-page":"3304","article-title":"End-to-end text recognition with convolutional neural networks","author":"wang","year":"2012","journal-title":"Proc Int Conf Pattern Recognit (ICPR)"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2011.93"},{"key":"ref54","first-page":"ii-366","article-title":"Detecting and reading text in natural scenes","author":"chen","year":"2004","journal-title":"Proc Int Conf Comput Vis Pattern Recognit (CVPR)"},{"key":"ref53","first-page":"725","article-title":"Effective text localization in natural scene images with MSER, geometry-based grouping and AdaBoost","author":"yin","year":"2012","journal-title":"Proc Int Conf Pattern Recognit (ICPR)"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/2393347.2396307"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2010.2077772"},{"key":"ref167","doi-asserted-by":"publisher","DOI":"10.1109\/AIPR.2004.22"},{"key":"ref166","article-title":"AdaBoost learning for detecting and reading text in city scenes","author":"chen","year":"2004"},{"key":"ref165","first-page":"401","article-title":"Development of a guide dog system for the blind with character recognition ability","volume":"1","author":"iwatsuka","year":"2004","journal-title":"Proc 17th Int Conf Pattern Recognit (ICPR)"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-76386-4_18"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2000.902945"},{"key":"ref162","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1109\/ICIP.1999.817127","article-title":"Neural network-based text location for news video indexing","volume":"3","author":"jeong","year":"1999","journal-title":"Proc Int Conf Image Process (ICIP)"},{"key":"ref161","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.1998.711301"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1109\/MMCS.1997.609763"},{"key":"ref4","first-page":"52","article-title":"TRECVID 2014&#x2014;An overview of the goals, tasks, data, evaluation mechanisms and metrics","author":"over","year":"2014","journal-title":"Proc TRECVID"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/2536798"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"970","DOI":"10.1109\/TPAMI.2013.182","article-title":"Robust text detection in natural scene images","volume":"36","author":"yin","year":"2014","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/34.845381"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/34.824820"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1109\/IVL.1999.781133"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/2505515.2507880"},{"key":"ref49","doi-asserted-by":"crossref","first-page":"147","DOI":"10.1007\/s10032-004-0133-4","article-title":"Rectification and recognition of text in 3-D scenes","volume":"7","author":"myers","year":"2004","journal-title":"Int J Document Anal Recognit"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2000.871472"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2003.10.012"},{"key":"ref158","doi-asserted-by":"publisher","DOI":"10.1145\/641007.641073"},{"key":"ref46","doi-asserted-by":"crossref","first-page":"-117i","DOI":"10.1109\/ICIP.2002.1037973","article-title":"A new approach for video text detection","volume":"1","author":"cai","year":"2002","journal-title":"Proc Int Conf Image Process"},{"key":"ref45","first-page":"318","article-title":"Text detection in video frames using hybrid features","volume":"1","author":"ji","year":"2009","journal-title":"Proc Int Conf Mach Learn Cybern"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/s10032-001-0072-2"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1016\/S0031-3203(02)00230-3"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1049\/iet-ipr.2010.0397"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2010.01.009"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-013-1385-0"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2010.166"},{"key":"ref73","first-page":"512","article-title":"Deep features for text spotting","author":"jaderberg","year":"2014","journal-title":"In Proc European Conf Comp Vis"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2014.2353813"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.157"},{"key":"ref70","first-page":"1083","article-title":"Detecting texts of arbitrary orientations in natural images","author":"yao","year":"2012","journal-title":"Proc Int Conf Comput Vis Pattern Recognit (CVPR)"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2007.4377040"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2015.7333942"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33266-1_22"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2003.06.001"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.1996.517139"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.126"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.5244\/C.16.36"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2012.09.019"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248097"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1145\/2484028.2484197"},{"key":"ref64","doi-asserted-by":"crossref","first-page":"2296","DOI":"10.1109\/TIP.2013.2249082","article-title":"Scene text detection via connected component clustering and nontext filtering","volume":"22","author":"koo","year":"2013","journal-title":"IEEE Trans Image Process"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2014.469"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.514"},{"key":"ref67","first-page":"497","article-title":"Robust scene text detection with convolution neural network induced MSER trees","author":"huang","year":"2014","journal-title":"In Proc European Conf Comp Vis"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2388210"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2015.04.002"},{"key":"ref193","doi-asserted-by":"publisher","DOI":"10.1145\/2505377.2505390"},{"key":"ref194","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-85729-859-1_9"},{"key":"ref195","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2014.09.003"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1007\/s005300050006"},{"key":"ref94","doi-asserted-by":"crossref","first-page":"180","DOI":"10.1117\/12.234741","article-title":"Automatic text recognition in digital videos","author":"lienhart","year":"1996","journal-title":"Electron Imag Sci Technol"},{"key":"ref190","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_26"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2005.188"},{"key":"ref191","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2300479"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.355"},{"key":"ref192","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.76"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.103"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1145\/1177352.1177355"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/76.999203"},{"key":"ref99","first-page":"1","article-title":"Detecting moving text in video using temporal information","author":"huang","year":"2008","journal-title":"Proc Int Conf Pattern Recognit (ICPR)"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2006.243"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2007.4377101"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15549-9_43"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.381"},{"key":"ref84","first-page":"1","article-title":"End-to-end text recognition with hybrid HMM maxout models","author":"alsharif","year":"2014","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.102"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6247990"},{"key":"ref89","first-page":"776","article-title":"Video text detection and recognition: Dataset and benchmark","author":"nguyen","year":"2014","journal-title":"Proc IEEE Winter Conf Appl Comput Vis"},{"key":"ref85","article-title":"Synthetic data and artificial neural networks for natural scene text recognition","volume":"abs 1406 2227","author":"jaderberg","year":"2014","journal-title":"CoRR"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0823-z"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2339814"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298914"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15702-8_36"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1117\/12.298354"},{"key":"ref126","doi-asserted-by":"crossref","first-page":"988","DOI":"10.1117\/12.298412","article-title":"Moving-object detection from MPEG coded data","volume":"3309","author":"nakajima","year":"1998","journal-title":"Proc SPIE"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2014.6890248"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1117\/12.2009441"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.1999.791717"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2000.905537"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1007\/s10032-002-0091-7"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1109\/ISM.2008.59"},{"key":"ref134","first-page":"1","article-title":"Computer vision face tracking for use in a perceptual user interface","author":"bradski","year":"1998","journal-title":"Proc IEEE Workshop Appl Comput Vis (WACV)"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1145\/1027527.1027581"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1109\/76.475896"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2010.2068553"},{"key":"ref135","first-page":"1","article-title":"A robust method for tracking scene text in video imagery","author":"myers","year":"2005","journal-title":"Proc CBDAR"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1049\/iet-cvi.2013.0217"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2013.2273672"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1109\/ASSPCC.2000.882463"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2015.7333727"},{"key":"ref141","article-title":"Scene text detection in video by learning locally and globally","author":"tian","year":"2016","journal-title":"Proc Intern Joint Conf Artificial Intel (IJCAI)"},{"key":"ref142","doi-asserted-by":"crossref","first-page":"847","DOI":"10.1109\/ICPR.2000.905546","article-title":"Superresolution-based enhancement of text in digital video","volume":"1","author":"li","year":"2000","journal-title":"Proc 15th Int Conf Pattern Recognit"},{"key":"ref143","first-page":"ii-397","article-title":"Efficient video text recognition using multiple frame integration","volume":"2","author":"hua","year":"2002","journal-title":"Proc Int Conf Image Process"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s11704-015-4488-0"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1145\/500933.500941"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2366765"},{"key":"ref145","first-page":"618","article-title":"Automatic text extraction from video for content-based annotation and retrieval","volume":"1","author":"shim","year":"1998","journal-title":"Proc 14th Int Conf Pattern Recognit"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2004.1389474"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2011.5711545"},{"key":"ref107","first-page":"873","article-title":"A video text detection and recognition system","author":"xi","year":"2001","journal-title":"Proc ICME"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1145\/319463.319466"},{"key":"ref105","first-page":"21","article-title":"Automatic text tracking in digital videos","author":"li","year":"1998","journal-title":"Proc IEEE 2nd Workshop Multimedia Signal Process"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/ISM.2013.106"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2007.09.014"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2013.122"},{"key":"ref111","first-page":"331","article-title":"Fast approximate nearest neighbors with automatic algorithm configuration","volume":"1","author":"muja","year":"2009","journal-title":"Proc VISAPP"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2014.536"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1145\/358669.358692"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/s10032-004-0138-z"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/DAS.2008.49"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2013.05.037"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.image.2007.06.005"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2000.905537"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/1991996.1992019"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2007.07.003"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2011.6116563"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/S0031-3203(98)00067-3"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-29364-1_3"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2000.902976"},{"key":"ref19","first-page":"1","article-title":"Text detection in images and video sequences","author":"le\u00f3n","year":"2005","journal-title":"Proc IADAT Int Conf Multi-Media Image Process Comput Vis"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2002.1048482"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2008.4761548"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2006.107"},{"key":"ref116","first-page":"10","article-title":"A framework towards realtime detection and tracking of text","author":"merino","year":"2007","journal-title":"Proc 2nd Int Workshop Camera-Based Document Anal Recognition (CBDAR2007)"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2009.102"},{"key":"ref120","first-page":"678","article-title":"A novel video text extraction approach based on multiple frames","author":"mi","year":"2005","journal-title":"5th Intl Conf on Inf Comm and Signal Proc"},{"key":"ref121","first-page":"119","article-title":"A robust system for text extraction in video","author":"zhou","year":"2007","journal-title":"Proc 2nd Int Conf Mach Vis (ICMV)"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/IHMSC.2010.50"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2011.2177646"}],"container-title":["IEEE Transactions on Image Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/83\/7452455\/07452620.pdf?arnumber=7452620","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T16:01:49Z","timestamp":1642003309000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7452620\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,6]]},"references-count":195,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tip.2016.2554321","relation":{},"ISSN":["1057-7149","1941-0042"],"issn-type":[{"value":"1057-7149","type":"print"},{"value":"1941-0042","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,6]]}}}