{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T09:28:50Z","timestamp":1761989330506,"version":"3.37.3"},"reference-count":77,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-1400802"],"award-info":[{"award-number":["IIS-1400802"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. on Image Process."],"published-print":{"date-parts":[[2020]]},"DOI":"10.1109\/tip.2019.2930176","type":"journal-article","created":{"date-parts":[[2019,7,26]],"date-time":"2019-07-26T19:56:58Z","timestamp":1564171018000},"page":"591-601","source":"Crossref","is-referenced-by-count":33,"title":["Unambiguous Scene Text Segmentation With Referring Expression Comprehension"],"prefix":"10.1109","volume":"29","author":[{"given":"Xuejian","family":"Rong","sequence":"first","affiliation":[]},{"given":"Chucai","family":"Yi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4458-360X","authenticated-orcid":false,"given":"Yingli","family":"Tian","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref73","article-title":"COCO-text: Dataset and benchmark for text detection and recognition in natural images","author":"veit","year":"2016","journal-title":"arXiv 1601 07140"},{"doi-asserted-by":"publisher","key":"ref72","DOI":"10.1109\/ICDAR.2013.221"},{"doi-asserted-by":"publisher","key":"ref71","DOI":"10.3115\/v1\/D14-1086"},{"doi-asserted-by":"publisher","key":"ref70","DOI":"10.1145\/1015706.1015720"},{"doi-asserted-by":"publisher","key":"ref76","DOI":"10.1007\/978-3-319-10602-1_48"},{"doi-asserted-by":"publisher","key":"ref77","DOI":"10.1109\/CVPR.2016.254"},{"key":"ref74","article-title":"COCO-stuff: Thing and stuff classes in context","author":"caesar","year":"2017","journal-title":"arXiv 1612 03716"},{"doi-asserted-by":"publisher","key":"ref39","DOI":"10.1007\/s10032-015-0237-z"},{"doi-asserted-by":"publisher","key":"ref75","DOI":"10.1007\/978-3-319-46475-6_5"},{"doi-asserted-by":"publisher","key":"ref38","DOI":"10.1109\/CVPR.2012.6248097"},{"key":"ref33","article-title":"R2CNN: Rotational region CNN for orientation robust scene text detection","author":"jiang","year":"2017","journal-title":"arXiv 1706 09579"},{"key":"ref32","article-title":"Detecting curve text in the wild: New dataset and new solution","author":"yuliang","year":"2017","journal-title":"arXiv 1712 02170"},{"doi-asserted-by":"publisher","key":"ref31","DOI":"10.1109\/CVPR.2017.283"},{"doi-asserted-by":"publisher","key":"ref30","DOI":"10.1109\/CVPR.2017.371"},{"doi-asserted-by":"publisher","key":"ref37","DOI":"10.1007\/978-3-030-01264-9_5"},{"doi-asserted-by":"publisher","key":"ref36","DOI":"10.1109\/CVPR.2018.00595"},{"doi-asserted-by":"publisher","key":"ref35","DOI":"10.1109\/CVPR.2018.00527"},{"key":"ref34","first-page":"1","article-title":"PixelLink: Detecting scene text via instance segmentation","author":"deng","year":"2018","journal-title":"Proc 32nd AAAI Conf Artif Intell"},{"doi-asserted-by":"publisher","key":"ref60","DOI":"10.1109\/CVPR.2015.7298878"},{"doi-asserted-by":"publisher","key":"ref62","DOI":"10.1109\/CVPR.2017.349"},{"key":"ref61","article-title":"Deep captioning with multimodal recurrent neural networks (m-RNN)","author":"mao","year":"2014","journal-title":"arXiv 1412 6632"},{"doi-asserted-by":"publisher","key":"ref63","DOI":"10.1109\/ICCV.2015.303"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.1109\/ICCV.2017.331"},{"doi-asserted-by":"publisher","key":"ref64","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref27","first-page":"4161","article-title":"TextBoxes: A fast text detector with a single deep neural network","author":"liao","year":"2017","journal-title":"Proc AAAI"},{"doi-asserted-by":"publisher","key":"ref65","DOI":"10.1109\/ICCV.2017.143"},{"key":"ref66","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2014","journal-title":"arXiv 1409 1556"},{"doi-asserted-by":"publisher","key":"ref29","DOI":"10.1109\/CVPR.2017.368"},{"key":"ref67","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst"},{"doi-asserted-by":"publisher","key":"ref68","DOI":"10.1007\/s11263-015-0816-y"},{"doi-asserted-by":"publisher","key":"ref69","DOI":"10.1007\/978-3-319-46448-0_2"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1109\/ICIP.2011.6116198"},{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1109\/ISM.2011.21"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1109\/ICDAR.2015.7333861"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1109\/TPAMI.2014.2388210"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"2594","DOI":"10.1109\/TIP.2011.2126586","article-title":"Text string detection from natural scenes by structure-based partition and grouping","volume":"20","author":"yi","year":"2011","journal-title":"IEEE Trans Image Process"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.1109\/CVPR.2016.451"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1109\/TPAMI.2015.2496234"},{"key":"ref26","article-title":"Accurate text localization in natural image with cascaded convolutional text network","author":"he","year":"2016","journal-title":"arXiv 1603 09423"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1007\/978-3-319-46484-8_4"},{"key":"ref50","first-page":"1889","article-title":"Deep fragment embeddings for bidirectional image sentence mapping","author":"karpathy","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst"},{"doi-asserted-by":"publisher","key":"ref51","DOI":"10.1109\/CVPR.2016.494"},{"doi-asserted-by":"publisher","key":"ref59","DOI":"10.1007\/978-3-319-46448-0_51"},{"doi-asserted-by":"publisher","key":"ref58","DOI":"10.1109\/CVPR.2015.7298713"},{"doi-asserted-by":"publisher","key":"ref57","DOI":"10.1109\/CVPR.2015.7298990"},{"doi-asserted-by":"publisher","key":"ref56","DOI":"10.1109\/CVPR.2011.5995711"},{"key":"ref55","first-page":"926","article-title":"Reasoning with neural tensor networks for knowledge base completion","author":"socher","year":"2013","journal-title":"Proc Adv Neural Inf Process Syst"},{"doi-asserted-by":"publisher","key":"ref54","DOI":"10.18653\/v1\/N16-1023"},{"key":"ref53","article-title":"From recognition to cognition: Visual commonsense reasoning","author":"zellers","year":"2018","journal-title":"arXiv 1811 10830"},{"doi-asserted-by":"publisher","key":"ref52","DOI":"10.1109\/CVPR.2018.00611"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1007\/978-3-319-46454-1_49"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1109\/TIP.2017.2710620"},{"doi-asserted-by":"publisher","key":"ref40","DOI":"10.1109\/TPAMI.2016.2572683"},{"key":"ref12","first-page":"1","article-title":"CAT2000: A large scale fixation dataset for boosting saliency research","author":"borji","year":"2015","journal-title":"Proc CVPR Workshop"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1109\/CVPR.2016.493"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1007\/978-3-319-46448-0_7"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1109\/CVPR.2016.9"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1007\/978-3-319-46448-0_49"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1007\/978-3-319-46484-8_42"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"970","DOI":"10.1109\/TPAMI.2013.182","article-title":"Robust text detection in natural scene images","volume":"36","author":"yin","year":"2014","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1109\/TPAMI.2014.2366765"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1109\/ICRA.2015.7139313"},{"year":"2016","author":"parkinson","article-title":"Instant translation system","key":"ref3"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1109\/ICCVW.2011.6130221"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1109\/TITS.2004.838509"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1109\/ICCV.2009.5459462"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1109\/TIP.2014.2302896"},{"doi-asserted-by":"publisher","key":"ref49","DOI":"10.1109\/CVPR.2015.7298932"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1167\/9.12.10"},{"doi-asserted-by":"publisher","key":"ref46","DOI":"10.1007\/s10032-016-0264-4"},{"doi-asserted-by":"publisher","key":"ref45","DOI":"10.1109\/TIP.2017.2656474"},{"doi-asserted-by":"publisher","key":"ref48","DOI":"10.1109\/ICCV.2015.178"},{"doi-asserted-by":"publisher","key":"ref47","DOI":"10.1109\/ICCV.2015.179"},{"key":"ref42","article-title":"Understanding convolution for semantic segmentation","author":"wang","year":"2017","journal-title":"arXiv 1702 08502"},{"doi-asserted-by":"publisher","key":"ref41","DOI":"10.1109\/CVPR.2015.7298965"},{"doi-asserted-by":"publisher","key":"ref44","DOI":"10.1007\/s11263-015-0823-z"},{"doi-asserted-by":"publisher","key":"ref43","DOI":"10.1007\/978-3-319-10593-2_34"}],"container-title":["IEEE Transactions on Image Processing"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielaam\/83\/8835130\/8777293-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/83\/8835130\/08777293.pdf?arnumber=8777293","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T14:38:56Z","timestamp":1651070336000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8777293\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":77,"URL":"https:\/\/doi.org\/10.1109\/tip.2019.2930176","relation":{},"ISSN":["1057-7149","1941-0042"],"issn-type":[{"type":"print","value":"1057-7149"},{"type":"electronic","value":"1941-0042"}],"subject":[],"published":{"date-parts":[[2020]]}}}