{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T09:34:36Z","timestamp":1763458476542,"version":"3.45.0"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,10,1]],"date-time":"2017-10-01T00:00:00Z","timestamp":1506816000000},"content-version":"vor","delay-in-days":365,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1317560"],"award-info":[{"award-number":["1317560"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2016,10]]},"DOI":"10.1145\/2964284.2967282","type":"proceedings-article","created":{"date-parts":[[2016,9,29]],"date-time":"2016-09-29T15:17:32Z","timestamp":1475162252000},"page":"551-555","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Detecting Arbitrary Oriented Text in the Wild with a Visual Attention Model"],"prefix":"10.1145","author":[{"given":"Wenyi","family":"Huang","sequence":"first","affiliation":[{"name":"The Pennsylvania State University, University Park, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dafang","family":"He","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiao","family":"Yang","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zihan","family":"Zhou","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniel","family":"Kifer","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"C. Lee","family":"Giles","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2016,10]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.5555\/2999134.2999233"},{"key":"e_1_3_2_1_2_1","volume-title":"Proc. of ICLR'15","author":"Ba J.","year":"2015","unstructured":"J. Ba, V. Mnih, and K. Kavukcuoglu. Multiple object recognition with visual attention. In Proc. of ICLR'15, 2015."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2013.279"},{"key":"e_1_3_2_1_4_1","first-page":"937","volume-title":"Proc. of ICML'11","author":"Bazzani L.","year":"2011","unstructured":"L. Bazzani, N. de Freitas, H. Larochelle, V. Murino, and J.-A. Ting. Learning attentional policies for object tracking and recognition in video with deep networks. In Proc. of ICML'11, pages 937--944. ACM, 2011."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"e_1_3_2_1_6_1","first-page":"134","volume-title":"Proc of ACCV'14 Workshops","author":"M.","year":"2014","unstructured":"M. Bu\\vsta, T. Drtina, D. Helekal, L. Neumann, and J. Matas. Efficient character skew rectification in scene text images. In Proc of ACCV'14 Workshops, pages 134--146. Springer, 2014."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.5555\/1896300.1896354"},{"key":"e_1_3_2_1_8_1","volume-title":"Control of goal-directed and stimulus-driven attention in the brain. Nature reviews neuroscience, 3(3):201--215","author":"Corbetta M.","year":"2002","unstructured":"M. Corbetta and G. L. Shulman. Control of goal-directed and stimulus-driven attention in the brain. Nature reviews neuroscience, 3(3):201--215, 2002."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5540041"},{"key":"e_1_3_2_1_10_1","volume-title":"Workshop on Deep Learning, NIPS","author":"Jaderberg M.","year":"2014","unstructured":"M. Jaderberg, K. Simonyan, A. Vedaldi, and A. Zisserman. Synthetic data and artificial neural networks for natural scene text recognition. In Workshop on Deep Learning, NIPS, 2014."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10593-2_34"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.514"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2015.7333942"},{"key":"e_1_3_2_1_14_1","first-page":"306","volume-title":"Intelligent Signal Processing","author":"LeCun Y.","year":"2001","unstructured":"Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. Gradient-based learning applied to document recognition. In Intelligent Signal Processing, pages 306--351. IEEE Press, 2001."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502108"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.5555\/2969033.2969073"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.5555\/1966049.1966110"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.5555\/2354409.2355095"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2393347.2396307"},{"key":"e_1_3_2_1_20_1","volume-title":"Visual cognition, 7(1--3):17--42","author":"Rensink R. A.","year":"2000","unstructured":"R. A. Rensink. The dynamic representation of scenes. Visual cognition, 7(1--3):17--42, 2000."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2014.07.008"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2012.09.019"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.528"},{"key":"e_1_3_2_1_24_1","first-page":"2","volume":"4","author":"Tieleman T.","year":"2012","unstructured":"T. Tieleman and G. Hinton. Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude. COURSERA: Neural Networks for Machine Learning, 4:2, 2012.","journal-title":"COURSERA: Neural Networks for Machine Learning"},{"key":"e_1_3_2_1_25_1","volume-title":"Treatise on Physiological Optics: Translated from the 3rd German Ed","author":"von Helmholtz H.","year":"1925","unstructured":"H. von Helmholtz and J. P. C. Southall. Treatise on Physiological Optics: Translated from the 3rd German Ed. Optical Society of America, 1925."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126402"},{"key":"e_1_3_2_1_27_1","first-page":"3304","volume-title":"Proc. of ICPR'12","author":"Wang T.","year":"2012","unstructured":"T. Wang, D. J. Wu, A. Coates, and A. Y. Ng. End-to-end text recognition with convolutional neural networks. In Proc. of ICPR'12, pages 3304--3308. IEEE, 2012."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10032-006-0014-0"},{"key":"e_1_3_2_1_29_1","volume-title":"Proc. of ICML'15","author":"Xu K.","year":"2015","unstructured":"K. Xu, J. Ba, R. Kiros, K. Cho, A. Courville, R. Salakhutdinov, R. Zemel, and Y. Bengio. Show, attend and tell: Neural image caption generation with visual attention. In Proc. of ICML'15, 2015."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.5555\/2354409.2354851"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2388210"},{"key":"e_1_3_2_1_32_1","first-page":"91","volume-title":"Proc. of ACCV'14","author":"Zamberletti A.","year":"2014","unstructured":"A. Zamberletti, L. Noce, and I. Gallo. Text localization based on fast feature pyramids and multi-resolution maximally stable extremal regions. In Proc. of ACCV'14, pages 91--105. Springer, 2014."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298871"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/1180639.1180698"}],"event":{"name":"MM '16: ACM Multimedia Conference","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Amsterdam The Netherlands","acronym":"MM '16"},"container-title":["Proceedings of the 24th ACM international conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2964284.2967282","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2964284.2967282","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2964284.2967282","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T09:28:30Z","timestamp":1763458110000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2964284.2967282"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,10]]},"references-count":34,"alternative-id":["10.1145\/2964284.2967282","10.1145\/2964284"],"URL":"https:\/\/doi.org\/10.1145\/2964284.2967282","relation":{},"subject":[],"published":{"date-parts":[[2016,10]]},"assertion":[{"value":"2016-10-01","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}