{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T05:22:05Z","timestamp":1770355325493,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":14,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,10,15]],"date-time":"2019-10-15T00:00:00Z","timestamp":1571097600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,10,15]]},"DOI":"10.1145\/3343031.3351093","type":"proceedings-article","created":{"date-parts":[[2019,10,21]],"date-time":"2019-10-21T16:32:26Z","timestamp":1571675546000},"page":"855-863","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":26,"title":["You Only Recognize Once"],"prefix":"10.1145","author":[{"given":"Zhanzhan","family":"Cheng","sequence":"first","affiliation":[{"name":"Hikvision Research Institution, Hangzhou, China"}]},{"given":"Jing","family":"Lu","sequence":"additional","affiliation":[{"name":"Hikvision Research Institute, Hangzhou, China"}]},{"given":"Yi","family":"Niu","sequence":"additional","affiliation":[{"name":"Hikvision Research Institute, Hangzhou, China"}]},{"given":"Shiliang","family":"Pu","sequence":"additional","affiliation":[{"name":"Hikvision Research Institute, Hangzhou, China"}]},{"given":"Fei","family":"Wu","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"given":"Shuigeng","family":"Zhou","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2019,10,15]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"crossref","unstructured":"Fan Bai Zhanzhan Cheng Yi Niu Shiliang Pu and Shuigeng Zhou. 2018. Edit Probability for Scene Text Recognition. In CVPR. 1508--1516.  Fan Bai Zhanzhan Cheng Yi Niu Shiliang Pu and Shuigeng Zhou. 2018. Edit Probability for Scene Text Recognition. In CVPR. 1508--1516.","DOI":"10.1109\/CVPR.2018.00163"},{"key":"e_1_3_2_2_2_1","volume-title":"Rosetta: Large Scale System for Text Detection and Recognition in Images. In SIGKDD . 71--79.","author":"Borisyuk Fedor","year":"2018"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"crossref","unstructured":"Dapeng Chen Hongsheng Li Tong Xiao Shuai Yi and Xiaogang Wang. 2018. Video Person Re-Identification With Competitive Snippet-Similarity Aggregation and Co-Attentive Snippet Embedding. In CVPR. 1169--1178.  Dapeng Chen Hongsheng Li Tong Xiao Shuai Yi and Xiaogang Wang. 2018. Video Person Re-Identification With Competitive Snippet-Similarity Aggregation and Co-Attentive Snippet Embedding. In CVPR. 1169--1178.","DOI":"10.1109\/CVPR.2018.00128"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"crossref","unstructured":"Zhanzhan Cheng Fan Bai Yunlu Xu Gang Zheng Shiliang Pu and Shuigeng Zhou. 2017. Focusing attention: Towards accurate text recognition in natural images. In ICCV . 5086--5094.  Zhanzhan Cheng Fan Bai Yunlu Xu Gang Zheng Shiliang Pu and Shuigeng Zhou. 2017. Focusing attention: Towards accurate text recognition in natural images. In ICCV . 5086--5094.","DOI":"10.1109\/ICCV.2017.543"},{"key":"e_1_3_2_2_5_1","volume-title":"AON: Towards Arbitrarily-Oriented Text Recognition. In CVPR . 5571--5579.","author":"Cheng Zhanzhan","year":"2018"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"crossref","unstructured":"Victor Fragoso Steffen Gauglitz Shane Zamora Jim Kleban and Matthew Turk. 2011. TranslatAR: A mobile augmented reality translator. In WACV. 497--502.  Victor Fragoso Steffen Gauglitz Shane Zamora Jim Kleban and Matthew Turk. 2011. TranslatAR: A mobile augmented reality translator. In WACV. 497--502.","DOI":"10.1109\/WACV.2011.5711545"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"crossref","unstructured":"Ll'ifs G\u00f3mez and Dimosthenis Karatzas. 2014. MSER-based real-time text detection and tracking. In ICPR. 3110--3115.  Ll'ifs G\u00f3mez and Dimosthenis Karatzas. 2014. MSER-based real-time text detection and tracking. In ICPR. 3110--3115.","DOI":"10.1109\/ICPR.2014.536"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"crossref","unstructured":"Hideaki Goto and Makoto Tanaka. 2009. Text-tracking wearable camera system for the blind. In ICDAR. 141--145.  Hideaki Goto and Makoto Tanaka. 2009. Text-tracking wearable camera system for the blind. In ICDAR. 141--145.","DOI":"10.1109\/ICDAR.2009.102"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"crossref","unstructured":"Alex Graves Santiago Fern\u00e1ndez Faustino Gomez and J\u00fcrgen Schmidhuber. 2006. Connectionist Temporal Classification : Labelling Unsegmented Sequence Data with Recurrent Neural Networks. In ICML. 369--376.  Alex Graves Santiago Fern\u00e1ndez Faustino Gomez and J\u00fcrgen Schmidhuber. 2006. Connectionist Temporal Classification : Labelling Unsegmented Sequence Data with Recurrent Neural Networks. In ICML. 369--376.","DOI":"10.1145\/1143844.1143891"},{"key":"e_1_3_2_2_10_1","first-page":"1360","article-title":"Recognizing Text-Based Traffic Signs","volume":"16","author":"Greenhalgh Jack","year":"2015","journal-title":"IEEE TITS"},{"key":"e_1_3_2_2_11_1","unstructured":"Raia Hadsell Sumit Chopra and Yann LeCun. 2006. Dimensionality reduction by learning an invariant mapping. In CVPR. 1735--1742.  Raia Hadsell Sumit Chopra and Yann LeCun. 2006. Dimensionality reduction by learning an invariant mapping. In CVPR. 1735--1742."},{"key":"e_1_3_2_2_12_1","unstructured":"Tong He Zhi Tian Weilin Huang Chunhua Shen Yu Qiao and Changming Sun. 2018. An End-to-End TextSpotter With Explicit Alignment and Attention. In CVPR . 5020--5029.  Tong He Zhi Tian Weilin Huang Chunhua Shen Yu Qiao and Changming Sun. 2018. An End-to-End TextSpotter With Explicit Alignment and Attention. In CVPR . 5020--5029."},{"key":"e_1_3_2_2_13_1","unstructured":"Wenhao He Xu-Yao Zhang Fei Yin and Cheng-Lin Liu. 2017. Deep Direct Regression for Multi-Oriented Scene Text Detection. In ICCV . 745--753.  Wenhao He Xu-Yao Zhang Fei Yin and Cheng-Lin Liu. 2017. Deep Direct Regression for Multi-Oriented Scene Text Detection. In ICCV . 745--753."},{"key":"e_1_3_2_2_14_1","unstructured":"Geoffrey Hinton Oriol Vinyals and Jeff Dean. 2015. Distilling the knowledge in a neural network. (2015).  Geoffrey Hinton Oriol Vinyals and Jeff Dean. 2015. Distilling the knowledge in a neural network. (2015)."}],"event":{"name":"MM '19: The 27th ACM International Conference on Multimedia","location":"Nice France","acronym":"MM '19","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 27th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3343031.3351093","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3343031.3351093","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:13:12Z","timestamp":1750201992000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3343031.3351093"}},"subtitle":["Towards Fast Video Text Spotting"],"short-title":[],"issued":{"date-parts":[[2019,10,15]]},"references-count":14,"alternative-id":["10.1145\/3343031.3351093","10.1145\/3343031"],"URL":"https:\/\/doi.org\/10.1145\/3343031.3351093","relation":{},"subject":[],"published":{"date-parts":[[2019,10,15]]},"assertion":[{"value":"2019-10-15","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}