{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T17:45:22Z","timestamp":1772905522367,"version":"3.50.1"},"publisher-location":"New York, New York, USA","reference-count":17,"publisher":"ACM Press","license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1145\/3368926.3369689","type":"proceedings-article","created":{"date-parts":[[2019,12,20]],"date-time":"2019-12-20T13:30:11Z","timestamp":1576848611000},"page":"285-290","source":"Crossref","is-referenced-by-count":13,"title":["Improving CRNN with EfficientNet-like feature extractor and multi-head attention for text recognition"],"prefix":"10.1145","author":[{"given":"Dinh Viet","family":"Sang","sequence":"first","affiliation":[{"name":"Hanoi University of Science and Technology, Hanoi, Vietnam"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Le Tran Bao","family":"Cuong","sequence":"additional","affiliation":[{"name":"Hanoi University of Science and Technology, Hanoi, Vietnam"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","reference":[{"key":"key-10.1145\/3368926.3369689-1","unstructured":"Martin Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, Manjunath Kudlur, Josh Levenberg, Rajat Monga, Sherry Moore, Derek G. Murray, Benoit Steiner, Paul Tucker, Vijay Vasudevan, Pete Warden, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng. 2016. TensorFlow: A system for large-scale machine learning. In 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16). 265--283. 
https:\/\/www.usenix.org\/system\/files\/conference\/osdi16\/osdi16-abadi.pdf"},{"key":"key-10.1145\/3368926.3369689-2","unstructured":"Dzmitry Bahdanau, Kyunghyun Cho, and Yoshua Bengio. 2014. Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473 (2014)."},{"key":"key-10.1145\/3368926.3369689-3","unstructured":"Christian Bartz, Haojin Yang, and Christoph Meinel. 2017. STN-OCR: A single neural network for text detection and text recognition. arXiv preprint arXiv:1707.08831 (2017)."},{"key":"key-10.1145\/3368926.3369689-4","unstructured":"Alex Graves, Santiago Fernández, Faustino Gomez, and Jürgen Schmidhuber. 2006. Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In Proceedings of the 23rd international conference on Machine learning. ACM, 369--376."},{"key":"key-10.1145\/3368926.3369689-5","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2015. Delving deep into rectifiers: Surpassing human-level performance on imagenet classification. In Proceedings of the IEEE international conference on computer vision. 1026--1034."},{"key":"key-10.1145\/3368926.3369689-6","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2016. Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition. 770--778."},{"key":"key-10.1145\/3368926.3369689-7","doi-asserted-by":"crossref","unstructured":"Gao Huang, Zhuang Liu, Kilian Q Weinberger, and Laurens van der Maaten. 2016. Densely connected convolutional networks. arXiv preprint arXiv:1608.06993 (2016).","DOI":"10.1109\/CVPR.2017.243"},{"key":"key-10.1145\/3368926.3369689-8","unstructured":"Max Jaderberg, Karen Simonyan, Andrea Vedaldi, and Andrew Zisserman. 2014. Deep structured output learning for unconstrained text recognition. 
arXiv preprint arXiv:1412.5903 (2014)."},{"key":"key-10.1145\/3368926.3369689-9","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"key-10.1145\/3368926.3369689-10","doi-asserted-by":"crossref","unstructured":"Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, and Liang-Chieh Chen. 2018. Mobilenetv2: Inverted residuals and linear bottlenecks. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 4510--4520.","DOI":"10.1109\/CVPR.2018.00474"},{"key":"key-10.1145\/3368926.3369689-11","unstructured":"Baoguang Shi, Xiang Bai, and Cong Yao. 2016. An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition. IEEE transactions on pattern analysis and machine intelligence 39, 11 (2016), 2298--2304."},{"key":"key-10.1145\/3368926.3369689-12","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"key-10.1145\/3368926.3369689-13","doi-asserted-by":"crossref","unstructured":"Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, and Andrew Rabinovich. 2015. Going deeper with convolutions. In Proceedings of the IEEE conference on computer vision and pattern recognition. 1--9.","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"key-10.1145\/3368926.3369689-14","unstructured":"Mingxing Tan and Quoc Le. 2019. EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. In International Conference on Machine Learning. 6105--6114."},{"key":"key-10.1145\/3368926.3369689-15","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Łukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. 
In Advances in neural information processing systems. 5998--6008."},{"key":"key-10.1145\/3368926.3369689-16","unstructured":"Zbigniew Wojna, Alexander N Gorban, Dar-Shyang Lee, Kevin Murphy, Qian Yu, Yeqing Li, and Julian Ibarz. 2017. Attention-based extraction of structured information from street view imagery. In 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), Vol. 1. IEEE, 844--850."},{"key":"key-10.1145\/3368926.3369689-17","doi-asserted-by":"crossref","unstructured":"Barret Zoph, Vijay Vasudevan, Jonathon Shlens, and Quoc V Le. 2018. Learning transferable architectures for scalable image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition. 8697--8710.","DOI":"10.1109\/CVPR.2018.00907"}],"event":{"name":"the Tenth International Symposium","location":"Hanoi, Ha Long Bay, Viet Nam","acronym":"SoICT 2019","number":"10","sponsor":["SOICT, School of Information and Communication Technology - HUST","NAFOSTED, The National Foundation for Science and Technology Development"],"start":{"date-parts":[[2019,12,4]]},"end":{"date-parts":[[2019,12,6]]}},"container-title":["Proceedings of the Tenth International Symposium on Information and Communication Technology  - SoICT 2019"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3368926.3369689","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/dl.acm.org\/ft_gateway.cfm?id=3369689&ftid=2101277&dwn=1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:44:51Z","timestamp":1750203891000},"score":1,"resource":{"primary":{"URL":"http:\/\/dl.acm.org\/citation.cfm?doid=3368926.3369689"}},"subtitle":[],"proceedings-subject":"Information and Communication 
Technology","short-title":[],"issued":{"date-parts":[[2019]]},"references-count":17,"URL":"https:\/\/doi.org\/10.1145\/3368926.3369689","relation":{},"subject":[],"published":{"date-parts":[[2019]]}}}