{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:38:44Z","timestamp":1778081924412,"version":"3.51.4"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319466033","type":"print"},{"value":"9783319466040","type":"electronic"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-46604-0_32","type":"book-chapter","created":{"date-parts":[[2016,9,17]],"date-time":"2016-09-17T03:31:55Z","timestamp":1474083115000},"page":"440-455","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":17,"title":["Downtown Osaka Scene Text Dataset"],"prefix":"10.1007","author":[{"given":"Masakazu","family":"Iwamura","sequence":"first","affiliation":[]},{"given":"Takahiro","family":"Matsuda","sequence":"additional","affiliation":[]},{"given":"Naoyuki","family":"Morimoto","sequence":"additional","affiliation":[]},{"given":"Hitomi","family":"Sato","sequence":"additional","affiliation":[]},{"given":"Yuki","family":"Ikeda","sequence":"additional","affiliation":[]},{"given":"Koichi","family":"Kise","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,9,18]]},"reference":[{"key":"32_CR1","doi-asserted-by":"crossref","unstructured":"Shi, B., Wang, X., Lyu, P., Yao, C., Bai, X.: Robust scene text recognition with automatic rectification. In: Proceedings of CVPR, pp. 4168\u20134176 (2016)","DOI":"10.1109\/CVPR.2016.452"},{"key":"32_CR2","doi-asserted-by":"crossref","unstructured":"Mishra, A., Alahari, K., Jawahar, C.V.: Scene text recognition using higher order language priors. In: Proceedings of BMVC (2012)","DOI":"10.5244\/C.26.127"},{"key":"32_CR3","unstructured":"Wang, K., Babenko, B., Belongie, S.: End-to-end scene text recognition. In: Proceedings of ICCV, pp. 1457\u20131464 (2011)"},{"issue":"2\u20133","key":"32_CR4","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1007\/s10032-004-0134-3","volume":"7","author":"SM Lucas","year":"2005","unstructured":"Lucas, S.M., Panaretos, A., Sosa, L., Tang, A., Wong, S., Young, R., Ashida, K., Nagai, H., Okamoto, M., Yamamoto, H., Miyao, H., Zhu, J., Ou, W., Wolf, C., Jolion, J.M., Todoran, L., Worring, M., Lin, X.: ICDAR 2003 robust reading competitions: Entries, results and future directions. IJDAR 7(2\u20133), 105\u2013122 (2005)","journal-title":"IJDAR"},{"key":"32_CR5","doi-asserted-by":"crossref","unstructured":"Karatzas, D., Shafait, F., Uchida, S., Iwamura, M., Gomez i Bigorda, L., Mestre, S.R., Mas, J., Mota, D.F., Almazan, J.A., de las Heras, L.P.: ICDAR 2013 robust reading competition. In: Proceedings of ICDAR, pp. 1115\u20131124 (2013)","DOI":"10.1109\/ICDAR.2013.221"},{"key":"32_CR6","unstructured":"Wang, T., Wu, D.J., Coates, A., Ng, A.Y.: End-to-end text recognition with convolutional neural networks. In: Proceedings of ICPR, pp. 3304\u20133308 (2012)"},{"key":"32_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"752","DOI":"10.1007\/978-3-642-33783-3_54","volume-title":"Computer Vision \u2013 ECCV 2012","author":"T Novikova","year":"2012","unstructured":"Novikova, T., Barinova, O., Kohli, P., Lempitsky, V.: Large-lexicon attribute-consistent text recognition in natural images. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012, Part VI. LNCS, vol. 7577, pp. 752\u2013765. Springer, Heidelberg (2012)"},{"key":"32_CR8","doi-asserted-by":"crossref","unstructured":"Goel, V., Mishra, A., Alahari, K., Jawahar, C.V.: Whole is greater than sum of parts: recognizing scene text words. In: Proceedings of ICDAR, pp. 398\u2013402 (2013)","DOI":"10.1109\/ICDAR.2013.87"},{"key":"32_CR9","doi-asserted-by":"crossref","unstructured":"Bissacco, A., Cummins, M., Netzer, Y., Neven, H.: Photoocr: reading text in uncontrolled conditions. In: Proceedings of ICCV, pp. 785\u2013792 (2013)","DOI":"10.1109\/ICCV.2013.102"},{"key":"32_CR10","unstructured":"Alsharif, O., Pineau, J.: End-to-end text recognition with hybrid HMM maxout models. In: International Conference on Learning Representations (ICLR) (2014)"},{"issue":"12","key":"32_CR11","doi-asserted-by":"publisher","first-page":"2552","DOI":"10.1109\/TPAMI.2014.2339814","volume":"36","author":"J Almaz\u00e1n","year":"2014","unstructured":"Almaz\u00e1n, J., Gordo, A., Forn\u00e9s, A., Valveny, E.: Word spotting and recognition with embedded attributes. IEEE TPAMI 36(12), 2552\u20132566 (2014)","journal-title":"IEEE TPAMI"},{"key":"32_CR12","doi-asserted-by":"crossref","unstructured":"Yao, C., Bai, X., Shi, B., Liu, W.: Strokelets: a learned multi-scale representation for scene text recognition. In: Proceedings of CVPR (2014)","DOI":"10.1109\/CVPR.2014.515"},{"key":"32_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"512","DOI":"10.1007\/978-3-319-10593-2_34","volume-title":"Computer Vision \u2013 ECCV 2014","author":"M Jaderberg","year":"2014","unstructured":"Jaderberg, M., Vedaldi, A., Zisserman, A.: Deep features for text spotting. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014, Part IV. LNCS, vol. 8692, pp. 512\u2013528. Springer, Heidelberg (2014)"},{"key":"32_CR14","series-title":"Lecture Notes in Computer Science","first-page":"35","volume-title":"Computer Vision \u2013 ACCV 2014","author":"B Su","year":"2015","unstructured":"Su, B., Lu, S.: Accurate scene text recognition based on recurrent neural network. In: Cremers, D., Reid, I., Saito, H., Yang, M.-H. (eds.) ACCV 2014. LNCS, vol. 9003, pp. 35\u201348. Springer, Heidelberg (2015)"},{"issue":"3","key":"32_CR15","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1007\/s11263-014-0793-6","volume":"113","author":"JA Rodriguez","year":"2015","unstructured":"Rodriguez, J.A., Gordo, A., Perronnin, F.: Label embedding: a frugal baseline for text recognition. IJCV 113(3), 193\u2013207 (2015)","journal-title":"IJCV"},{"key":"32_CR16","doi-asserted-by":"crossref","unstructured":"Gordo, A.: Supervised mid-level features for word image representation. In: Proceedings of CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298914"},{"issue":"1","key":"32_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11263-015-0823-z","volume":"116","author":"M Jaderberg","year":"2016","unstructured":"Jaderberg, M., Simonyan, K., Vedaldi, A., Zisserman, A.: Reading text in the wild with convolutional neural networks. IJCV 116(1), 1\u201320 (2016)","journal-title":"IJCV"},{"key":"32_CR18","unstructured":"Jaderberg, M., Simonyan, K., Vedaldi, A., Zisserman, A.: Deep structured output learning for unconstrained text recognition. In: Proceedings of ICLR (2015)"},{"key":"32_CR19","unstructured":"Shi, B., Bai, X., Yao, C.: An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition. CoRR abs\/1507.05717 (2015)"},{"key":"32_CR20","doi-asserted-by":"crossref","unstructured":"Poznanski, A., Wolf, L.: CNN-N-gram for handwritingword recognition. In: Proceedings of CVPR (2016)","DOI":"10.1109\/CVPR.2016.253"},{"issue":"2","key":"32_CR21","first-page":"83","volume":"7","author":"J Liang","year":"2005","unstructured":"Liang, J., Doermann, D., Li, H.: Camera-based analysis of text and documents: a survey. IJDAR 7(2), 83\u2013104 (2005)","journal-title":"IJDAR"},{"key":"32_CR22","unstructured":"Jaderberg, M., Simonyan, K., Vedaldi, A., Zisserman, A.: Synthetic data and artificial neural networks for natural scene text recognition. In: Proceedings of NIPS Deep Learning Workshop (2014)"},{"key":"32_CR23","doi-asserted-by":"crossref","unstructured":"Gupta, A., Vedaldi, A., Zisserman, A.: Synthetic data for text localisation in natural images. In: Proceedings of CVPR (2016)","DOI":"10.1109\/CVPR.2016.254"},{"key":"32_CR24","doi-asserted-by":"crossref","unstructured":"Karatzas, D., Gomez-Bigorda, L., Nicolaou, A., Ghosh, S., Bagdanov, A., Iwamura, M., Matas, J., Neumann, L., Chandrasekhar, V.R., Lu, S., Shafait, F., Uchida, S., Valveny, E.: ICDAR 2015 robust reading competition. In: Proceedings of ICDAR, pp. 1156\u20131160 (2015)","DOI":"10.1109\/ICDAR.2015.7333942"},{"key":"32_CR25","unstructured":"Nguyen, P.X., Wang, K., Belongie, S.: Video text detection and recognition: dataset and benchmark. In: Proceedings of WACV (2014)"},{"key":"32_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1007\/978-3-642-29364-1_12","volume-title":"Camera-Based Document Analysis and Recognition","author":"R Nagy","year":"2012","unstructured":"Nagy, R., Dicker, A., Meyer-Wegener, K.: NEOCR: a configurable dataset for natural image text recognition. In: Iwamura, M., Shafait, F. (eds.) CBDAR 2011. LNCS, vol. 7139, pp. 150\u2013163. Springer, Heidelberg (2012)"},{"issue":"1","key":"32_CR27","doi-asserted-by":"publisher","first-page":"78","DOI":"10.4218\/etrij.11.1510.0029","volume":"33","author":"J Jung","year":"2011","unstructured":"Jung, J., Lee, S., Cho, M.S., Kim, J.H.: Touch TT: scene text extractor using touchscreen interface. ETRI J. 33(1), 78\u201388 (2011)","journal-title":"ETRI J."},{"key":"32_CR28","unstructured":"Netzer, Y., Wang, T., Coates, A., Bissacco, A., Wu, B., Ng, A.Y.: Reading digits in natural images with unsupervised feature learning. In: Proceedings of NIPS Workshop on Deep Learning and Unsupervised Feature Learning (2011)"},{"key":"32_CR29","unstructured":"Veit, A., Matera, T., Neumann, L., Matas, J., Belongie, S.: COCO-Text: dataset and benchmark for text detection and recognition in natural images. CoRR abs\/1207.0016 (2016)"},{"key":"32_CR30","doi-asserted-by":"crossref","unstructured":"Yuen, J., Russell, B., Liu, C., Torralba, A.: LabelMe video: building a video database with human annotations. In: Proceedings of ICCV, pp. 1451\u20131458 (2009)","DOI":"10.1109\/ICCV.2009.5459289"},{"key":"32_CR31","doi-asserted-by":"crossref","unstructured":"Neumann, L., Matas, J.: Real-time scene text localization and recognition. In: Proceedings of CVPR, pp. 3538\u20133545 (2012)","DOI":"10.1109\/CVPR.2012.6248097"},{"issue":"3","key":"32_CR32","first-page":"336","volume":"J93","author":"Y Matsuda","year":"2010","unstructured":"Matsuda, Y., Omachi, S., Aso, H.: String detection from scene images by binarization and edge detection. Trans. IEICE J93(3), 336\u2013344 (2010). In Japanese","journal-title":"Trans. IEICE"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2016 Workshops"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-46604-0_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,10]],"date-time":"2025-06-10T19:35:51Z","timestamp":1749584151000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-46604-0_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319466033","9783319466040"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-46604-0_32","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016]]},"assertion":[{"value":"18 September 2016","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Amsterdam","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The Netherlands","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2016","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 October 2016","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 October 2016","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2016","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.eccv2016.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}