{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:00:49Z","timestamp":1772906449567,"version":"3.50.1"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031704413","type":"print"},{"value":"9783031704420","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70442-0_17","type":"book-chapter","created":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T08:09:40Z","timestamp":1725955780000},"page":"277-294","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Speed-Up Pre-trained Vision Encoder\u2013Decoder Transformers by\u00a0Leveraging Lightweight Mixer Layers for\u00a0Text Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2078-0329","authenticated-orcid":false,"given":"Daniel","family":"Parres","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8288-6009","authenticated-orcid":false,"given":"Dan","family":"Anitei","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5192-0021","authenticated-orcid":false,"given":"Roberto","family":"Paredes","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0423-2020","authenticated-orcid":false,"given":"Joan Andreu","family":"S\u00e1nchez","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6516-2746","authenticated-orcid":false,"given":"Jos\u00e9 Miguel","family":"Bened\u00ed","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,11]]},"reference":[{"key":"17_CR1","doi-asserted-by":"crossref","unstructured":"Bhunia, A.K., Ghose, S., Kumar, A., Chowdhury, P.N., Sain, A., Song, Y.Z.: MetaHTR: towards writer-adaptive handwritten text recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15830\u201315839 (2021)","DOI":"10.1109\/CVPR46437.2021.01557"},{"key":"17_CR2","unstructured":"Blecher, L., Cucurull, G., Scialom, T., Stojnic, R.: Nougat: neural optical understanding for academic documents. arXiv preprint arXiv:2308.13418 (2023)"},{"key":"17_CR3","doi-asserted-by":"crossref","unstructured":"Bluche, T., Messina, R.: Gated convolutional recurrent neural networks for multilingual handwriting recognition. In: Proceedings of the 14th IAPR International Conference on Document Analysis and Recognition, pp. 646\u2013651 (2017)","DOI":"10.1109\/ICDAR.2017.111"},{"key":"17_CR4","doi-asserted-by":"crossref","unstructured":"Cheng, C., Wang, P., Da, C., Zheng, Q., Yao, C.: LISTER: neighbor decoding for length-insensitive scene text recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 19541\u201319551 (2023)","DOI":"10.1109\/ICCV51070.2023.01790"},{"key":"17_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"182","DOI":"10.1007\/978-3-031-41685-9_12","volume-title":"Document Analysis and Recognition - ICDAR 2023","author":"D Coquenet","year":"2023","unstructured":"Coquenet, D., Chatelain, C., Paquet, T.: Faster DAN: multi-target queries with document positional encoding for end-to-end handwritten document recognition. In: Fink, G.A., Jain, R., Kise, K., Zanibbi, R. (eds.) ICDAR 2023. LNCS, vol. 14190, pp. 182\u2013199. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-41685-9_12"},{"issue":"7","key":"17_CR6","doi-asserted-by":"publisher","first-page":"8227","DOI":"10.1109\/TPAMI.2023.3235826","volume":"45","author":"D Coquenet","year":"2023","unstructured":"Coquenet, D., Chatelain, C., Paquet, T.: DAN: a segmentation-free document attention network for handwritten document recognition. IEEE Trans. Pattern Anal. Mach. Intell. 45(7), 8227\u20138243 (2023)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"17_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"322","DOI":"10.1007\/978-3-031-19815-1_19","volume-title":"Computer Vision - ECCV 2022","author":"C Da","year":"2022","unstructured":"Da, C., Wang, P., Yao, C.: Levenshtein OCR. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13688, pp. 322\u2013338. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19815-1_19"},{"key":"17_CR8","doi-asserted-by":"crossref","unstructured":"Dai, G., et al.: Disentangling writer and character styles for handwriting generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5977\u20135986 (2023)","DOI":"10.1109\/CVPR52729.2023.00579"},{"key":"17_CR9","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1016\/j.patrec.2023.03.020","volume":"169","author":"M Dhiaf","year":"2023","unstructured":"Dhiaf, M., Rouhou, A.C., Kessentini, Y., Salem, S.B.: MSdocTr-lite: a lite transformer for full page multi-script handwriting recognition. Pattern Recogn. Lett. 169, 28\u201334 (2023)","journal-title":"Pattern Recogn. Lett."},{"key":"17_CR10","unstructured":"Dosovitskiy, A., et al.: An image is worth 16$$\\times $$16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"17_CR11","doi-asserted-by":"crossref","unstructured":"Fischer, A., Inderm\u00fchle, E., Bunke, H., Viehhauser, G., Stolz, M.: Ground truth creation for handwriting recognition in historical documents. In: Proceedings of the 9th IAPR International Workshop on Document Analysis Systems, pp. 3\u201310 (2010)","DOI":"10.1145\/1815330.1815331"},{"key":"17_CR12","doi-asserted-by":"crossref","unstructured":"Fogel, S., Averbuch-Elor, H., Cohen, S., Mazor, S., Litman, R.: ScrabbleGAN: semi-supervised varying length handwritten text generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4324\u20134333 (2020)","DOI":"10.1109\/CVPR42600.2020.00438"},{"key":"17_CR13","doi-asserted-by":"crossref","unstructured":"Gholamian, S., Vahdat, A.: Handwritten and printed text segmentation: a signature case study. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 582\u2013592 (2023)","DOI":"10.1109\/ICCV51070.2023.00060"},{"key":"17_CR14","doi-asserted-by":"crossref","unstructured":"Graves, A.: Sequence transduction with recurrent neural networks. arXiv preprint arXiv:1211.3711 (2012)","DOI":"10.1007\/978-3-642-24797-2"},{"key":"17_CR15","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 369\u2013376 (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"17_CR16","unstructured":"Hendrycks, D., Gimpel, K.: Gaussian error linear units (GELUs). arXiv preprint arxiv:1606.08415 (2016)"},{"key":"17_CR17","doi-asserted-by":"crossref","unstructured":"Huang, Z., Chen, K., He, J., Bai, X., Karatzas, D., Lu, S., Jawahar, C.V.: ICDAR2019 competition on scanned receipt OCR and information extraction. In: ICDAR, pp. 1516\u20131520 (2019)","DOI":"10.1109\/ICDAR.2019.00244"},{"key":"17_CR18","doi-asserted-by":"crossref","unstructured":"Kil, T., Kim, S., Seo, S., Kim, Y., Kim, D.: Towards unified scene text spotting based on sequence generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15223\u201315232 (2023)","DOI":"10.1109\/CVPR52729.2023.01461"},{"key":"17_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"498","DOI":"10.1007\/978-3-031-19815-1_29","volume-title":"Computer Vision - ECCV 2022","author":"G Kim","year":"2022","unstructured":"Kim, G., et al.: OCR-free document understanding transformer. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13688, pp. 498\u2013517. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19815-1_29"},{"key":"17_CR20","doi-asserted-by":"crossref","unstructured":"Li, M., Lv, T., Chen, J., Cui, L., Lu, Y., Florencio, D., Zhang, C., Li, Z., Wei, F.: TrOCR: transformer-based optical character recognition with pre-trained models. In: In Proceedings of the 37th AAAI Conference on Artificial Intelligence, pp. 13094\u201313102 (2023)","DOI":"10.1609\/aaai.v37i11.26538"},{"key":"17_CR21","unstructured":"Liu, Y., et al.: RoBERTa: a robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"key":"17_CR22","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer V2: scaling up capacity and resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12009\u201312019 (2022)","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"17_CR23","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"17_CR24","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"key":"17_CR25","doi-asserted-by":"crossref","unstructured":"Luo, C., Jin, L., Chen, J.: SimAN: exploring self-supervised representation learning of scene text via similarity-aware normalization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1039\u20131048 (2022)","DOI":"10.1109\/CVPR52688.2022.00111"},{"key":"17_CR26","unstructured":"Lyu, P., et al.: MaskOCR: text recognition with masked encoder-decoder pretraining. arXiv preprint arXiv:2206.00311 (2023)"},{"key":"17_CR27","doi-asserted-by":"crossref","unstructured":"Ma, J., Liang, Z., Zhang, L.: A text attention network for spatial deformation robust scene text image super-resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5911\u20135920 (2022)","DOI":"10.1109\/CVPR52688.2022.00582"},{"key":"17_CR28","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1007\/s100320200071","volume":"5","author":"UV Marti","year":"2002","unstructured":"Marti, U.V., Bunke, H.: The IAM-database: an English sentence database for offline handwriting recognition. IJDAR 5, 39\u201346 (2002)","journal-title":"IJDAR"},{"key":"17_CR29","doi-asserted-by":"crossref","unstructured":"Mishra, A., Alahari, K., Jawahar, C.V.: Scene text recognition using higher order language priors. In: BMVC, p. 1-11 (2012)","DOI":"10.5244\/C.26.127"},{"key":"17_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1007\/978-3-031-41685-9_16","volume-title":"Document Analysis and Recognition - ICDAR 2023","author":"D Parres","year":"2023","unstructured":"Parres, D., Paredes, R.: Fine-tuning vision encoder-decoder transformers for handwriting text recognition on historical documents. In: Fink, G.A., Jain, R., Kise, K., Zanibbi, R. (eds.) ICDAR 2023. LNCS, vol. 14190, pp. 253\u2013268. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-41685-9_16"},{"key":"17_CR31","doi-asserted-by":"crossref","unstructured":"Pippi, V., Cascianelli, S., Cucchiara, R.: Handwritten text generation from visual archetypes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22458\u201322467 (2023)","DOI":"10.1109\/CVPR52729.2023.02151"},{"key":"17_CR32","doi-asserted-by":"crossref","unstructured":"Puigcerver, J.: Are multidimensional recurrent layers really necessary for handwritten text recognition? In: Proceedings of the 14th IAPR International Conference on Document Analysis and Recognition, pp. 67\u201372 (2017)","DOI":"10.1109\/ICDAR.2017.20"},{"key":"17_CR33","doi-asserted-by":"crossref","unstructured":"de\u00a0Sousa\u00a0Neto, A.F., Bezerra, B.L.D., Toselli, A.H., Lima, E.B.: HTR-Flor: a deep learning system for offline handwritten text recognition. In: Proceedings of the 33rd Brazilian Symposium on Computer Graphics and Image Processing Conference on Graphics, Patterns and Images, pp. 54\u201361 (2020)","DOI":"10.1109\/SIBGRAPI51738.2020.00016"},{"issue":"4","key":"17_CR34","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1016\/0167-8655(93)90095-U","volume":"14","author":"SN Srihari","year":"1993","unstructured":"Srihari, S.N.: Recognition of handwritten and machine-printed text for postal address interpretation. Pattern Recogn. Lett. 14(4), 291\u2013302 (1993)","journal-title":"Pattern Recogn. Lett."},{"key":"17_CR35","doi-asserted-by":"crossref","unstructured":"S\u00e1nchez, J.A., Romero, V., Toselli, A.H., Vidal, E.: ICFHR2014 competition on handwritten text recognition on Transcriptorium datasets (HTRtS). In: Proceedings of the 14th International Conference on Frontiers in Handwriting Recognition, pp. 785\u2013790 (2014)","DOI":"10.1109\/ICFHR.2014.137"},{"key":"17_CR36","doi-asserted-by":"crossref","unstructured":"S\u00e1nchez, J.A., Romero, V., Toselli, A.H., Vidal, E.: ICFHR2016 competition on handwritten text recognition on the READ dataset. In: Proceedings of the 15th International Conference on Frontiers in Handwriting Recognition, pp. 630\u2013635 (2016)","DOI":"10.1109\/ICFHR.2016.0120"},{"key":"17_CR37","unstructured":"Tolstikhin, I.O., et al.: MLP-mixer: an all-MLP architecture for vision. In: Proceedings of the Thirty-fifth Conference on Neural Information Processing Systems, vol.\u00a034, pp. 24261\u201324272 (2021)"},{"key":"17_CR38","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., Jegou, H.: Training data-efficient image transformers & distillation through attention. In: Proceedings of the 38th International Conference on Machine Learning, pp. 10347\u201310357 (2021)"},{"key":"17_CR39","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Proceedings of the Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017)"},{"key":"17_CR40","doi-asserted-by":"crossref","unstructured":"Wang, H., et al.: Knowledge mining with scene text for fine-grained recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4624\u20134633 (2022)","DOI":"10.1109\/CVPR52688.2022.00458"},{"key":"17_CR41","doi-asserted-by":"crossref","unstructured":"Zheng, C., Li, H., Rhee, S.M., Han, S., Han, J.J., Wang, P.: Pushing the performance limit of scene text recognizer without human annotation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14116\u201314125 (2022)","DOI":"10.1109\/CVPR52688.2022.01372"}],"container-title":["Lecture Notes in Computer Science","Document Analysis Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70442-0_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T08:16:15Z","timestamp":1725956175000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70442-0_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031704413","9783031704420"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70442-0_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"11 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DAS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Document Analysis Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Athens","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"das2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/das2024.seecs.edu.pk\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}