{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T15:30:45Z","timestamp":1758123045622,"version":"3.40.3"},"publisher-location":"Cham","reference-count":52,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031705359"},{"type":"electronic","value":"9783031705366"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70536-6_18","type":"book-chapter","created":{"date-parts":[[2024,9,2]],"date-time":"2024-09-02T10:03:02Z","timestamp":1725271382000},"page":"297-314","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["The Learnable Typewriter: A Generative Approach to\u00a0Text Analysis"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-2278-5825","authenticated-orcid":false,"given":"Ioannis","family":"Siglidis","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9236-5394","authenticated-orcid":false,"given":"Nicolas","family":"Gonthier","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2577-0847","authenticated-orcid":false,"given":"Julien","family":"Gaubil","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0008-1937-6506","authenticated-orcid":false,"given":"Tom","family":"Monnier","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3804-0193","authenticated-orcid":false,"given":"Mathieu","family":"Aubry","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,3]]},"reference":[{"key":"18_CR1","unstructured":"Baird, H.S.: Model-directed document image analysis. In: Proceedings of the Symposium on Document Image Understanding Technology (1999)"},{"key":"18_CR2","doi-asserted-by":"crossref","unstructured":"Bar\u00f3, A., Chen, J., Forn\u00e9s, A., Megyesi, B.: Towards a generic unsupervised method for transcription of encoded manuscripts. In: Proceedings of the 3rd International Conference on Digital Access to Textual Cultural Heritage (2019)","DOI":"10.1145\/3322905.3322920"},{"key":"18_CR3","unstructured":"Berg-Kirkpatrick, T., Durrett, G., Klein, D.: Unsupervised Transcription of Historical Documents. ACL (2013)"},{"key":"18_CR4","doi-asserted-by":"crossref","unstructured":"Bluche, T., Messina, R.: Gated convolutional recurrent neural networks for multilingual handwriting recognition. In: 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), vol.\u00a01, pp. 646\u2013651. IEEE (2017)","DOI":"10.1109\/ICDAR.2017.111"},{"key":"18_CR5","unstructured":"Burgess, C.P., Matthey, L., et al.: A.: MONet: Unsupervised scene decomposition and representation. arXiv preprint arXiv:1901.11390 (2019)"},{"key":"18_CR6","unstructured":"Camps, J.B., Vidal-Gor\u00e8ne, C., Stutzmann, D., Vernet, M., Pinche, A.: Data diversity in handwritten text recognition: challenge or opportunity? Digital Humanities (2022)"},{"key":"18_CR7","doi-asserted-by":"crossref","unstructured":"Crawford, E., Pineau, J.: Spatially invariant unsupervised object detection with convolutional neural networks. In: AAAI (2019)","DOI":"10.1609\/aaai.v33i01.33013412"},{"key":"18_CR8","unstructured":"Deng, F., Zhi, Z., Lee, D., Ahn, S.: Generative scene graph networks. In: ICLR (2020)"},{"key":"18_CR9","unstructured":"Emami, P., He, P., Ranka, S., Rangarajan, A.: Efficient iterative amortized inference for learning symmetric and disentangled multi-object representations. In: ICML (2021)"},{"key":"18_CR10","unstructured":"Eslami, S.M.A., et al.: Attend, Infer, Repeat: Fast Scene Understanding with Generative Models. Advances in Neural Information Processing Systems (2016)"},{"key":"18_CR11","doi-asserted-by":"crossref","unstructured":"Fogel, S., Averbuch-Elor, H., Cohen, S., Mazor, S., Litman, R.: ScrabbleGAN: Semi-supervised varying length handwritten text generation. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00438"},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"Garrette, D., Alpert-Abrams, H., Berg-Kirkpatrick, T., Klein, D.: Unsupervised code-switching for multilingual historical document transcription. In: Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (2015)","DOI":"10.3115\/v1\/N15-1109"},{"key":"18_CR13","unstructured":"Goodfellow, I., et al.: Generative adversarial nets. In: NeurIPS (2014)"},{"key":"18_CR14","doi-asserted-by":"crossref","unstructured":"Goyal, K., Dyer, C., Warren, C., G\u2019Sell, M., Berg-Kirkpatrick, T.: A probabilistic generative model for typographical analysis of early modern printing. In: ACL (2020)","DOI":"10.18653\/v1\/2020.acl-main.266"},{"key":"18_CR15","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: Labelling unsegmented sequence data with recurrent neural networks. In: ICML (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"18_CR16","unstructured":"Graves, A., Schmidhuber, J.: Offline handwriting recognition with multidimensional recurrent neural networks. In: NeurIPS (2008)"},{"key":"18_CR17","unstructured":"Greff, K., et al.: Multi-object representation learning with iterative variational inference. In: ICML (2019)"},{"key":"18_CR18","unstructured":"Greff, K., Van\u00a0Steenkiste, S., Schmidhuber, J.: Neural expectation maximization. In: NeurIPS (2017)"},{"key":"18_CR19","doi-asserted-by":"crossref","unstructured":"Gupta, A., Vedaldi, A., Zisserman, A.: Learning to read by spelling: Towards unsupervised text recognition. arXiv:1809.08675 [cs] (2018)","DOI":"10.1145\/3293353.3293386"},{"key":"18_CR20","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"18_CR21","doi-asserted-by":"crossref","unstructured":"Hochberg, J., Kelly, P., Thomas, T., Kerns, L.: Automatic script identification from document images using cluster-based templates. IEEE Trans. Pattern Anal. Mach. Intell. (1997)","DOI":"10.1109\/34.574802"},{"key":"18_CR22","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A.: Spatial Transformer Networks. In: NeurIPS (2015)"},{"key":"18_CR23","unstructured":"Jiang, J., Ahn, S.: Generative neurosymbolic machines. In: NeurIPS (2020)"},{"key":"18_CR24","doi-asserted-by":"crossref","unstructured":"Kahle, P., Colutto, S., Hackl, G., M\u00fchlberger, G.: Transkribus-a service platform for transcription, recognition and retrieval of historical documents. In: ICDAR (2017)","DOI":"10.1109\/ICDAR.2017.307"},{"key":"18_CR25","doi-asserted-by":"crossref","unstructured":"Kang, L., Riba, P., Rusi\u00f1ol, M., Forn\u00e9s, A., Villegas, M.: Pay attention to what you read: Non-recurrent handwritten text-line recognition. Pattern Recogn. (2022)","DOI":"10.1016\/j.patcog.2022.108766"},{"key":"18_CR26","unstructured":"Karazija, L., Laina, I., Rupprecht, C.: ClevrTex: a texture-rich benchmark for unsupervised multi-object segmentation. In: NeurIPS Datasets and Benchmarks (2021)"},{"key":"18_CR27","unstructured":"Knight, K., Megyesi, B., Schaefer, C.: The Copiale Cipher. In: Proceedings of the ACL Workshop on Building and Using Comparable Corpora (2011)"},{"key":"18_CR28","unstructured":"Kopec, G.E., Lomelin, M.: Document-specific character template estimation. In: Document Recognition III (1996)"},{"key":"18_CR29","doi-asserted-by":"crossref","unstructured":"Kopec, G.E., Lomelin, M.: Supervised template estimation for document image decoding. IEEE Trans. Pattern Anal. Mach. Intell. (1997)","DOI":"10.1109\/34.643891"},{"key":"18_CR30","doi-asserted-by":"crossref","unstructured":"Kopec, G.E., Said, M.R., Popat, K.: N-gram language models for document image decoding. In: Document Recognition and Retrieval IX (2001)","DOI":"10.1117\/12.450728"},{"key":"18_CR31","doi-asserted-by":"crossref","unstructured":"LeCun, Y., et al.: Backpropagation applied to handwritten zip code recognition. Neural Comput. (1989)","DOI":"10.1162\/neco.1989.1.4.541"},{"key":"18_CR32","doi-asserted-by":"crossref","unstructured":"LeCun, Y., Bottou, L., Bengio, Y., Haffner, P.: Gradient-based learning applied to document recognition. In: Proceedings of the IEEE (1998)","DOI":"10.1109\/5.726791"},{"key":"18_CR33","unstructured":"Li, M., et al.: Trocr: Transformer-based optical character recognition with pre-trained models. arXiv preprint arXiv:2109.10282 (2021)"},{"key":"18_CR34","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: ICLR (2019)"},{"key":"18_CR35","doi-asserted-by":"crossref","unstructured":"Monnier, T., Aubry, M.: docExtractor: An off-the-shelf historical document element extraction. In: ICFHR (2020)","DOI":"10.1109\/ICFHR2020.2020.00027"},{"key":"18_CR36","unstructured":"Monnier, T., Groueix, T., Aubry, M.: Deep transformation-invariant clustering. In: NeurIPS (2020)"},{"key":"18_CR37","doi-asserted-by":"crossref","unstructured":"Monnier, T., Vincent, E., Ponce, J., Aubry, M.: Unsupervised Layered Image Decomposition into Object Prototypes. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00852"},{"key":"18_CR38","unstructured":"Nolan, J.C., Filippini, R.: Method and apparatus for creating a high-fidelity glyph prototype from low-resolution glyph images (2010), uS Patent 7,702,182"},{"key":"18_CR39","doi-asserted-by":"crossref","unstructured":"Puigcerver, J.: Are multidimensional recurrent layers really necessary for handwritten text recognition? In: ICDAR (2017)","DOI":"10.1109\/ICDAR.2017.20"},{"key":"18_CR40","unstructured":"Reddy, P., Guerrero, P., Mitra, N.J.: Search for concepts: Discovering visual concepts using direct optimization. arXiv preprint arXiv:2210.14808 (2022)"},{"key":"18_CR41","doi-asserted-by":"crossref","unstructured":"Seuret, M., et al.: Combining ocr models for reading early modern books. ICDAR (2023)","DOI":"10.1007\/978-3-031-41734-4_21"},{"key":"18_CR42","unstructured":"Smirnov, D., Gharbi, M., Fisher, M., Guizilini, V., Efros, A.A., Solomon, J.: MarioNette: self-supervised sprite learning. In: NeurIPS 2021 (2021)"},{"key":"18_CR43","unstructured":"Souibgui, M.A., Forn\u00e9s, A., Kessentini, Y., Tudor, C.: A few-shot learning approach for historical ciphered manuscript recognition. CoRR (2020)"},{"key":"18_CR44","doi-asserted-by":"crossref","unstructured":"de\u00a0Sousa\u00a0Neto, A.F., Bezerra, B.L.D., Toselli, A.H., Lima, E.B.: Htr-flor: a deep learning system for offline handwritten text recognition. In: SIBGRAPI (2020)","DOI":"10.1145\/3395027.3419603"},{"key":"18_CR45","doi-asserted-by":"crossref","unstructured":"Srivatsan, N., Vega, J., Skelton, C., Berg-Kirkpatrick, T.: Neural representation learning for scribal hands of linear b. In: ICDAR 2021 Workshops (2021)","DOI":"10.1007\/978-3-030-86159-9_23"},{"key":"18_CR46","doi-asserted-by":"crossref","unstructured":"Srivatsan, N., Wu, S., Barron, J., Berg Kirkpatrick, T.: Scalable font reconstruction with dual latent manifolds. In: EMNLP (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.244"},{"key":"18_CR47","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NeurIPS (2017)"},{"key":"18_CR48","doi-asserted-by":"crossref","unstructured":"Vincent, L.: Google book search: document understanding on a massive scale. In: ICDAR (2007)","DOI":"10.1109\/ICDAR.2007.4377029"},{"key":"18_CR49","unstructured":"Xu, Y., Nagy, G.: Prototype extraction and adaptive OCR. IEEE Trans. Pattern Analysis Mach. Intell. (1999)"},{"key":"18_CR50","doi-asserted-by":"crossref","unstructured":"Yang, Y., Chen, Y., Soatto, S.: Learning to manipulate individual objects in an image. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00659"},{"key":"18_CR51","doi-asserted-by":"crossref","unstructured":"Ye, V., Li, Z., Tucker, R., Kanazawa, A., Snavely, N.: Deformable sprites for unsupervised video decomposition. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00268"},{"key":"18_CR52","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1007\/978-3-030-58517-4_4","volume-title":"Computer Vision \u2013 ECCV 2020","author":"C Zhang","year":"2020","unstructured":"Zhang, C., Gupta, A., Zisserman, A.: Adaptive text recognition through visual matching. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12361, pp. 51\u201367. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58517-4_4"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition - ICDAR 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70536-6_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,2]],"date-time":"2024-09-02T10:14:29Z","timestamp":1725272069000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70536-6_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031705359","9783031705366"],"references-count":52,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70536-6_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"3 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Athens","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icdar2024.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}