{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T15:56:56Z","timestamp":1776182216058,"version":"3.50.1"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032046239","type":"print"},{"value":"9783032046246","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T00:00:00Z","timestamp":1758067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T00:00:00Z","timestamp":1758067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-04624-6_23","type":"book-chapter","created":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T05:34:44Z","timestamp":1758000884000},"page":"391-407","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Self-HTR: A Novel Self-supervised Handwritten Text Recognition Framework Using Generative Adversarial Networks"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6556-2600","authenticated-orcid":false,"given":"Lisa","family":"Koopmans","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7548-3858","authenticated-orcid":false,"given":"Maruf A.","family":"Dhali","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2351-930X","authenticated-orcid":false,"given":"Lambert","family":"Schomaker","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,17]]},"reference":[{"key":"23_CR1","doi-asserted-by":"crossref","unstructured":"Alonso, E., Moysset, B., Messina, R.: Adversarial generation of handwritten text images conditioned on sequences. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 481\u2013486 (2019)","DOI":"10.1109\/ICDAR.2019.00083"},{"key":"23_CR2","doi-asserted-by":"publisher","unstructured":"Aradillas\u00a0Jaramillo, J.C., Murillo-Fuentes, J.J., M.\u00a0Olmos, P.: Boosting handwriting text recognition in small databases with transfer learning. In: 2018 16th International Conference on Frontiers in Handwriting Recognition (ICFHR), pp. 429\u2013434 (2018). https:\/\/doi.org\/10.1109\/ICFHR-2018.2018.00081","DOI":"10.1109\/ICFHR-2018.2018.00081"},{"key":"23_CR3","doi-asserted-by":"publisher","unstructured":"Barakat, B.K., Alasam, R., El-Sana, J.: Word spotting using convolutional Siamese network. In: 2018 13th IAPR International Workshop on Document Analysis Systems (DAS), pp. 229\u2013234 (2018). https:\/\/doi.org\/10.1109\/DAS.2018.67","DOI":"10.1109\/DAS.2018.67"},{"key":"23_CR4","doi-asserted-by":"crossref","unstructured":"Bhunia, A., Khan, S., Cholakkal, H., Anwer, R., Khan, F., Shah, M.: Handwriting transformers. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 1066\u20131074. IEEE Computer Society, Los Alamitos, CA, USA (2021)","DOI":"10.1109\/ICCV48922.2021.00112"},{"key":"23_CR5","doi-asserted-by":"crossref","unstructured":"Bluche, T., Messina, R.: Gated convolutional recurrent neural networks for multilingual handwriting recognition. In: 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), vol.\u00a001, pp. 646\u2013651 (2017)","DOI":"10.1109\/ICDAR.2017.111"},{"key":"23_CR6","doi-asserted-by":"publisher","unstructured":"Fogel, S., Averbuch-Elor, H., Cohen, S., Mazor, S., Litman, R.: ScrabbleGAN: semi-supervised varying length handwritten text generation. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4323\u20134332 (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.00438","DOI":"10.1109\/CVPR42600.2020.00438"},{"key":"23_CR7","doi-asserted-by":"crossref","unstructured":"Gan, J., Wang, W.: HiGAN: handwriting imitation conditioned on arbitrary-length texts and disentangled styles. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, no. 9, pp. 7484\u20137492 (2021)","DOI":"10.1609\/aaai.v35i9.16917"},{"key":"23_CR8","doi-asserted-by":"crossref","unstructured":"Gan, J., Wang, W., Leng, J., Gao, X.: HiGAN+: handwriting imitation GAN with disentangled representations. ACM Trans. Graph. 42(1), 1\u201317 (2022)","DOI":"10.1145\/3550070"},{"key":"23_CR9","unstructured":"Goodfellow, I.J., et al.: Generative adversarial nets. In: Proceedings of the 28th International Conference on Neural Information Processing Systems - Volume 2, pp. 2672\u20132680. NIPS 2014, MIT Press, Cambridge, MA, USA (2014)"},{"key":"23_CR10","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 369\u2013376. ICML 2006, Association for Computing Machinery, New York, NY, USA (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"23_CR11","unstructured":"Graves, A., Schmidhuber, J.: Offline handwriting recognition with multidimensional recurrent neural networks. In: Proceedings of the 22nd International Conference on Neural Information Processing Systems, pp. 545\u2013552. NIPS 2008, Curran Associates Inc., Red Hook, NY, USA (2008)"},{"key":"23_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Delving deep into rectifiers: surpassing human-level performance on ImageNet classification. In: 2015 IEEE International Conference on Computer Vision (ICCV), pp. 1026\u20131034 (2015)","DOI":"10.1109\/ICCV.2015.123"},{"key":"23_CR13","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M., Lin, H. (eds.) Advances in Neural Information Processing Systems, vol.\u00a033, pp. 6840\u20136851. Curran Associates, Inc. (2020)"},{"key":"23_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"694","DOI":"10.1007\/978-3-319-46475-6_43","volume-title":"Computer Vision \u2013 ECCV 2016","author":"J Johnson","year":"2016","unstructured":"Johnson, J., Alahi, A., Fei-Fei, L.: Perceptual losses for real-time style transfer and super-resolution. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9906, pp. 694\u2013711. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46475-6_43"},{"key":"23_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1007\/978-3-030-58592-1_17","volume-title":"Computer Vision \u2013 ECCV 2020","author":"L Kang","year":"2020","unstructured":"Kang, L., Riba, P., Wang, Y., Rusi\u00f1ol, M., Forn\u00e9s, A., Villegas, M.: GANwriting: content-conditioned generation of styled handwritten word images. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12368, pp. 273\u2013289. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58592-1_17"},{"key":"23_CR16","doi-asserted-by":"publisher","first-page":"459","DOI":"10.1007\/978-3-030-12939-2_32","volume-title":"Pattern Recognition","author":"L Kang","year":"2019","unstructured":"Kang, L., Toledo, J.I., Riba, P., Villegas, M., Forn\u00e9s, A., Rusi\u00f1ol, M.: Convolve, attend and spell: an attention-based sequence-to-sequence model for handwritten word recognition. In: Brox, T., Bruhn, A., Fritz, M. (eds.) Pattern Recognition, pp. 459\u2013472. Springer International Publishing, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-12939-2_32"},{"key":"23_CR17","unstructured":"Koopmans, L.: Towards self-supervised handwritten text recognition using generative adversarial networks (2024). https:\/\/fse.studenttheses.ub.rug.nl\/33045\/"},{"key":"23_CR18","doi-asserted-by":"crossref","unstructured":"krishnan, P., Dutta, K., Jawahar, C.V.: HWNET V3: a joint embedding framework for recognition and retrieval of handwritten text. Int. J. Document Anal. Recognit. (IJDAR) 26(4), 401\u2013417 (2023)","DOI":"10.1007\/s10032-022-00423-6"},{"key":"23_CR19","doi-asserted-by":"crossref","unstructured":"Krishnan, P., Dutta, K., Jawahar, C.: Deep feature embedding for accurate recognition and retrieval of handwritten text. In: 2016 15th International Conference on Frontiers in Handwriting Recognition (ICFHR), pp. 289\u2013294 (2016)","DOI":"10.1109\/ICFHR.2016.0062"},{"key":"23_CR20","doi-asserted-by":"crossref","unstructured":"Krishnan, P., Kovvuri, R., Pang, G., Vassilev, B., Hassner, T.: TextStyleBrush: transfer of text aesthetics from a single example. IEEE Trans. Pattern Anal. Mach. Intell. 45(7), 9122\u20139134 (2023)","DOI":"10.1109\/TPAMI.2023.3239736"},{"key":"23_CR21","doi-asserted-by":"publisher","unstructured":"Li, M., et al.: TROCR: transformer-based optical character recognition with pre-trained models (2023). https:\/\/doi.org\/10.1609\/aaai.v37i11.26538","DOI":"10.1609\/aaai.v37i11.26538"},{"key":"23_CR22","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1007\/s100320200071","volume":"5","author":"U Marti","year":"2002","unstructured":"Marti, U., Bunke, H.: The IAM-database: an English sentence database for offline handwriting recognition. Int. J. Document Anal. Recognit. 5, 39\u201346 (2002)","journal-title":"Int. J. Document Anal. Recognit."},{"key":"23_CR23","doi-asserted-by":"crossref","unstructured":"Michael, J., Labahn, R., Gruning, T., Zollner, J.: Evaluating sequence-to-sequence models for handwritten text recognition. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 1286\u20131293. IEEE Computer Society, Los Alamitos, CA, USA (2019)","DOI":"10.1109\/ICDAR.2019.00208"},{"key":"23_CR24","doi-asserted-by":"publisher","unstructured":"Nikolaidou, K., et al.: WordStylist: styled verbatim handwritten text generation with latent diffusion models. In: Fink, G.A., Jain, R., Kise, K., Zanibbi, R. (eds.) International Conference on Document Analysis and Recognition, pp. 384\u2013401. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-41679-8_22","DOI":"10.1007\/978-3-031-41679-8_22"},{"key":"23_CR25","doi-asserted-by":"crossref","unstructured":"Pippi, V., Cascianelli, S., Cucchiara, R.: Handwritten text generation from visual archetypes. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 22458\u201322467. IEEE Computer Society, Los Alamitos, CA, USA (2023)","DOI":"10.1109\/CVPR52729.2023.02151"},{"key":"23_CR26","doi-asserted-by":"crossref","unstructured":"Poulos, J., Valle, R.: Character-based handwritten text transcription with attention networks. Neural Comput. Appl. 33, 0563\u201310573 (2021)","DOI":"10.1007\/s00521-021-05813-1"},{"key":"23_CR27","doi-asserted-by":"crossref","unstructured":"Puigcerver, J.: Are multidimensional recurrent layers really necessary for handwritten text recognition? In: 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), vol.\u00a001, pp. 67\u201372 (2017)","DOI":"10.1109\/ICDAR.2017.20"},{"key":"23_CR28","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1007\/978-3-031-06555-2_17","volume-title":"Document Analysis Systems","author":"G Retsinas","year":"2022","unstructured":"Retsinas, G., Sfikas, G., Gatos, B., Nikou, C.: Best practices for a handwritten text recognition system. In: Uchida, S., Barney, E., Eglin, V. (eds.) Document Analysis Systems, pp. 247\u2013259. Springer International Publishing, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-06555-2_17"},{"key":"23_CR29","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"23_CR30","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: Bach, F., Blei, D. (eds.) Proceedings of the 32nd International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a037, pp. 2256\u20132265. PMLR, Lille, France (2015)"},{"key":"23_CR31","doi-asserted-by":"publisher","unstructured":"de\u00a0Sousa\u00a0Neto, A.F., Bezerra, B.L.D., Toselli, A.H., Lima, E.B.: HTR-Flor: a deep learning system for offline handwritten text recognition. In: 2020 33rd SIBGRAPI Conference on Graphics, Patterns and Images (SIBGRAPI), pp. 54\u201361 (2020). https:\/\/doi.org\/10.1109\/SIBGRAPI51738.2020.00016","DOI":"10.1109\/SIBGRAPI51738.2020.00016"},{"issue":"4","key":"23_CR32","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A., Sheikh, H., Simoncelli, E.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004)","journal-title":"IEEE Trans. Image Process."}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition \u2013 ICDAR 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-04624-6_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T05:34:51Z","timestamp":1758000891000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-04624-6_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,17]]},"ISBN":["9783032046239","9783032046246"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-04624-6_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,17]]},"assertion":[{"value":"17 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Wuhan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iapr.org\/icdar2025","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}