{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:01:01Z","timestamp":1772906461546,"version":"3.50.1"},"publisher-location":"Cham","reference-count":22,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031705427","type":"print"},{"value":"9783031705434","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70543-4_16","type":"book-chapter","created":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T09:02:15Z","timestamp":1725786135000},"page":"271-288","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Zipf Curves and\u00a0Basic Text Analytics from\u00a0Untranscribed Manuscript Images"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4579-5196","authenticated-orcid":false,"given":"Enrique","family":"Vidal","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6955-9249","authenticated-orcid":false,"given":"Alejandro H.","family":"Toselli","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,9]]},"reference":[{"key":"16_CR1","unstructured":"Adamic, L.A.: Zipf, power-laws, and pareto-a ranking tutorial. Xerox Palo Alto Research Center, Palo Alto (2000). http:\/\/gingerhpl.hp.com\/shl\/papers\/ranking\/ranking.html"},{"key":"16_CR2","doi-asserted-by":"crossref","unstructured":"Bluche, T., et al.: Preparatory KWS experiments for large-scale indexing of a vast medieval manuscript collection in the HIMANIS project. In: Proceedings of the 14th ICDAR (2017)","DOI":"10.1109\/ICDAR.2017.59"},{"key":"16_CR3","unstructured":"Daniel, W.W.: Kolmogorov\u2013Smirnov one-sample test. Appl. Nonparamet. Statist. 2 (1990)"},{"key":"16_CR4","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning (ICML 2006), pp. 369\u2013376. ACM, New York (2006)","DOI":"10.1145\/1143844.1143891"},{"issue":"3","key":"16_CR5","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1007\/s10032-019-00332-1","volume":"22","author":"T Gr\u00fcning","year":"2019","unstructured":"Gr\u00fcning, T., Leifert, G., Strau\u00df, T., Michael, J., Labahn, R.: A two-stage method for text line detection in historical documents. Int. J. Doc. Anal. Recognit. 22(3), 285\u2013302 (2019)","journal-title":"Int. J. Doc. Anal. Recognit."},{"key":"16_CR6","doi-asserted-by":"crossref","unstructured":"Kneser, R., Ney, H.: Improved backing-off for M-gram language modeling. In: 1995 International Conference on Acoustics, Speech, and Signal Processing, vol.\u00a01, pp. 181\u2013184 (1995)","DOI":"10.1109\/ICASSP.1995.479394"},{"key":"16_CR7","doi-asserted-by":"crossref","unstructured":"Lang, E., Puigcerver, J., Toselli, A.H., Vidal, E.: Probabilistic indexing and search for information extraction on handwritten German parish records. In: 2018 16th International Conference on Frontiers in Handwriting Recognition (ICFHR), pp. 44\u201349 (2018)","DOI":"10.1109\/ICFHR-2018.2018.00017"},{"key":"16_CR8","unstructured":"Manning, C.D., Sch\u00fctze, H., et\u00a0al.: Foundations of Statistical Natural Language Processing, vol.\u00a0999. MIT Press (1999)"},{"key":"16_CR9","unstructured":"Povey, D., et al.: The Kaldi speech recognition toolkit. In: IEEE 2011 Workshop on Automatic Speech Recognition and Understanding. IEEE Signal Proceedings Society (2011). iEEE Catalog No.: CFP11SRW-USB"},{"key":"16_CR10","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1016\/j.patrec.2023.06.006","volume":"172","author":"JR Prieto","year":"2023","unstructured":"Prieto, J.R., Flores, J.J., Vidal, E., Toselli, A.H.: Open set classification of untranscribed handwritten text image documents. Pattern Recogn. Lett. 172, 113\u2013120 (2023)","journal-title":"Pattern Recogn. Lett."},{"key":"16_CR11","doi-asserted-by":"publisher","unstructured":"Prieto, J.R., Vidal, E., S\u00e1nchez, J.A., Alonso, C., Garrido, D.: Extracting descriptive words from untranscribed handwritten images. In: Pinho, A.J.., Georgieva, P., Teixeira, L.F., S\u00e1nchez, J.A. (eds.) IbPRIA 2022, pp. 540\u2013551. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-04881-4_43","DOI":"10.1007\/978-3-031-04881-4_43"},{"key":"16_CR12","unstructured":"Puigcerver, J.: A probabilistic formulation of keyword spotting. Ph.D. thesis, Universitat Polit\u00e8cnica de Val\u00e8ncia (2018)"},{"key":"16_CR13","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1016\/S0031-3203(99)00055-2","volume":"33","author":"J Sauvola","year":"2000","unstructured":"Sauvola, J., Pietik\u00e4inen, M.: Adaptive document image binarization. Pattern Recogn. 33, 225\u2013236 (2000)","journal-title":"Pattern Recogn."},{"key":"16_CR14","unstructured":"Tieleman, T., Hinton, G.: Lecture 6.5-RMSprop: divide the gradient by a running average of its recent magnitude. COURSERA: Neural Netw. Mach. Learn. 4(2) (2012)"},{"key":"16_CR15","doi-asserted-by":"crossref","unstructured":"Toselli, A.H., Puigcerver, J., Vidal, E.: Two methods to improve confidence scores for lexicon-free word spotting in handwritten text. In: 2016 15th International Conference on Frontiers in Handwriting Recognition (ICFHR), pp. 349\u2013354 (2016)","DOI":"10.1109\/ICFHR.2016.0072"},{"key":"16_CR16","doi-asserted-by":"crossref","unstructured":"Toselli, A., Puigcerver, J., Vidal, E.: Probabilistic indexing for information search and retrieval in large collections of handwritten text images. In: The Information Retrieval Series, vol.\u00a049. Springer, Cham (2024)","DOI":"10.1007\/978-3-031-55389-9"},{"key":"16_CR17","doi-asserted-by":"crossref","unstructured":"Toselli, A.H., Vidal, E., Romero, V., Frinken, V.: HMM word-graph based keyword spotting in handwritten document images. Inf. Sci. 370(C), 497\u2013518 (2016)","DOI":"10.1016\/j.ins.2016.07.063"},{"key":"16_CR18","doi-asserted-by":"crossref","unstructured":"Toselli, A.H., Romero, V., S\u00e1nchez, J.A., Vidal, E.: Making two vast historical manuscript collections searchable and extracting meaningful textual features through large-scale probabilistic indexing. In: Internaltional Conference on Document Analysis and Recognition (ICDAR), pp. 108\u2013113. IEEE (2019)","DOI":"10.1109\/ICDAR.2019.00026"},{"key":"16_CR19","doi-asserted-by":"crossref","unstructured":"Vidal, E., et al.: The Carabela project and manuscript collection: large-scale probabilistic indexing and content-based classification. In: 2020 17th International Conference on Frontiers in Handwriting Recognition (ICFHR), pp. 85\u201390. IEEE (2020)","DOI":"10.1109\/ICFHR2020.2020.00026"},{"key":"16_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"426","DOI":"10.1007\/978-3-030-86331-9_28","volume-title":"Document Analysis and Recognition \u2013 ICDAR 2021","author":"E Vidal","year":"2021","unstructured":"Vidal, E., Toselli, A.H.: Probabilistic indexing and search for hyphenated words. In: Llad\u00f3s, J., Lopresti, D., Uchida, S. (eds.) ICDAR 2021. LNCS, vol. 12822, pp. 426\u2013442. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-86331-9_28"},{"key":"16_CR21","doi-asserted-by":"crossref","unstructured":"Vidal, E., Toselli, A.H., Puigcerver, J.: Lexicon-based probabilistic indexing of handwritten text images. Neural Comput. Appl. 1\u201320 (2023)","DOI":"10.1007\/978-3-031-55389-9_1"},{"issue":"1","key":"16_CR22","doi-asserted-by":"publisher","first-page":"48","DOI":"10.2307\/2784953","volume":"5","author":"GK Zipf","year":"1942","unstructured":"Zipf, G.K.: The unity of nature, least-action, and natural social science. Sociometry 5(1), 48\u201362 (1942)","journal-title":"Sociometry"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition - ICDAR 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70543-4_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,18]],"date-time":"2025-02-18T14:07:48Z","timestamp":1739887668000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70543-4_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031705427","9783031705434"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70543-4_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"9 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Athens","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icdar2024.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}