{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T20:46:32Z","timestamp":1742935592900,"version":"3.40.3"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031705489"},{"type":"electronic","value":"9783031705496"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70549-6_19","type":"book-chapter","created":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T09:02:15Z","timestamp":1725786135000},"page":"319-333","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Mining and\u00a0Analyzing Statistical Information from\u00a0Untranscribed Form Images"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5541-8231","authenticated-orcid":false,"given":"Jos\u00e9","family":"Andr\u00e9s","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6955-9249","authenticated-orcid":false,"given":"Alejandro H.","family":"Toselli","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4579-5196","authenticated-orcid":false,"given":"Enrique","family":"Vidal","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,9]]},"reference":[{"key":"19_CR1","doi-asserted-by":"crossref","unstructured":"Bluche, T., et al.: Preparatory KWS experiments for large-scale indexing of a vast medieval manuscript collection in the HIMANIS project. In: 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), vol.\u00a001, pp. 311\u2013316 (2017)","DOI":"10.1109\/ICDAR.2017.59"},{"key":"19_CR2","doi-asserted-by":"crossref","unstructured":"Bluche, T., Messina, R.: Gated convolutional recurrent neural networks for multilingual handwriting recognition. In: 2017 14th IAPR International Conference on Document Analysis and Recognition (ICDAR), vol.\u00a01, pp. 646\u2013651. IEEE (2017)","DOI":"10.1109\/ICDAR.2017.111"},{"key":"19_CR3","doi-asserted-by":"crossref","unstructured":"Vidal, E., et al.: The Carabela project and manuscript collection: large-scale probabilistic indexing and content-based classification. In: 2020 17th International Conference on Frontiers in Handwriting Recognition (ICFHR), pp. 85\u201390 (2020)","DOI":"10.1109\/ICFHR2020.2020.00026"},{"key":"19_CR4","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"214","DOI":"10.1007\/978-3-319-08976-8_16","volume-title":"Advances in Data Mining. Applications and Theoretical Aspects","author":"N Elgendy","year":"2014","unstructured":"Elgendy, N., Elragal, A.: Big data analytics: a literature review paper. In: Perner, P. (ed.) ICDM 2014. LNCS (LNAI), vol. 8557, pp. 214\u2013227. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-08976-8_16"},{"issue":"3","key":"19_CR5","doi-asserted-by":"publisher","first-page":"419","DOI":"10.1111\/j.1751-5823.2002.tb00178.x","volume":"70","author":"AL Gibbs","year":"2002","unstructured":"Gibbs, A.L., Su, F.E.: On choosing and bounding probability metrics. Int. Stat. Rev. 70(3), 419\u2013435 (2002)","journal-title":"Int. Stat. Rev."},{"key":"19_CR6","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 369\u2013376 (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"19_CR7","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"19_CR8","unstructured":"Heafield, K.: KenLM: Faster and smaller language model queries. In: Callison-Burch, C., Koehn, P., Monz, C., Zaidan, O.F. (eds.) Proceedings of the Sixth Workshop on Statistical Machine Translation, pp. 187\u2013197. Association for Computational Linguistics, Edinburgh, Scotland (2011)"},{"issue":"4","key":"19_CR9","doi-asserted-by":"publisher","first-page":"1537","DOI":"10.1007\/s10618-021-00748-6","volume":"35","author":"Y Hu","year":"2021","unstructured":"Hu, Y., Hu, C., Tran, T., Kasturi, T., Joseph, E., Gillingham, M.: What\u2019s in a name?-Gender classification of names with character based machine learning models. Data Min. Knowl. Disc. 35(4), 1537\u20131563 (2021)","journal-title":"Data Min. Knowl. Disc."},{"key":"19_CR10","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"19_CR11","doi-asserted-by":"crossref","unstructured":"Klein, B., Gokkus, S., Kieninger, T., Dengel, A.: Three approaches to \u201cindustrial\u201d table spotting. In: Proceedings of Sixth International Conference on Document Analysis and Recognition, pp. 513\u2013517 (2001)","DOI":"10.1109\/ICDAR.2001.953842"},{"key":"19_CR12","doi-asserted-by":"crossref","unstructured":"Kneser, R., Ney, H.: Improved backing-off for m-gram language modeling. In: 1995 International Conference on Acoustics, Speech, and Signal Processing, vol.\u00a01, pp. 181\u2013184. IEEE (1995)","DOI":"10.1109\/ICASSP.1995.479394"},{"key":"19_CR13","unstructured":"Li, M., et al.: TrOCR: transformer-based optical character recognition with pre-trained models. arXiv preprint arXiv:2109.10282 (2022)"},{"key":"19_CR14","doi-asserted-by":"crossref","unstructured":"Mishra, S., Misra, A.: Structured and unstructured big data analytics. In: 2017 International Conference on Current Trends in Computer, Electrical, Electronics and Communication (CTCEEC), pp. 740\u2013746 (2017)","DOI":"10.1109\/CTCEEC.2017.8454999"},{"key":"19_CR15","unstructured":"Povey, D., et\u00a0al.: The Kaldi speech recognition toolkit. In: IEEE 2011 Workshop on Automatic Speech Recognition and Understanding No.\u00a0Conference, IEEE Signal Processing Society (2011)"},{"key":"19_CR16","doi-asserted-by":"crossref","unstructured":"Puigcerver, J.: Are multidimensional recurrent layers really necessary for handwritten text recognition? In: 14th International Conference on Document Analysis and Recognition (ICDAR), vol.\u00a001, pp. 67\u201372 (2017)","DOI":"10.1109\/ICDAR.2017.20"},{"key":"19_CR17","unstructured":"Puigcerver, J.: A Probabilistic Formulation of Keyword Spotting. Ph.D. thesis, University Polit\u00e8cnica de Val\u00e8ncia (2018)"},{"key":"19_CR18","doi-asserted-by":"publisher","unstructured":"Shi, B., Bai, X., An\u00a0end-to end, C.Y.: Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition, vol. 39, pp. 2298\u20132304 (2016). https:\/\/doi.org\/10.1109\/TPAMI","DOI":"10.1109\/TPAMI"},{"key":"19_CR19","unstructured":"Tieleman, T., Hinton, G., et\u00a0al.: Lecture 6.5-rmsprop: divide the gradient by a running average of its recent magnitude. COURSERA: Neural Netw. Mach. Learn. 4(2), 26\u201331 (2012)"},{"key":"19_CR20","doi-asserted-by":"publisher","unstructured":"Toselli, A., Puigcerver, J., Vidal, E.: Probabilistic Indexing for Information Search and Retrieval in Large Collections of Handwritten Text Images. Springer, The Information Retrieval Series (to appear in (2024). https:\/\/doi.org\/10.1007\/978-3-031-55389-9","DOI":"10.1007\/978-3-031-55389-9"},{"key":"19_CR21","doi-asserted-by":"crossref","unstructured":"Toselli, A., Romero, V., Vidal, E., S\u00e1nchez, J.: Making two vast historical manuscript collections searchable and extracting meaningful textual features through large-scale probabilistic indexing. In: 2019 15th IAPR International Conference on Document Analysis and Recognition (ICDAR) (2019)","DOI":"10.1109\/ICDAR.2019.00026"},{"issue":"1","key":"19_CR22","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1007\/s10044-018-0742-z","volume":"22","author":"AH Toselli","year":"2019","unstructured":"Toselli, A.H., Vidal, E., Puigcerver, J., Noya-Garc\u00eda, E.: Probabilistic multi-word spotting in handwritten text images. Pattern Anal. Appl. 22(1), 23\u201332 (2019)","journal-title":"Pattern Anal. Appl."},{"key":"19_CR23","doi-asserted-by":"publisher","first-page":"497","DOI":"10.1016\/j.ins.2016.07.063","volume":"370\u2013371","author":"AH Toselli","year":"2016","unstructured":"Toselli, A.H., Vidal, E., Romero, V., Frinken, V.: HMM word graph based keyword spotting in handwritten document images. Inf. Sci. 370\u2013371, 497\u2013518 (2016)","journal-title":"Inf. Sci."},{"key":"19_CR24","doi-asserted-by":"crossref","unstructured":"Van\u00a0Buskirk, I., Clauset, A., Larremore, D.B.: An open-source cultural consensus approach to name-based gender classification. In: Proceedings of the International AAAI Conference on Web and Social Media, vol.\u00a017, pp. 866\u2013877 (2023)","DOI":"10.1609\/icwsm.v17i1.22195"},{"key":"19_CR25","doi-asserted-by":"crossref","unstructured":"Vidal, E., Toselli, A.H., Puigcerver, J.: Lexicon-based probabilistic indexing of handwritten text images. In: Neural Computing and Applications, pp. 1\u201320 (2023)","DOI":"10.1007\/978-3-031-55389-9_1"},{"key":"19_CR26","doi-asserted-by":"crossref","unstructured":"Vinotheni, C., et\u00a0al.: A state of art approaches on handwriting recognition models. In: 2019 Fifth International Conference on Science Technology Engineering and Mathematics (ICONSTEM), vol.\u00a01, pp. 98\u2013103. IEEE (2019)","DOI":"10.1109\/ICONSTEM.2019.8918925"},{"issue":"4","key":"19_CR27","doi-asserted-by":"publisher","first-page":"399","DOI":"10.4310\/SII.2016.v9.n4.a1","volume":"9","author":"C Wang","year":"2016","unstructured":"Wang, C., Chen, M.H., Schifano, E., Wu, J., Yan, J.: Statistical methods and computing for big data. Stat. Interface 9(4), 399 (2016)","journal-title":"Stat. Interface"},{"key":"19_CR28","doi-asserted-by":"crossref","unstructured":"Wang, Y., Xiao, W., Li, S.: Offline handwritten text recognition using deep learning: a review. J. Phys. Conf. Ser. 1848, 012015. IOP Publishing (2021)","DOI":"10.1088\/1742-6596\/1848\/1\/012015"},{"key":"19_CR29","unstructured":"Wu, Y., Kirillov, A., Massa, F., Lo, W.Y., Girshick, R.: Detectron2. https:\/\/github.com\/facebookresearch\/detectron2 (2019)"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition - ICDAR 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70549-6_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T09:07:55Z","timestamp":1725786475000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70549-6_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031705489","9783031705496"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70549-6_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"9 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Athens","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icdar2024.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}