{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T11:02:08Z","timestamp":1725793328991},"publisher-location":"London","reference-count":54,"publisher":"Springer London","isbn-type":[{"type":"print","value":"9780857298584"},{"type":"electronic","value":"9780857298591"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-0-85729-859-1_7","type":"book-chapter","created":{"date-parts":[[2014,5,12]],"date-time":"2014-05-12T08:40:25Z","timestamp":1399884025000},"page":"223-253","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Page Similarity and Classification"],"prefix":"10.1007","author":[{"given":"Simone","family":"Marinai","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,7,24]]},"reference":[{"issue":"1","key":"7_CR1","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1007\/PL00013569","volume":"4","author":"O Altamura","year":"2001","unstructured":"Altamura O, Esposito F, Malerba D (2001) Transforming paper documents into XML format with WISDOM++. Int J Doc Anal Recognit 4(1):2\u201317","journal-title":"Int J Doc Anal Recognit"},{"issue":"2","key":"7_CR2","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1007\/PL00010904","volume":"4","author":"E Appiani","year":"2001","unstructured":"Appiani E, Cesarini F, Colla AM, Diligenti M, Gori M, Marinai S, Soda G (2001) Automatic document classification and indexing in high-volume applications. Int J Doc Anal Recognit 4(2):69\u201383","journal-title":"Int J Doc Anal Recognit"},{"key":"7_CR3","doi-asserted-by":"crossref","unstructured":"Arlandis J, Perez-Cortes J-C, Ungria E (2009) Identification of very similar filled-in forms with a reject option. In: Proceedings of the ICDAR, Barcelona, pp 246\u2013250","DOI":"10.1109\/ICDAR.2009.190"},{"key":"7_CR4","unstructured":"Bagdanov AD, Worring M (2001) Fine-grained document genre classification using first order random graphs. In: Proceedings of the ICDAR, Seattle, pp 79\u201383"},{"issue":"3","key":"7_CR5","doi-asserted-by":"publisher","first-page":"1311","DOI":"10.1016\/S0031-3203(02)00227-3","volume":"36","author":"AD Bagdanov","year":"2003","unstructured":"Bagdanov AD, Worring M (2003) First order Gaussian graphs for efficient structure classification. Pattern Recognit 36(3):1311\u20131324","journal-title":"Pattern Recognit"},{"key":"7_CR6","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1007\/s10032-003-0112-1","volume":"6","author":"AD Bagdanov","year":"2003","unstructured":"Bagdanov AD, Worring M (2003) Multi-scale document description using rectangular granulometries. Int J Doc Anal Recognit 6:181\u2013191","journal-title":"Int J Doc Anal Recognit"},{"key":"7_CR7","unstructured":"Baldi S, Marinai S, Soda G (2003) Using tree-grammars for training set expansion in page classification. In: Proceedings of the ICDAR, Edinburgh, pp 829\u2013833"},{"key":"7_CR8","first-page":"563","volume-title":"Structured document segmentation and representation by the modified X-Y tree","author":"F Cesarini","year":"1999","unstructured":"Cesarini F, Gori M, Marinai S, Soda G (1999) Structured document segmentation and representation by the modified X-Y tree. In: ICDAR, Bangalore, pp 563\u2013566"},{"key":"7_CR9","doi-asserted-by":"crossref","unstructured":"Cesarini F, Lastri M, Marinai S, Soda G (2001) Encoding of modified X-Y trees for document classification. In: Proceedings of the ICDAR, Seattle, pp 1131\u20131136","DOI":"10.1109\/ICDAR.2001.953962"},{"key":"7_CR10","first-page":"82","volume-title":"Lecture Notes in Computer Science","author":"Francesca Cesarini","year":"2001","unstructured":"Cesarini F, Lastri M, Marinai S, Soda G (2001) Page classification for meta-data extraction from digital collections. In: Mayr HC et al (eds) Database and expert systems applications. LNCS 2113. Springer, Berlin\/New York, pp 82\u201391"},{"key":"7_CR11","doi-asserted-by":"crossref","unstructured":"Cesarini F, Marinai S, Soda G (2002) Retrieval by layout similarity of documents represented with MXY trees. In: Lopresti D, Hu J, Kashi R (eds) International workshop on document analysis systems, Princeton. LNCS 2423. Springer, pp 353\u2013364","DOI":"10.1007\/3-540-45869-7_40"},{"issue":"1","key":"7_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10032-006-0020-2","volume":"10","author":"N Chen","year":"2007","unstructured":"Chen N, Blostein D (2007) A survey of document image classification: problem statement, classifier architecture and performance evaluation. Int J Doc Anal Recognit 10(1):1\u201316","journal-title":"Int J Doc Anal Recognit"},{"key":"7_CR13","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1007\/s10032-011-0163-7","volume":"15","author":"F Chen","year":"2012","unstructured":"Chen F, Girgensohn A, Cooper M, Lu Y, Filby G (2012) Genre identification for office document search and browsing. Int J Doc Anal Recognit 15:167\u2013182. doi:10.1007\/s10032-011-0163-7","journal-title":"Int J Doc Anal Recognit"},{"key":"7_CR14","doi-asserted-by":"crossref","unstructured":"Chetverikov D, Liang J, Komuves J, Haralick RM (1996) Zone classification using texture features. In: International conference on pattern recognition, Vienna, pp 676\u2013680","DOI":"10.1109\/ICPR.1996.547031"},{"key":"7_CR15","unstructured":"Collins-Thompson K, Nickolov R (2002) A clustering-based algorithm for automatic document separation. In: Proceedings of the SIGIR workshop on information retrieval and OCR, Tampere"},{"key":"7_CR16","doi-asserted-by":"crossref","unstructured":"Cullen JF, Hull JJ, Hart PE (1997) Document image database retrieval and browsing using texture analysis. In: Proceedings of the ICDAR, Ulm, pp 718\u2013721","DOI":"10.1109\/ICDAR.1997.620602"},{"key":"7_CR17","doi-asserted-by":"crossref","unstructured":"Dengel A (1993) Initial learning of document structure. In: Proceedings of the ICDAR, Tsukuba, pp 86\u201390","DOI":"10.1109\/ICDAR.1993.395776"},{"key":"7_CR18","doi-asserted-by":"crossref","unstructured":"Dengel A, Dubiel F (1995) Clustering and classification of document structure-a machine learning approach. In: Proceedings of the ICDAR, Montreal, pp 587\u2013591","DOI":"10.1109\/ICDAR.1995.601965"},{"issue":"4","key":"7_CR19","doi-asserted-by":"publisher","first-page":"519","DOI":"10.1109\/TPAMI.2003.1190578","volume":"25","author":"M Diligenti","year":"2003","unstructured":"Diligenti M, Frasconi P, Gori M (2003) Hidden Tree Markov models for document image classification. IEEE Trans Pattern Anal Mach Intell 25(4):519\u2013523","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"3","key":"7_CR20","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1006\/cviu.1998.0692","volume":"70","author":"D Doermann","year":"1998","unstructured":"Doermann D (1998) The indexing and retrieval of document images: a survey. Comput Vis Image Underst 70(3):287\u2013298","journal-title":"Comput Vis Image Underst"},{"issue":"1","key":"7_CR21","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1007\/s100320100077","volume":"5","author":"P Duygulu","year":"2002","unstructured":"Duygulu P, Atalay V (2002) A hierarchical representation of form documents for identification and retrieval. Int J Doc Anal Recognit 5(1):17\u201327","journal-title":"Int J Doc Anal Recognit"},{"key":"7_CR22","unstructured":"Ford G, Thoma GR (2003) Ground truth data for document image analysis. In: Proceedings of the symposium on document image understanding and technology, Greenbelt, pp 199\u2013205"},{"key":"7_CR23","doi-asserted-by":"crossref","unstructured":"Gordo A, Valveny E (2009) A rotation invariant page layout descriptor for document classification and retrieval. In: Proceedings of the ICDAR, Barcelona, pp 481\u2013485","DOI":"10.1109\/ICDAR.2009.110"},{"key":"7_CR24","doi-asserted-by":"crossref","unstructured":"Gordo A, Gibert J, Valveny E, Rusi$$\\mathrm{\\tilde{n}}$$ol M (2010) A kernel-based approach to document retrieval. In: International workshop on document analysis systems, Boston, pp 377\u2013384","DOI":"10.1145\/1815330.1815379"},{"key":"7_CR25","unstructured":"Hu J, Kashi R, Wilfong G (1999) Document image layout comparison and classification. In: Proceedings of the ICDAR, Bangalore, pp 285\u2013288"},{"key":"7_CR26","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1023\/A:1009910911387","volume":"2","author":"J Hu","year":"2000","unstructured":"Hu J, Kashi R, Wilfong G (2000) Comparison and classification of documents based on layout similarity. Inf Retr 2:227\u2013243","journal-title":"Inf Retr"},{"key":"7_CR27","doi-asserted-by":"crossref","unstructured":"Huang M, DeMenthon D, Doermann D, Golebiowski L (2005) Document ranking by layout relevance. In: Proceedings of the ICDAR, Seoul, pp 362\u2013366","DOI":"10.1109\/ICDAR.2005.92"},{"key":"7_CR28","first-page":"3","volume-title":"Text versus non-text distinction in online handwritten documents","author":"E Indermuhle","year":"2010","unstructured":"Indermuhle E, Bunke H, Shafait F, Breuel T (2010) Text versus non-text distinction in online handwritten documents. In: SAC, Sierre, pp 3\u20137"},{"issue":"2","key":"7_CR29","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1007\/PL00010982","volume":"3","author":"Y Ishitani","year":"2000","unstructured":"Ishitani Y (2000) Flexible and robust model matching based on association graph for form image understanding. Pattern Anal Appl 3(2):104\u2013119","journal-title":"Pattern Anal Appl"},{"key":"7_CR30","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1016\/S0031-3203(99)00066-7","volume":"33","author":"AK Jain","year":"2000","unstructured":"Jain AK, Liu J (2000) Image-based form document retrieval. Pattern Recognit 33:503\u2013513","journal-title":"Pattern Recognit"},{"key":"7_CR31","first-page":"127","volume-title":"User-defined template for identifying document type and extracting information from documents","author":"T Kochi","year":"1999","unstructured":"Kochi T, Saitoh T (1999) User-defined template for identifying document type and extracting information from documents. In: ICDAR, Bangalore, pp 127\u2013130"},{"key":"7_CR32","first-page":"28","volume-title":"Scalable indexing for layout based document retrieval and ranking","author":"L Lecerf","year":"2010","unstructured":"Lecerf L, Chidlovskii B (2010) Scalable indexing for layout based document retrieval and ranking. ACM Symposium on Applied Computing, Sierre, pp 28\u201332"},{"issue":"2","key":"7_CR33","first-page":"56","volume":"9","author":"JY Lin","year":"1996","unstructured":"Lin JY, Lee C-W, Chen Z (1996) Identification of business forms using relationships between adjacency frames. MVA 9(2):56\u201364","journal-title":"MVA"},{"key":"7_CR34","unstructured":"Mao S, Nie L, Thoma GR (2005) Unsupervised style classification of document page images. IEEE International Conference on Image Processing, Genoa, pp 510\u2013513"},{"key":"7_CR35","unstructured":"Marinai S (2006) A survey of document image retrieval in digital libraries. In: 9th colloque international francophone sur l\u2019Ecrit et le document, Fribourg, pp 193\u2013198"},{"key":"7_CR36","doi-asserted-by":"crossref","unstructured":"Marinai S, Marino E, Soda G (2006) Tree clustering for layout-based document image retrieval. In: Proceedings of the international workshop on document image analysis for libraries 2006, Lyon, pp 243\u2013253","DOI":"10.1109\/DIAL.2006.44"},{"key":"7_CR37","doi-asserted-by":"crossref","unstructured":"Marinai S, Marino E, Soda G (2010) Table of contents recognition for converting PDF documents in e-book formats. In: Proceedings of the 10th ACM symposium on document engineering (DocEng\u201910), Manchester. New York, pp 73\u201376","DOI":"10.1145\/1860559.1860576"},{"key":"7_CR38","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1007\/978-3-642-22913-8_9","volume-title":"Learning Structure and Schemas from Documents","author":"Simone Marinai","year":"2011","unstructured":"Marinai S, Miotti B, Soda G (2011) Digital libraries and document image retrieval techniques: a survey. In: Biba M, Xhafa F (eds) Learning structure and schemas from documents. Volume 375 of studies in computational intelligence. Springer, Berlin\/Heidelberg, pp 181\u2013204"},{"key":"7_CR39","doi-asserted-by":"publisher","first-page":"1033","DOI":"10.1016\/S0167-8655(01)00049-6","volume":"22","author":"H Peng","year":"2001","unstructured":"Peng H, Long F, Chi Z, Siu W-C (2001) Document image template matching based on component block list. PRL 22:1033\u20131042","journal-title":"PRL"},{"issue":"9","key":"7_CR40","doi-asserted-by":"publisher","first-page":"1188","DOI":"10.1109\/TPAMI.2003.1227996","volume":"25","author":"H Peng","year":"2003","unstructured":"Peng H, Long F, Chi Z (2003) Document image recognition based on template matching of component block projections. IEEE Trans Pattern Anal Mach Intell 25(9):1188\u20131192","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"7_CR41","doi-asserted-by":"crossref","unstructured":"Perea I, Lp\u0301ez D (2004) Syntactic modeling and recognition of document image. In: SSPR&SPR, Lisbon, pp 416\u2013424","DOI":"10.1007\/978-3-540-27868-9_44"},{"issue":"2","key":"7_CR42","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1459352.1459357","volume":"41","author":"Xiaoguang Qi","year":"2009","unstructured":"Qi X, Davison BD (2009) Web page classification: features and algorithms. ACM Comput Surv 41:12:1\u201312:31","journal-title":"ACM Computing Surveys"},{"key":"7_CR43","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/505282.505283","volume":"34","author":"F Sebastiani","year":"2002","unstructured":"Sebastiani F (2002) Machine learning in automated text categorization. ACM Comput Surv 34:1\u201347","journal-title":"ACM Comput Surv"},{"issue":"5","key":"7_CR44","first-page":"797","volume":"26","author":"FY Shih","year":"1996","unstructured":"Shih FY, Chen SS (1996) Adaptive document block segmentation and classification. IEEE Trans SMC 26(5):797\u2013802","journal-title":"IEEE Trans SMC"},{"issue":"4","key":"7_CR45","doi-asserted-by":"publisher","first-page":"232","DOI":"10.1007\/PL00013566","volume":"3","author":"C Shin","year":"2001","unstructured":"Shin C, Doermann DS, Rosenfeld A (2001) Classification of document pages using structure-based features. Int J Doc Anal Recognit 3(4):232\u2013247","journal-title":"Int J Doc Anal Recognit"},{"key":"7_CR46","doi-asserted-by":"crossref","unstructured":"Takama Y, Mitsuhashi N (2005) Visual similarity comparison for web page retrieval. In: IEEE\/WIC\/ACM international conference on web intelligence (WI 2005), Compiegne, pp 301\u2013304","DOI":"10.1109\/WI.2005.157"},{"issue":"5","key":"7_CR47","first-page":"211","volume":"5","author":"SL Taylor","year":"1992","unstructured":"Taylor SL, Fritzson R, Pastor JA (1992) Extraction of data from preprinted forms. MVA 5(5):211\u2013222","journal-title":"MVA"},{"key":"7_CR48","doi-asserted-by":"crossref","unstructured":"Taylor SL, Lipshutz M, Nilson RW (1995) Classification and functional decomposition of business documents. In: ICDAR 95, Montreal, pp 563\u2013566","DOI":"10.1109\/ICDAR.1995.601959"},{"key":"7_CR49","first-page":"121","volume-title":"Document image matching using a maximal grid approach","author":"A Tzacheva","year":"2002","unstructured":"Tzacheva A, El-Sonbaty Y, El-Kwae EA (2002) Document image matching using a maximal grid approach. Document Recognition and Retrieval IX, San\u00a0Jose, pp 121\u2013128"},{"key":"7_CR50","doi-asserted-by":"crossref","unstructured":"van Beusekom J, Keysers D, Shafait F, Breuel TM (2006) Distance measures for layout-based document image retrieval. In: Proceedings of the international workshop on document image analysis for libraries 2006, Lyon, pp 232\u2013242","DOI":"10.1109\/DIAL.2006.16"},{"issue":"4","key":"7_CR51","doi-asserted-by":"publisher","first-page":"559","DOI":"10.1109\/69.298173","volume":"6","author":"JT-L Wang","year":"1994","unstructured":"Wang JT-L, Zhang K, Jeong K, Shasha D (1994) A system for approximate tree matching. IEEE Trans Knowl Data Eng 6(4):559\u2013571","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"7_CR52","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1016\/j.patcog.2005.06.009","volume":"39","author":"Y Wang","year":"2006","unstructured":"Wang Y, Phillips IT, Haralick RM (2006) Document zone content classification and its performance evaluation. Pattern Recognit 39:57\u201373","journal-title":"Pattern Recognit"},{"issue":"1\u20134","key":"7_CR53","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1016\/S0020-0255(97)00048-0","volume":"100","author":"C-S Wei","year":"1997","unstructured":"Wei C-S, Liu Q, Wang JT-L, Ng PA (1997) Knowledge discovering for document classification using tree matching in TEXPROS. Inf Sci 100(1\u20134):255\u2013310","journal-title":"Inf Sci"},{"issue":"6","key":"7_CR54","doi-asserted-by":"publisher","first-page":"1245","DOI":"10.1137\/0218082","volume":"18","author":"K Zhang","year":"1989","unstructured":"Zhang K, Shasha D (1989) Simple fast algorithms for the editing distance between trees and related problems. SIAM J Comput 18(6):1245\u20131262","journal-title":"SIAM J Comput"}],"container-title":["Handbook of Document Image Processing and Recognition"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-0-85729-859-1_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,7,23]],"date-time":"2019-07-23T05:14:10Z","timestamp":1563858850000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-0-85729-859-1_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9780857298584","9780857298591"],"references-count":54,"URL":"https:\/\/doi.org\/10.1007\/978-0-85729-859-1_7","relation":{},"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"24 July 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}