{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,1,4]],"date-time":"2025-01-04T16:40:21Z","timestamp":1736008821845,"version":"3.32.0"},"publisher-location":"Berlin, Heidelberg","reference-count":18,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540287674"},{"type":"electronic","value":"9783540319313"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2005]]},"DOI":"10.1007\/11551362_9","type":"book-chapter","created":{"date-parts":[[2005,9,27]],"date-time":"2005-09-27T14:53:55Z","timestamp":1127832835000},"page":"92-103","source":"Crossref","is-referenced-by-count":4,"title":["From Legacy Documents to XML: A Conversion Framework"],"prefix":"10.1007","author":[{"given":"Jean-Pierre","family":"Chanod","sequence":"first","affiliation":[]},{"given":"Boris","family":"Chidlovskii","sequence":"additional","affiliation":[]},{"given":"Herv\u00e9","family":"Dejean","sequence":"additional","affiliation":[]},{"given":"Olivier","family":"Fambon","sequence":"additional","affiliation":[]},{"given":"J\u00e9r\u00f4me","family":"Fuselier","sequence":"additional","affiliation":[]},{"given":"Thierry","family":"Jacquin","sequence":"additional","affiliation":[]},{"given":"Jean-Luc","family":"Meunier","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"1","key":"9_CR1","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1007\/PL00013569","volume":"4","author":"O. Altamura","year":"2001","unstructured":"Altamura, O., Esposito, F., Malerba, D.: Transforming paper documents into XML format with WISDOM++. IJDAR\u00a04(1), 2\u201317 (2001)","journal-title":"IJDAR"},{"issue":"1","key":"9_CR2","first-page":"39","volume":"22","author":"A.L. Berger","year":"1996","unstructured":"Berger, A.L., Della Pietra, S., Della Pietra, V.: A maximum entropy approach to natural language processing. Computational Linguistics\u00a022(1), 39\u201371 (1996)","journal-title":"Computational Linguistics"},{"key":"9_CR3","unstructured":"Le Bourgeois, F., Emptoz, H., Bensafi, S.: Document understanding using probabilistic relaxation: Application on tables of contents of periodicals. In: ICDAR (2001)"},{"key":"9_CR4","unstructured":"Cattoni, R., Coianiz, T., Messelodi, S., Modena, C.M.: Geometric layout analysis techniques for document image understanding: a review. Technical Report #9703-09, ITC-IRST (1997)"},{"key":"9_CR5","unstructured":"Sundaresan, N., Chung, C.Y., Gertz, M.: Reverse engineering for web data: From visual to semantic structures. In: 18th Intern. Conf Data Eng, ICDE (2002)"},{"key":"9_CR6","unstructured":"Curran, J.R., Wong, R.K.: Transformation-based learning for automatic translation from HTML to XML. In: Proc. 4th Austral. Doc. Comp. Symp, ADCS (1999)"},{"key":"9_CR7","doi-asserted-by":"crossref","unstructured":"Penttonen, M., Kuikka, E., Leinonen, P.: Towards automating of document structure transformations. In: Proc. ACM Sym. on Doc. Eng., pp. 103\u2013110 (2002)","DOI":"10.1145\/585058.585078"},{"key":"9_CR8","unstructured":"Ha, J., Haralick, R.M., Phillips, I.T.: Recursive X-Y cut using bounding boxes of connected components. In: ICDAR (1995)"},{"key":"9_CR9","doi-asserted-by":"crossref","unstructured":"He, F., Ding, X., Peng, L.: Hierarchical logical structure extraction of book documents by analyzing tables of contents. In: Proc. of SPIE-IS&T Elect. Imaging. SPIE, vol.\u00a05296 (1995)","DOI":"10.1117\/12.528808"},{"key":"9_CR10","doi-asserted-by":"crossref","unstructured":"Ishitani, Y.: Document transformation system from papers to xml data based on pivot xml document method. In: ICDAR (2003)","DOI":"10.1109\/ICDAR.2003.1227668"},{"key":"9_CR11","unstructured":"Kurgan, L., Swiercz, W., Cios, K.J.: Semantic mapping of XML tags using inductive machine learning. In: Proc. Intern. Conf. Machine Learn. and Applic., pp. 99\u2013109 (2002)"},{"key":"9_CR12","unstructured":"Lin, X.: Text-mining based journal splitting. In: ICDAR (2003)"},{"key":"9_CR13","doi-asserted-by":"crossref","unstructured":"Lin, X.: Automatic document navigation for digital content re-mastering. Master\u2019s thesis, HP, Technical report (2003)","DOI":"10.1117\/12.521991"},{"key":"9_CR14","unstructured":"Nagy, G., Seth, S.: Hierarchical representation of optically scanned documents. In: Intern. Conf. Pattern Recogn. (1984)"},{"key":"9_CR15","doi-asserted-by":"crossref","unstructured":"Ramakrishnan, I.V., Mukherjee, S., Yang, G.: Automatic annotation of content-rich web documents: Structural and semantic analysis. In: Intern. Sem. Web Conf. (2003)","DOI":"10.1007\/978-3-540-39718-2_34"},{"key":"9_CR16","unstructured":"Wang, Y., Phillips, I.T., Haralick, R.: From image to SGML\/XML representation: One method. In: Intern. Workshop Doc. Layout Interpr. and Its Applic., DLIAP (1999)"},{"key":"9_CR17","unstructured":"XQuery 1.0: An XML query language, http:\/\/www.w3c.org\/TR\/xquery\/"},{"key":"9_CR18","unstructured":"XSL Transformations (XSLT) version 1.0, http:\/\/www.w3c.org\/TR\/xslt\/"}],"container-title":["Lecture Notes in Computer Science","Research and Advanced Technology for Digital Libraries"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/11551362_9.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,4]],"date-time":"2025-01-04T16:03:47Z","timestamp":1736006627000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/11551362_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2005]]},"ISBN":["9783540287674","9783540319313"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/11551362_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2005]]}}}