{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T02:16:36Z","timestamp":1772849796067,"version":"3.50.1"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"3-4","license":[{"start":{"date-parts":[[2014,6,8]],"date-time":"2014-06-08T00:00:00Z","timestamp":1402185600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Digit Libr"],"published-print":{"date-parts":[[2014,8]]},"DOI":"10.1007\/s00799-014-0115-1","type":"journal-article","created":{"date-parts":[[2014,6,10]],"date-time":"2014-06-10T14:32:38Z","timestamp":1402410758000},"page":"83-99","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":19,"title":["Unsupervised document structure analysis of digital scientific articles"],"prefix":"10.1007","volume":"14","author":[{"given":"Stefan","family":"Klampfl","sequence":"first","affiliation":[]},{"given":"Michael","family":"Granitzer","sequence":"additional","affiliation":[]},{"given":"Kris","family":"Jack","sequence":"additional","affiliation":[]},{"given":"Roman","family":"Kern","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,6,8]]},"reference":[{"issue":"1","key":"115_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10032-002-0080-x","volume":"5","author":"M Aiello","year":"2002","unstructured":"Aiello, M., Monz, C., Todoran, L., Worring, M.: Document understanding for a broad class of documents. Int. J. Doc. Anal. Recogn. 5(1), 1\u201316 (2002). doi: 10.1007\/s10032-002-0080-x","journal-title":"Int. J. Doc. Anal. Recogn."},{"key":"115_CR2","doi-asserted-by":"crossref","unstructured":"Beel, J., Langer, S., Genzmehr, M., M\u00fcller, C.: Docear\u2019s PDF inspector: title extraction from PDF files. In: Proceedings of the 13th ACM\/IEEE-CS Joint Conference on Digital Libraries (JCDL 2013) (2013)","DOI":"10.1145\/2467696.2467789"},{"key":"115_CR3","doi-asserted-by":"crossref","unstructured":"Constantin, A., Pettifer, S., Voronkov, A.: PDFX: fully-automated PDF-to-XML conversion of scientific literature. In: Proceedings of the 13th ACM Symposium on Document, Engineering (2013)","DOI":"10.1145\/2494266.2494271"},{"key":"115_CR4","unstructured":"Councill, I.G., Giles, C.L., Kan, M.y.: ParsCit: An Open-Source CRF Reference String Parsing Package. In: Calzolari, N., Choukri, K., Maegaard, B., Mariani, J., Odjik, J., Piperidis, S., Tapias, D. (eds.) Proceedings of LREC, vol. 2008, pp. 661\u2013667. Citeseer, European Language Resources Association (ELRA) (2008). doi:10.1.1.150.6790"},{"key":"115_CR5","doi-asserted-by":"crossref","unstructured":"Dejean, H., Meunier, J.L.: A system for converting PDF documents into structured XML format. In: Document Analysis Systems VII, pp. 129\u2013140 (2006)","DOI":"10.1007\/11669487_12"},{"key":"115_CR6","doi-asserted-by":"crossref","unstructured":"Doucet, A., Kazai, G., Colutto, S., M\u00fchlberger, G.: Overview of the ICDAR 2013 competition on book structure extraction. In: Proceedings of the Twelfth International Conference on Document Analysis and Recognition (ICDAR\u20192013), p. 6. Washington DC, USA (2013)","DOI":"10.1109\/ICDAR.2013.290"},{"issue":"2008","key":"115_CR7","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-540-76280-5\\_5","volume":"138","author":"F Esposito","year":"2008","unstructured":"Esposito, F., Ferilli, S., Basile, T.M.A.: Machine learning for digital document processing: from layout analysis to metadata extraction. World Wide Web Internet Web Inform. Syst. 138(2008), 1\u201335 (2008). doi: 10.1007\/978-3-540-76280-5_5","journal-title":"World Wide Web Internet Web Inform. Syst."},{"key":"115_CR8","doi-asserted-by":"crossref","unstructured":"Ferilli, S., Basile, T., Mauro, N.D.: Markov logic networks for document layout correction. In: Modern Approaches in, Applied Intelligence, pp. 275\u2013284 (2011)","DOI":"10.1007\/978-3-642-21822-4_28"},{"key":"115_CR9","doi-asserted-by":"crossref","unstructured":"Gao, L., Tang, Z., Lin, X., Liu, Y., Qiu, R., Wang, Y.: Structure extraction from PDF-based book documents. In: Proceedings of the 11th Annual International ACM\/IEEE Joint Conference on Digital Libraries, pp. 11\u201320 (2011)","DOI":"10.1145\/1998076.1998079"},{"issue":"11","key":"115_CR10","doi-asserted-by":"publisher","first-page":"1162","DOI":"10.1109\/34.244677","volume":"15","author":"LO Gorman","year":"1993","unstructured":"Gorman, L.O., Definitions, A.: The document spectrum for page layout analysis. IEEE Trans. Pattern Anal. Mach. Intell. 15(11), 1162\u20131173 (1993)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"115_CR11","doi-asserted-by":"crossref","unstructured":"Granitzer, M., Hristakeva, M., Knight, R., Jack, K.: A comparison of metadata extraction techniques for crowdsourced bibliographic metadata management. In: Proceedings of the 27th Symposium On Applied Computing, p. to appear. ACM, New York (2012)","DOI":"10.1145\/2245276.2245462"},{"key":"115_CR12","doi-asserted-by":"crossref","unstructured":"Granitzer, M., Hristakeva, M., Knight, R., Jack, K., Kern, R.: A comparison of layout based bibliographic metadata extraction techniques. In: WIMS12\u2014International Conference on Web Intelligence, Mining and Semantics, pp. 19:1\u201319:8. ACM, New York (2012)","DOI":"10.1145\/2254129.2254154"},{"key":"115_CR13","doi-asserted-by":"crossref","unstructured":"Kern, R., Jack, K., Hristakeva, M., Granitzer, M.: TeamBeam\u2014meta-data extraction from scientific literature. In: 1st International Workshop on Mining Scientific Publications (2012)","DOI":"10.1045\/july2012-kern"},{"key":"115_CR14","doi-asserted-by":"publisher","unstructured":"Kern, R., Klampfl, S.: Extraction of references using layout and formatting information from scientific articles. D-Lib Magazine 19(9\/10) (2013). doi: 10.1045\/september2013-kern","DOI":"10.1045\/september2013-kern"},{"key":"115_CR15","unstructured":"Klink, S., Dengel, A., Kieninger, T.: Document structure analysis based on layout and textual features. In: Proceedings of International Workshop on Document Analysis Systems (2000)"},{"key":"115_CR16","doi-asserted-by":"publisher","first-page":"164","DOI":"10.1117\/12.472833","volume":"5010","author":"X Lin","year":"2002","unstructured":"Lin, X.: Header and footer extraction by page-association. Proc. SPIE 5010, 164\u2013171 (2002). doi: 10.1117\/12.472833","journal-title":"Proc. SPIE"},{"key":"115_CR17","doi-asserted-by":"publisher","unstructured":"Liu, Y., Bai, K., Mitra, P., Giles, C.L.: Improving the table boundary detection in PDFs by fixing the sequence error of the sparse lines. In: 2009 10th International Conference on Document Analysis and Recognition, pp. 1006\u20131010 (2009). doi: 10.1109\/ICDAR.2009.138","DOI":"10.1109\/ICDAR.2009.138"},{"key":"115_CR18","doi-asserted-by":"publisher","unstructured":"Liu, Y., Mitra, P., Giles, C.L.: A fast preprocessing method for table boundary detection: narrowing down the sparse lines using solely coordinate information. In: 2008 The Eighth IAPR International Workshop on Document Analysis Systems, pp. 431\u2013438. IEEE (2008). doi: 10.1109\/DAS.2008.77","DOI":"10.1109\/DAS.2008.77"},{"key":"115_CR19","doi-asserted-by":"publisher","unstructured":"Liu, Y., Mitra, P., Giles, C.L.: Identifying table boundaries in digital documents via sparse line detection. In: Proceeding of the 17th ACM conference on Information and knowledge mining CIKM 08, pp. 1311\u20131320. ACM Press (2008). doi: 10.1145\/1458082.1458255","DOI":"10.1145\/1458082.1458255"},{"issue":"4","key":"115_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.4018\/jdls.2010100101","volume":"1","author":"MT Luong","year":"2011","unstructured":"Luong, M.T., Nguyen, T.D., Kan, M.Y.: Logical structure recovery in scholarly articles with rich document features. Int. J. Digital Libr. Syst. 1(4), 1\u201323 (2011). doi: 10.4018\/jdls.2010100101","journal-title":"Int. J. Digital Libr. Syst."},{"key":"115_CR21","doi-asserted-by":"crossref","unstructured":"Malerba, D., Ceci, M., Berardi, M.: Machine learning for reading order detection in document image understanding. In: Machine Learning in Document Analysis, pp. 45\u201369 (2008)","DOI":"10.1007\/978-3-540-76280-5_3"},{"issue":"1","key":"115_CR22","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1117\/12.476326","volume":"5010","author":"S Mao","year":"2003","unstructured":"Mao, S., Rosenfeld, A., Kanungo, T.: Document structure analysis algorithms: a literature survey. Proc. SPIE 5010(1), 197\u2013207 (2003). doi: 10.1117\/12.476326","journal-title":"Proc. SPIE"},{"key":"115_CR23","doi-asserted-by":"publisher","unstructured":"Meunier, J.L.: Optimized XY-cut for determining a page reading order. In: Eighth International Conference on Document Analysis and Recognition ICDAR05 1, pp. 347\u2013351 (2005). doi: 10.1109\/ICDAR.2005.182","DOI":"10.1109\/ICDAR.2005.182"},{"issue":"7","key":"115_CR24","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1109\/2.144436","volume":"25","author":"G Nagy","year":"1992","unstructured":"Nagy, G., Seth, S., Viswanathan, M.: A prototype document image analysis system for technical journals. Computer 25(7), 10\u201322 (1992). doi: 10.1109\/2.144436","journal-title":"Computer"},{"key":"115_CR25","unstructured":"Peng, F., McCallum, A.: Accurate information extraction from research papers using conditional random fields. In: HLTNAACL04, vol. 2004, pp. 329\u2013336 (2004). doi: 10.1.1.10.5644"},{"issue":"1","key":"115_CR26","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1186\/1751-0473-7-7","volume":"7","author":"C Ramakrishnan","year":"2012","unstructured":"Ramakrishnan, C., Patnia, A., Hovy, E., Burns, G.A.: Layout-aware text extraction from full-text PDF of scientific articles. Source Code Biol Med 7(1), 7 (2012). doi: 10.1186\/1751-0473-7-7","journal-title":"Source Code Biol Med"},{"key":"115_CR27","unstructured":"Summers, K.: Automatic discovery of logical document structure. Ph.D. thesis (1998)"},{"key":"115_CR28","doi-asserted-by":"publisher","unstructured":"Tkaczyk, D., Bolikowski, L., Czeczko, A., Rusek, K.: A modular metadata extraction system for born-digital articles. In: 2012 10th IAPR International Workshop on Document Analysis Systems, pp. 11\u201316 (2012). doi: 10.1109\/DAS.2012.4","DOI":"10.1109\/DAS.2012.4"},{"key":"115_CR29","doi-asserted-by":"crossref","unstructured":"Tkaczyk, D., Czeczko, A., Rusek, K.: GROTOAP: ground truth for open access publications. In: Proceedings of the 12th ACM\/IEEE-CS Joint Conference on Digital Libraries, pp. 381\u2013382 (2012)","DOI":"10.1145\/2232817.2232901"},{"issue":"1","key":"115_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10032-004-0120-9","volume":"7","author":"R Zanibbi","year":"2004","unstructured":"Zanibbi, R., Blostein, D., Cordy, J.R.: A survey of table recognition. Doc. Anal. Recogn. 7(1), 1\u201316 (2004). doi: 10.1007\/s10032-004-0120-9","journal-title":"Doc. Anal. Recogn."},{"issue":"6","key":"115_CR31","doi-asserted-by":"publisher","first-page":"1245","DOI":"10.1137\/0218082","volume":"18","author":"K Zhang","year":"1989","unstructured":"Zhang, K., Shasha, D.: Simple fast algorithms for the editing distance between trees and related problems. SIAM J. Comput. 18(6), 1245\u20131262 (1989). doi: 10.1137\/0218082","journal-title":"SIAM J. Comput."}],"container-title":["International Journal on Digital Libraries"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-014-0115-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00799-014-0115-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-014-0115-1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-014-0115-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,11]],"date-time":"2019-08-11T07:39:48Z","timestamp":1565509188000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00799-014-0115-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,6,8]]},"references-count":31,"journal-issue":{"issue":"3-4","published-print":{"date-parts":[[2014,8]]}},"alternative-id":["115"],"URL":"https:\/\/doi.org\/10.1007\/s00799-014-0115-1","relation":{},"ISSN":["1432-5012","1432-1300"],"issn-type":[{"value":"1432-5012","type":"print"},{"value":"1432-1300","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,6,8]]},"assertion":[{"value":"30 October 2013","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 May 2014","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 May 2014","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 June 2014","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}