{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,5]],"date-time":"2025-08-05T12:21:48Z","timestamp":1754396508889,"version":"3.38.0"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"2","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IJDAR"],"published-print":{"date-parts":[[2011,6]]},"DOI":"10.1007\/s10032-010-0135-3","type":"journal-article","created":{"date-parts":[[2010,11,3]],"date-time":"2010-11-03T21:58:41Z","timestamp":1288821521000},"page":"229-239","source":"Crossref","is-referenced-by-count":4,"title":["Digital weight watching: reconstruction of scanned documents"],"prefix":"10.1007","volume":"14","author":[{"given":"Maarten","family":"Marx","sequence":"first","affiliation":[]},{"given":"Tim","family":"Gielissen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2010,10,31]]},"reference":[{"key":"135_CR1","unstructured":"Alonso, J. et\u00a0al.: Improving Access to Government Through Better Use of the Web. W3C Interest Group Note 12 May 2009. http:\/\/www.w3.org\/TR\/egov-improving\/"},{"key":"135_CR2","unstructured":"Bennet, D., Harvey, A.: Publishing Open Government Data (W3C Working Draft 8 September 2009). http:\/\/www.w3.org\/TR\/gov-data\/"},{"key":"135_CR3","unstructured":"Breuel, Th.: High performance document layout analysis. In: Doermann, D. (eds.) Proceedings 2003 Symposium on Document Image Understanding Technology, pp. 209\u2013218 (2003)"},{"key":"135_CR4","doi-asserted-by":"crossref","unstructured":"Clarke, Ch., Agichtein, E., Dumais, S., White, R.: The influence of caption features on clickthrough patterns in web search. In: Proceedings SIGIR \u201907, pp. 135\u2013142 (2007)","DOI":"10.1145\/1277741.1277767"},{"key":"135_CR5","doi-asserted-by":"crossref","unstructured":"Doan, A., Ramakrishnan, R., Vaithyanathan, S.: Managing information extraction: State of the art and research directions. In: Proceedings SIGMOD \u201906, pp. 799\u2013800 (2006)","DOI":"10.1145\/1142473.1142595"},{"key":"135_CR6","doi-asserted-by":"crossref","unstructured":"Gielissen, T., Marx, M.: Exemelification of parliamentary debates. In: Proceedings of the 9th Dutch-Belgian Information Retrieval Workshop (DIR 2009), pp. 19\u201325. Twente, The Netherlands (2009)","DOI":"10.1145\/1568296.1568303"},{"issue":"3","key":"135_CR7","doi-asserted-by":"crossref","first-page":"299","DOI":"10.1145\/1080343.1080346","volume":"23","author":"H.M. Gladney","year":"2005","unstructured":"Gladney H.M., Lorie R.A.: Trustworthy 100-year digital objects: Durable encoding for when it\u2019s too late to ask. ACM Trans. Inf. Syst. 23(3), 299\u2013324 (2005)","journal-title":"ACM Trans. Inf. Syst."},{"key":"135_CR8","doi-asserted-by":"crossref","unstructured":"He, F., Ding, X.: Hierarchical logical structure extraction of book documents by analyzing table of contents. In: Proceedings of the SPIE Conference on Document Recognition and Retrieval XI, pp. 6\u201313 (2004)","DOI":"10.1117\/12.528808"},{"key":"135_CR9","unstructured":"Hearst, M.: Design recommendations for hierarchical faceted search interfaces. In: ACM SIGIR Workshop on Faceted Search (2006)"},{"key":"135_CR10","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9781139644082","volume-title":"Search User Interfaces","author":"M. Hearst","year":"2009","unstructured":"Hearst M.: Search User Interfaces. Cambridge University Press, Cambridge (2009)"},{"key":"135_CR11","doi-asserted-by":"crossref","unstructured":"Hulth, A., Karlgren, J., Jonsson, A., Bostr\u00f6m, H., Asker, L.: Automatic keyword extraction using domain knowledge. In: Proceedings CICLing 2001, pp. 472\u2013482. Springer (2001)","DOI":"10.1007\/3-540-44686-9_47"},{"key":"135_CR12","doi-asserted-by":"crossref","unstructured":"Kaptein, R., Marx, M., Kamps, J.: Who said what to whom? Capturing the structure of debates. In: Proceedings SIGIR \u201909, pp. 831\u2013832 (2009)","DOI":"10.1145\/1571941.1572151"},{"key":"135_CR13","volume-title":"XPath 2.0 Programmer\u2019s Reference","author":"M. Kay","year":"2004","unstructured":"Kay M.: XPath 2.0 Programmer\u2019s Reference. Wrox, Birmingham (2004)"},{"key":"135_CR14","volume-title":"XSLT 2.0 3rd edn Programmer\u2019s Reference","author":"M. Kay","year":"2004","unstructured":"Kay M.: XSLT 2.0 3rd edn Programmer\u2019s Reference. Wrox, Birmingham (2004)"},{"key":"135_CR15","unstructured":"Klink, S., Dengel, A., Kieninger, T.: Document structure analysis based on layout and textual features. In: Proceedings of International Workshop on Document Analysis Systems (2000)"},{"key":"135_CR16","unstructured":"Knight, G., Pennock, M.: Data without meaning: Establishing the significant properties of digital research. In: iPRES 2008 Conference Proceedings (2008)"},{"key":"135_CR17","unstructured":"Koninklijke Bibliotheek: Staten-generaal digitaal (2009). http:\/\/www.statengeneraaldigitaal.nl\/backgrounds.html"},{"key":"135_CR18","doi-asserted-by":"crossref","unstructured":"Lud\u00e4scher, B., Mukhopadhyay, P., Papakonstantinou, Y.: A transducer-based XML query processor. In: Proceedings VLDB \u201902, pp. 227\u2013238. VLDB Endowment (2002)","DOI":"10.1016\/B978-155860869-6\/50028-7"},{"key":"135_CR19","doi-asserted-by":"crossref","unstructured":"Mao, S., Rosenfeld, A., Kanungo, T.: Document structure analysis algorithms: A literature survey. In: Proceedings of the SPIE Conference on Document Recognition and Retrieval X, pp. 197\u2013207 (2003)","DOI":"10.1117\/12.476326"},{"key":"135_CR20","doi-asserted-by":"crossref","unstructured":"Mao, S., Kim, J., Thoma, G.: Style-independent document labeling: Design and performance evaluation. In: Proceedings of the SPIE Conference on Document Recognition and Retrieval XI, pp. 14\u201322 (2004)","DOI":"10.1117\/12.532039"},{"key":"135_CR21","doi-asserted-by":"crossref","unstructured":"Marx, M.: (2009) Long, often quite boring, notes of meetings. In: ESAIR \u201909: Proceedings of the WSDM \u201909 Workshop on Exploiting Semantic Annotations in Information Retrieval, pp. 46\u201353. ACM (2009)","DOI":"10.1145\/1506250.1506262"},{"key":"135_CR22","unstructured":"Marx, M., Schuth, A.: DutchParl. A corpus of parliamentary documents in Dutch. In: Proceedings Language Resources and Evaluation (LREC) pp. 3670\u20133677 (2010)"},{"key":"135_CR23","unstructured":"Message Understanding Conference Proceedings MUC-7: National Institute of Standards and Technology (NIST) Gaithersburg, Maryland, USA (1997)"},{"issue":"2","key":"135_CR24","doi-asserted-by":"crossref","first-page":"80","DOI":"10.1145\/1480506.1480520","volume":"42","author":"V. Murdock","year":"2008","unstructured":"Murdock V., Lalmas M.: Workshop on aggregated search. SIGIR Forum 42(2), 80\u201383 (2008)","journal-title":"SIGIR Forum"},{"key":"135_CR25","unstructured":"Proceedings of the First Text Analysis Conference (TAC 2008): National Institute of Standards and Technology (NIST) Gaithersburg, Maryland, USA (2008)"},{"key":"135_CR26","unstructured":"Rada, M., Andras, C.: Wikify!: Linking documents to encyclopedic knowledge. In: Proceedings CIKM \u201907, pp. 233\u2013242 (2007)"},{"issue":"4","key":"135_CR27","first-page":"3","volume":"23","author":"E. Rahm","year":"2000","unstructured":"Rahm E., Do H.H.: Data cleaning: Problems and current approaches. IEEE Tech. Bull. Data Eng. 23(4), 3\u201313 (2000)","journal-title":"IEEE Tech. Bull. Data Eng."},{"key":"135_CR28","doi-asserted-by":"crossref","unstructured":"Reynaert, M.: Non-interactive OCR post-correction for giga-scale digitization projects. In: Proceedings of the CICLing (Computational Linguistics and Intelligent Text Processing, 9th International Conference), pp. 617\u2013630 (2008)","DOI":"10.1007\/978-3-540-78135-6_53"},{"key":"135_CR29","unstructured":"Salminen, A.: Building digital government by XML. In: Proceedings of the Thirty-Eighth Hawaii International Conference on System Sciences. IEEE Computer Society (2005)"},{"key":"135_CR30","unstructured":"Sigurbj\u00f6rnsson, B.: Focused information access using XML element retrieval. PhD thesis, University of Amsterdam (2006)"},{"issue":"3","key":"135_CR31","doi-asserted-by":"crossref","first-page":"196","DOI":"10.1177\/0165551505052347","volume":"31","author":"J.R. Van Der Hoeven","year":"2005","unstructured":"Van Der Hoeven J.R., Van Diessen R.J., Van Der Meer K.: Development of a universal virtual computer (uvc) for long-term preservation of digital objects. J. Inf. Sci. 31(3), 196\u2013208 (2005)","journal-title":"J. Inf. Sci."}],"container-title":["International Journal on Document Analysis and Recognition (IJDAR)"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/www.springerlink.com\/index\/pdf\/10.1007\/s10032-010-0135-3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,27]],"date-time":"2025-02-27T15:42:51Z","timestamp":1740670971000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10032-010-0135-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,10,31]]},"references-count":31,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2011,6]]}},"alternative-id":["135"],"URL":"https:\/\/doi.org\/10.1007\/s10032-010-0135-3","relation":{},"ISSN":["1433-2833","1433-2825"],"issn-type":[{"type":"print","value":"1433-2833"},{"type":"electronic","value":"1433-2825"}],"subject":[],"published":{"date-parts":[[2010,10,31]]}}}