{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T06:54:59Z","timestamp":1747810499132,"version":"3.32.0"},"publisher-location":"Berlin, Heidelberg","reference-count":43,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540446361"},{"type":"electronic","value":"9783540446385"}],"license":[{"start":{"date-parts":[[2006,1,1]],"date-time":"2006-01-01T00:00:00Z","timestamp":1136073600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2006]]},"DOI":"10.1007\/11863878_6","type":"book-chapter","created":{"date-parts":[[2006,9,3]],"date-time":"2006-09-03T13:35:01Z","timestamp":1157290501000},"page":"63-74","source":"Crossref","is-referenced-by-count":3,"title":["Genre Classification in Automated Ingest and Appraisal Metadata"],"prefix":"10.1007","author":[{"given":"Yunhyong","family":"Kim","sequence":"first","affiliation":[]},{"given":"Seamus","family":"Ross","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"1","key":"6_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10032-002-0080-x","volume":"5","author":"M. Aiello","year":"2002","unstructured":"Aiello, M., Monz, C., Todoran, L., Worring, M.: Document Understanding for a Broad Class of Documents. International Journal on Document Analysis and Recognition\u00a05(1), 1\u201316 (2002)","journal-title":"International Journal on Document Analysis and Recognition"},{"unstructured":"Automatic Metadata Generation: http:\/\/www.cs.kuleuven.ac.be\/~hmdb\/amg\/documentation.php","key":"6_CR2"},{"doi-asserted-by":"crossref","unstructured":"Arens, A., Blaesius, K.H.: Domain oriented information extraction from the Internet. In: Proceedings of SPIE Document Recognition and Retrieval 2003, vol.\u00a05010, p. 286 (2003)","key":"6_CR3","DOI":"10.1117\/12.476042"},{"doi-asserted-by":"crossref","unstructured":"Bagdanov, A.D., Worring, M.: Fine-Grained Document Genre Classification Using First Order Random Graphs. In: Proceedings of International Conference on Document Analysis and Recognition 2001, p. 79 (2001)","key":"6_CR4","DOI":"10.1109\/ICDAR.2001.953759"},{"doi-asserted-by":"crossref","unstructured":"Barbu, E., Heroux, P., Adam, S., Trupin, E.: Clustering Document Images Using a Bag of Symbols Representation. In: International Conference on Document Analysis and Recognition, pp. 1216\u20131220 (2005)","key":"6_CR5","DOI":"10.1109\/ICDAR.2005.75"},{"unstructured":"Bekkerman, R., McCallum, A., Huang, G.: Automatic Categorization of Email into Folders. Benchmark Experiments on Enron and SRI Corpora\u2019, CIIR Technical Report, IR-418 (2004)","key":"6_CR6"},{"key":"6_CR7","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511519871","volume-title":"Dimensions of Register Variation:a Cross-Linguistic Comparison","author":"D. Biber","year":"1995","unstructured":"Biber, D.: Dimensions of Register Variation:a Cross-Linguistic Comparison. Cambridge University Press, Cambridge (1995)"},{"unstructured":"Boese, E.S.: Stereotyping the web: genre classification of web documents. Master\u2019s thesis, Colorado State University (2005)","key":"6_CR8"},{"doi-asserted-by":"crossref","unstructured":"Breuel, T.M.: An Algorithm for Finding Maximal Whitespace Rectangles at Arbitrary Orientations for Document Layout Analysis. In: 7th International Conference for Document Analysis and Recognition (ICDAR), pp. 66\u201370 (2003)","key":"6_CR9","DOI":"10.1109\/ICDAR.2003.1227629"},{"unstructured":"Digital Curation Centre: http:\/\/www.dcc.ac.uk","key":"6_CR10"},{"unstructured":"DC-dot, Dublin Core metadata editor: http:\/\/www.ukoln.ac.uk\/metadata\/dcdot\/","key":"6_CR11"},{"unstructured":"DELOS Network of Excellence on Digital Libraries: http:\/\/www.delos.info\/","key":"6_CR12"},{"unstructured":"NSF International Projects: http:\/\/www.dli2.nsf.gov\/intl.html","key":"6_CR13"},{"unstructured":"DELOS\/NSF Working Groups: Reference Models for Digital Libraries: Actors and Roles (2003), http:\/\/www.dli2.nsf.gov\/internationalprojects\/workinggroupreports\/actorsfinalreport.html","key":"6_CR14"},{"unstructured":"Dublin Core Initiative: http:\/\/dublincore.org\/tools\/#automaticextraction","key":"6_CR15"},{"unstructured":"Engineering and Physical Sciences Research Council: http:\/\/www.epsrc.ac.uk\/","key":"6_CR16"},{"unstructured":"Electronic Resources Preservation Access Network (ERPANET): http:\/\/www.erpanet.org","key":"6_CR17"},{"unstructured":"ERPANET: Packaged Object Ingest Project, http:\/\/www.erpanet.org\/events\/2003\/rome\/presentations\/ross_rusbridge_pres.pdf","key":"6_CR18"},{"doi-asserted-by":"crossref","unstructured":"Giuffrida, G., Shek, E., Yang, J.: Knowledge-based Metadata Extraction from PostScript File. In: Proc. 5th ACM Intl. conf. Digital Libraries, pp. 77\u201384 (2000)","key":"6_CR19","DOI":"10.1145\/336597.336639"},{"unstructured":"Han, H., Giles, L., Manavoglu, E., Zha, H., Zhang, Z., Fox, E.A.: Automatic Document Metadata Extraction using Support Vector Machines. In: Proc. 3rd ACM\/IEEECS conf. Digital libraries, pp. 37\u201348 (2000)","key":"6_CR20"},{"unstructured":"Hedstrom, M., Ross, S., Ashley, K., Christensen-Dalsgaard, B., Duff, W., Gladney, H., Huc, C., Kenney, A.R., Moore, R., Neuhold, E.: Invest to Save: Report and Recommendations of the NSF-DELOS Working Group on Digital Archiving and Preservation. Report of the European Union DELOS and US National Science Foundation Workgroup on Digital Preservation and Archiving (2003), http:\/\/delos-noe.iei.pi.cnr.it\/activities\/internationalforum\/Joint-WGs\/digitalarchiving\/Digitalarchiving.pdf","key":"6_CR21"},{"unstructured":"Joint Information Systems Committee: http:\/\/www.jisc.ac.uk\/","key":"6_CR22"},{"doi-asserted-by":"crossref","unstructured":"Karlgren, J., Cutting, D.: Recognizing Text Genres with Simple Metric using Discriminant Analysis. In: Proc. 15th conf. Comp. Ling., vol.\u00a02, pp. 1071\u20131075 (1994)","key":"6_CR23","DOI":"10.3115\/991250.991324"},{"key":"6_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"460","DOI":"10.1007\/11735106_41","volume-title":"Advances in Information Retrieval","author":"S.W. Ke","year":"2006","unstructured":"Ke, S.W., Bowerman, C., Oakes, M.: PERC: A Personal Email Classifier. In: Lalmas, M., MacFarlane, A., R\u00fcger, S.M., Tombros, A., Tsikrika, T., Yavlinsky, A. (eds.) ECIR 2006. LNCS, vol.\u00a03936, pp. 460\u2013463. Springer, Heidelberg (2006)"},{"doi-asserted-by":"crossref","unstructured":"Kessler, B., Nunberg, G., Schuetze, H.: Automatic Detection of Text Genre. In: Proc. 35th Ann. Meeting ACL, pp. 32\u201338 (1997)","key":"6_CR25","DOI":"10.3115\/976909.979622"},{"unstructured":"Le, Z.: Maximum Entropy Toolkit for Python and C++. LGPL license, http:\/\/homepages.inf.ed.ac.uk\/s0450736\/maxent_toolkit.html","key":"6_CR26"},{"unstructured":"MetadataExtractor: http:\/\/pami-xeon.uwaterloo.ca\/TextMiner\/MetadataExtractor.aspx","key":"6_CR27"},{"unstructured":"McCallum, A.: Bow: A Toolkit for Statistical Language Modeling, Text Retrieval, Classification and Clustering (1998), http:\/\/www.cs.cmu.edu\/mccallum\/bow\/","key":"6_CR28"},{"unstructured":"National Archives UK: DROID (Digital Object Identification), http:\/\/www.nationalarchives.gov.uk\/aboutapps\/pronom\/droid.htm","key":"6_CR29"},{"unstructured":"Natinal Library of Medicine US: http:\/\/www.nlm.nih.gov\/","key":"6_CR30"},{"unstructured":"National Library of New Zealand: Metadata Extraction Tool, http:\/\/www.natlib.govt.nz\/en\/whatsnew\/4initiatives.html#extraction","key":"6_CR31"},{"unstructured":"Adobe Acrobat PDF specification: http:\/\/partners.adobe.com\/public\/developer\/pdf\/index_reference.html","key":"6_CR32"},{"unstructured":"Python Imaging Library: http:\/\/www.pythonware.com\/products\/pil\/","key":"6_CR33"},{"unstructured":"PREMIS (PREservation Metadata: Implementation Strategy) Working Group: http:\/\/www.oclc.org\/research\/projects\/pmwg\/","key":"6_CR34"},{"unstructured":"Python: http:\/\/www.python.org","key":"6_CR35"},{"doi-asserted-by":"crossref","unstructured":"Riloff, E., Wiebe, J., Wilson, T.: Learning Subjective Nouns using Extraction Pattern Bootstrapping. In: Proc. 7th CoNLL, pp. 25\u201332 (2003)","key":"6_CR36","DOI":"10.3115\/1119176.1119180"},{"unstructured":"Ross, S., Hedstrom, M.: Preservation Research and Sustainable Digital Libraries. International Journal of Digital Libraries (Springer) (2005), doi:10.1007\/s00799- 004-0099-3","key":"6_CR37"},{"unstructured":"Santini, M.: A Shallow Approach To Syntactic Feature Extraction For Genre Classification. In: Proceedings of the 7th Annual Colloquium for the UK Special Interest Group for Computational Linguistics, CLUK 2004 (2004)","key":"6_CR38"},{"key":"6_CR39","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/505282.505283","volume":"34","author":"F. Sebastiani","year":"2002","unstructured":"Sebastiani, F.: Machine Learning in Automated Text Categorization. ACM Computing Surveys\u00a034, 1\u201347 (2002)","journal-title":"ACM Computing Surveys"},{"doi-asserted-by":"crossref","unstructured":"Shafait, F., Keysers, D., Breuel, T.M.: Performance Comparison of Six Algorithms for Page Segmentation. In: 7th IAPR Workshop on Document Analysis Systems (DAS), pp. 368\u2013379 (2006)","key":"6_CR40","DOI":"10.1007\/11669487_33"},{"unstructured":"Shao, M., Futrelle, R.: Graphics Recognition in PDF document. In: Sixth IAPR International Workshop on Graphics Recognition (GREC 2005), pp. 218\u2013227 (2005)","key":"6_CR41"},{"unstructured":"Thoma, G.: Automating the production of bibliographic records. R&D report of the Communications Engineering Branch, Lister Hill National Center for Biomedical Communications, National Library of Medicine (2001)","key":"6_CR42"},{"unstructured":"Witte, R., Krestel, R., Bergler, S.: ERSS 2005:Coreference-based Summarization Reloaded. DUC 2005 Document Understanding Workshop, Canada","key":"6_CR43"}],"container-title":["Lecture Notes in Computer Science","Research and Advanced Technology for Digital Libraries"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/11863878_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,10]],"date-time":"2025-01-10T19:13:00Z","timestamp":1736536380000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/11863878_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2006]]},"ISBN":["9783540446361","9783540446385"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/11863878_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2006]]}}}