{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T23:49:23Z","timestamp":1717458563499},"reference-count":55,"publisher":"American Chemical Society (ACS)","issue":"2","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J. Chem. Inf. Model."],"published-print":{"date-parts":[[2010,2,22]]},"DOI":"10.1021\/ci9003688","type":"journal-article","created":{"date-parts":[[2010,1,20]],"date-time":"2010-01-20T20:08:48Z","timestamp":1264018128000},"page":"251-261","source":"Crossref","is-referenced-by-count":8,"title":["SPECTRa-T: Machine-Based Data Extraction and Semantic Searching of Chemistry e-Theses"],"prefix":"10.1021","volume":"50","author":[{"given":"Jim","family":"Downing","sequence":"first","affiliation":[{"name":"Unilever Centre for Molecular Informatics, Department of Chemistry, Lensfield Rd., Cambridge CB2 1EW, U.K., Cambridge University Library, West Rd., Cambridge CB3 9DR, U.K., and Department of Chemistry and High Performance Computing Unit, ICT, Imperial College London, Exhibition Rd., London SW7 2AZ, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Matt J.","family":"Harvey","sequence":"additional","affiliation":[{"name":"Unilever Centre for Molecular Informatics, Department of Chemistry, Lensfield Rd., Cambridge CB2 1EW, U.K., Cambridge University Library, West Rd., Cambridge CB3 9DR, U.K., and Department of Chemistry and High Performance Computing Unit, ICT, Imperial College London, Exhibition Rd., London SW7 2AZ, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peter B.","family":"Morgan","sequence":"additional","affiliation":[{"name":"Unilever Centre for Molecular Informatics, Department of Chemistry, Lensfield Rd., Cambridge CB2 1EW, U.K., Cambridge University Library, West Rd., Cambridge CB3 9DR, U.K., and Department of Chemistry and High Performance Computing Unit, ICT, Imperial College London, Exhibition Rd., London SW7 2AZ, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peter","family":"Murray-Rust","sequence":"additional","affiliation":[{"name":"Unilever Centre for Molecular Informatics, Department of Chemistry, Lensfield Rd., Cambridge CB2 1EW, U.K., Cambridge University Library, West Rd., Cambridge CB3 9DR, U.K., and Department of Chemistry and High Performance Computing Unit, ICT, Imperial College London, Exhibition Rd., London SW7 2AZ, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Henry S.","family":"Rzepa","sequence":"additional","affiliation":[{"name":"Unilever Centre for Molecular Informatics, Department of Chemistry, Lensfield Rd., Cambridge CB2 1EW, U.K., Cambridge University Library, West Rd., Cambridge CB3 9DR, U.K., and Department of Chemistry and High Performance Computing Unit, ICT, Imperial College London, Exhibition Rd., London SW7 2AZ, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Diana C.","family":"Stewart","sequence":"additional","affiliation":[{"name":"Unilever Centre for Molecular Informatics, Department of Chemistry, Lensfield Rd., Cambridge CB2 1EW, U.K., Cambridge University Library, West Rd., Cambridge CB3 9DR, U.K., and Department of Chemistry and High Performance Computing Unit, ICT, Imperial College London, Exhibition Rd., London SW7 2AZ, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alan P.","family":"Tonge","sequence":"additional","affiliation":[{"name":"Unilever Centre for Molecular Informatics, Department of Chemistry, Lensfield Rd., Cambridge CB2 1EW, U.K., Cambridge University Library, West Rd., Cambridge CB3 9DR, U.K., and Department of Chemistry and High Performance Computing Unit, ICT, Imperial College London, Exhibition Rd., London SW7 2AZ, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Joe A.","family":"Townsend","sequence":"additional","affiliation":[{"name":"Unilever Centre for Molecular Informatics, Department of Chemistry, Lensfield Rd., Cambridge CB2 1EW, U.K., Cambridge University Library, West Rd., Cambridge CB3 9DR, U.K., and Department of Chemistry and High Performance Computing Unit, ICT, Imperial College London, Exhibition Rd., London SW7 2AZ, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"316","published-online":{"date-parts":[[2010,1,20]]},"reference":[{"key":"ref1\/cit1","doi-asserted-by":"crossref","first-page":"3192","DOI":"10.1039\/b410732b","volume":"2","author":"Murray-Rust P.","year":"2004","journal-title":"Org. Biomol. Chem."},{"key":"ref2\/cit2","doi-asserted-by":"crossref","first-page":"757","DOI":"10.1021\/ci0256541","volume":"43","author":"Murray-Rust P.","year":"2003","journal-title":"J. Chem. Inf. Comput. Sci."},{"key":"ref3\/cit3","volume-title":"RDF Primer","author":"Manola F.","year":"2004"},{"key":"ref4\/cit4","unstructured":"Fanning, B. A.Preserving the Data Explosion: Using PDF. Digital Preservation Coalition and The Association for Information & Image Management (AIIM), 2008; http:\/\/www.dpconline.org\/docs\/reports\/dpctw08-02.pdf(accessed November 26, 2009)."},{"key":"ref5\/cit5","doi-asserted-by":"crossref","first-page":"669","DOI":"10.1002\/jhet.5570370324","volume":"37","author":"de Laet A.","year":"2000","journal-title":"J. Heterocycl. Chem."},{"key":"ref6\/cit6","unstructured":"Electronic Theses Online Service (EthOSnet).http:\/\/www.ethos.ac.uk\/(accessed November 26, 2009)."},{"key":"ref7\/cit7","unstructured":"Narcis, the Gateway to Dutch Scientific Information: Promise of Science. 2009; http:\/\/www.narcis.info\/index\/tab\/publication\/Language\/en\/(accessed November 26, 2009)."},{"key":"ref8\/cit8","unstructured":"DART-Europe E-theses Portal (DEEP). 2007; http:\/\/www.dart-europe.eu\/index.php\/index(accessed November 26, 2009)."},{"key":"ref9\/cit9","unstructured":"ADT Australasian Digital Theses Program. 2009; http:\/\/adt.caul.edu.au\/(accessed November 26, 2009)."},{"key":"ref10\/cit10","unstructured":"Murray-Rust, P.; Downing, J.; Townsend, J.Chem4Word. 2009;http:\/\/www.chem4word.com\/(accessed November 29, 2009)."},{"key":"ref11\/cit11","volume-title":"The Semantic Web: A Guide to the Future of XML, Web Services and Knowledge Management","author":"Daconta M. C.","year":"2003"},{"key":"ref12\/cit12","doi-asserted-by":"crossref","first-page":"216","DOI":"10.1016\/j.websem.2006.05.004","volume":"4","author":"Stephens S.","year":"2006","journal-title":"Web Semantics"},{"key":"ref13\/cit13","unstructured":"Walker, F. L.; Gallagher, M. E.; Thoma, R.;PDF File Migration to PDF\/A: Technical Considerations.http:\/\/archive.nlm.nih.gov\/pubs\/ceb2007\/2007020.pdf(accessed November 26, 2009)."},{"key":"ref14\/cit14","unstructured":"ISO 19005\u22121:2005, Document Management\u2014Electronic Document File Format for Long-Term Preservation\u2014Part 1: Use of PDF 1.4 (PDF\/A-1). 2006; http:\/\/www.aiim.org\/documents\/standards\/19005-1_FAQ.pdf(accessed November 26, 2009)."},{"key":"ref15\/cit15","unstructured":"What is Tagged PDF?http:\/\/www.planetpdf.com\/mainpage.asp?webpageid=1269(accessed November 26, 2009)."},{"key":"ref16\/cit16","unstructured":"Davis, J.; Shur, A.OPC A New Standard For Packaging Your Data. 2009; http:\/\/msdn.microsoft.com\/en-us\/magazine\/cc163372.aspx(accessed November 29, 2009)."},{"key":"ref17\/cit17","unstructured":"OASIS: Advancing the Standards for the Open Information Society. 2009; http:\/\/www.oasis-open.org\/who\/(accessed November 26, 2009)."},{"key":"ref18\/cit18","unstructured":"Dublin Core Metadata Initiative. 2009; http:\/\/www.dublincore.org\/(accessed November 26, 2009)."},{"key":"ref19\/cit19","unstructured":"Ph.D. Thesis Regulations, California Institute of Technology, 2008; http:\/\/www.gradoffice.caltech.edu\/documents\/PHD-Thesisregulations.pdf(accessed November 26, 2009)."},{"key":"ref20\/cit20","unstructured":"Specifications for Thesis Preparation, Massachusetts Institute of Technology, 2009; http:\/\/libraries.mit.edu\/archives\/thesis-specs\/(accessed November 26, 2009)."},{"key":"ref21\/cit21","volume-title":"Proceedings of the 7th E-Science All Hands Meeting (AHM2007)","author":"Lewin I."},{"key":"ref22\/cit22","first-page":"423","volume-title":"Proceedings of the 5th. International. Workshop on Document Analysis","author":"Le X. L.","year":"2002"},{"key":"ref23\/cit23","unstructured":"Library of Congress Classification Outline Class Q\u2014Science. 2009; http:\/\/www.loc.gov\/aba\/cataloging\/classification\/lcco\/lcco_q.pdf\/ (accessed November 26, 2009)."},{"key":"ref24\/cit24","unstructured":"Downing, J.; Murray-Rust, P.TheOREM Marked-up Theses, 2009; http:\/\/wwmm.ch.cam.ac.uk\/projects\/theorem\/theses\/(accessed November 29, 2009)."},{"key":"ref25\/cit25","first-page":"304","volume":"12","author":"Rhodes J.","year":"2007","journal-title":"Pacific Symp. Biocomput."},{"key":"ref26\/cit26","first-page":"941","volume-title":"IWANN","author":"Grego T.","year":"2009"},{"key":"ref27\/cit27","doi-asserted-by":"crossref","first-page":"2559","DOI":"10.1093\/bioinformatics\/btn469","volume":"24","author":"Tsuruoka Y.","year":"2008","journal-title":"Bioinformatics"},{"key":"ref28\/cit28","unstructured":"JISC Projects, CheTA (Chemistry using Text Annotations). 2009; http:\/\/www.jisc.ac.uk\/whatwedo\/programmes\/inf11\/cheta.aspx(accessed November 29, 2009)."},{"key":"ref29\/cit29a","volume-title":"Abstracts of Papers","author":"Kidd R.","year":"2008"},{"key":"ref29\/cit29b","volume-title":"Abstracts of Papers","author":"Batchelor C. R.","year":"2009"},{"key":"ref30\/cit30","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1007\/11875741_11","volume-title":"Computational Life Sciences II","author":"Corbett P.","year":"2006"},{"key":"ref31\/cit31","doi-asserted-by":"crossref","first-page":"3294","DOI":"10.1039\/b411033a","volume":"2","author":"Townsend J. A.","year":"2004","journal-title":"Org. Biomol. Chem."},{"key":"ref32\/cit32","volume-title":"Proceedings of the 4th UK E-Science All Hands Meeting","author":"Rupp C. J.","year":"2006"},{"key":"ref34\/cit34","doi-asserted-by":"crossref","first-page":"D344","DOI":"10.1093\/nar\/gkm791","volume":"36","author":"Degtyarenko K.","year":"2008","journal-title":"Nucleic Acids Res."},{"key":"ref36\/cit36","unstructured":"PubChem. 2009; http:\/\/pubchem.ncbi.nlm.nih.gov\/(accessed November 29, 2009)."},{"key":"ref37\/cit37","doi-asserted-by":"crossref","unstructured":"Waldron, B.; Copestake, A.A Standoff Annotation Interface between DELPH-IN Components.NLPXML-2006 (Multi-Dimensional Markup in Natural Language Processing), Trento, Italy, 2006.","DOI":"10.3115\/1621034.1621054"},{"key":"ref38\/cit38","unstructured":"Apache PDFBox is an open source Java library for working with PDF documents: Apache Software Foundation, 2008; http:\/\/pdfbox.apache.org\/(accessed November 26, 2009)."},{"key":"ref39\/cit39","unstructured":"CambridgeSoft, 100 Cambridge Park Drive, Cambridge, MA 02140 [http:\/\/www.cambridgesoft.com(accessed November 26, 2009)]."},{"key":"ref40\/cit40","unstructured":"Symyx Technologies, 2440 Camino Ramon, San Ramon, CA 94583 [http:\/\/www.symyx.com(accessed November 26, 2009)]."},{"key":"ref42\/cit42","unstructured":"There are a number of unresolved name-to-structure issues with OPSIN (includingR\/Sstereochemistry):Murray-Rust, P.http:\/\/wwmm.ch.cam.ac.uk\/blogs\/murrayrust\/?p=691(accessed November 29, 2009). In the three PDF theses studied in detail,(53)95% of the preparative procedures were of chiral structures."},{"key":"ref43\/cit43","unstructured":"XML Pointer Language (XPointer), W3C, 2001; http:\/\/www.w3.org\/TR\/WD-xptr(accessed November 29, 2009)."},{"issue":"11","key":"ref44\/cit44","doi-asserted-by":"crossref","first-page":"S4","DOI":"10.1186\/1471-2105-9-S11-S4","volume":"9","author":"Corbett P.","year":"2008","journal-title":"BMC Bioinf."},{"key":"ref45\/cit45","first-page":"29","author":"Berners-Lee T.","year":"2001","journal-title":"Sci. Am."},{"key":"ref46\/cit46","unstructured":"Web Ontology Language OWL, W3C, 2004; http:\/\/www.w3.org\/TR\/owl-features\/(accessed November 26, 2009)."},{"key":"ref47\/cit47","unstructured":"SKOS, W3C, 2008; http:\/\/www.w3.org\/TR\/2008\/WD-skos-reference-20080609\/(accessed November 26, 2009)."},{"key":"ref48\/cit48","unstructured":"Adams, N.;Semantic Chemistry, Semantic Technology Conference,2009.http:\/\/semanticuniverse.com\/articles-semantic-chemistry.html(accessed November 29, 2009)."},{"key":"ref49\/cit49","doi-asserted-by":"crossref","first-page":"2118","DOI":"10.1021\/ci8002123","volume":"48","author":"Adams N.","year":"2008","journal-title":"J. Chem. Inf. Model."},{"key":"ref50\/cit50","doi-asserted-by":"crossref","first-page":"939","DOI":"10.1021\/ci050378m","volume":"46","author":"Taylor K. R.","year":"2006","journal-title":"J. Chem. Inf. Model."},{"key":"ref51\/cit51","doi-asserted-by":"crossref","first-page":"2396","DOI":"10.1021\/ci060139e","volume":"46","author":"Casher O.","year":"2006","journal-title":"J. Chem. Inf. Model."},{"key":"ref52\/cit52","unstructured":"Dodds, L.Introducing SPARQL: Querying the Semantic Web. 2005; http:\/\/www.xml.com\/pub\/a\/2005\/11\/16\/introducing-sparql-querying-semantic-web-tutorial.html(accessed November 26, 2009)."},{"key":"ref53\/cit53a","unstructured":"aHarter, J.\u03c0-Allyltricarbonyliron Lactone Complexes: Versatile Tools for Asymmetric Synthesis; Dept. of Chemistry, Cambridge, 2002 (converted to PDF from the original Word document)(24)."},{"key":"ref53\/cit53b","unstructured":"bBrown, S. B.Iminium and Enamine Activation Methods for Enantioselective Organocatalysis; CalTech, 2005; http:\/\/etd.caltech.edu\/etd\/available\/etd-02242005-174252\/(accessed November 26, 2009)."},{"key":"ref53\/cit53c","unstructured":"cLambert, T. H.Development of the Lewis Acid Catalyzed Allenoate\u2212Claisen Rearrangement. Investigations of Enantioselective Catalysis of the Allenoate\u2212Claisen Rearrangement. Studies towards the Total Synthesis of Erythrolide E; CalTech, 2004;http:\/\/etd.caltech.edu\/etd\/available\/etd-12112003-091509(accessed November 26, 2009)."},{"key":"ref54\/cit54","doi-asserted-by":"crossref","unstructured":"Corbett, P.; Batchelor, C.; Teufel, S.Annotation of Chemical Named Entities.Proceedings of the Workshop on BioNLP 2007: Biological, Translational and Clinical Language Processing;Association for Computational Linguistics:Morristown, NJ, 2007; pp57\u221264.","DOI":"10.3115\/1572392.1572403"},{"key":"ref55\/cit55","unstructured":"JISC Repositories and Preservation Programme; 2009; http:\/\/www.jisc.ac.uk\/whatwedo\/programmes\/reppres.aspx(accessed November 29, 2009)."}],"container-title":["Journal of Chemical Information and Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/ci9003688","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,21]],"date-time":"2023-03-21T02:36:50Z","timestamp":1679366210000},"score":1,"resource":{"primary":{"URL":"https:\/\/pubs.acs.org\/doi\/10.1021\/ci9003688"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,1,20]]},"references-count":55,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2010,2,22]]}},"alternative-id":["10.1021\/ci9003688"],"URL":"https:\/\/doi.org\/10.1021\/ci9003688","relation":{},"ISSN":["1549-9596","1549-960X"],"issn-type":[{"value":"1549-9596","type":"print"},{"value":"1549-960X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2010,1,20]]}}}