{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T01:04:11Z","timestamp":1774573451923,"version":"3.50.1"},"reference-count":17,"publisher":"American Chemical Society (ACS)","issue":"7","license":[{"start":{"date-parts":[[2022,3,29]],"date-time":"2022-03-29T00:00:00Z","timestamp":1648512000000},"content-version":"unspecified","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100000287","name":"Royal Academy of Engineering","doi-asserted-by":"publisher","award":["RCSRF1819\\7\\10"],"award-info":[{"award-number":["RCSRF1819\\7\\10"]}],"id":[{"id":"10.13039\/501100000287","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100004349","name":"BASF","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100004349","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000271","name":"Science and Technology Facilities Council","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000271","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J. Chem. Inf. Model."],"published-print":{"date-parts":[[2022,4,11]]},"DOI":"10.1021\/acs.jcim.1c01198","type":"journal-article","created":{"date-parts":[[2022,3,29]],"date-time":"2022-03-29T18:49:18Z","timestamp":1648579758000},"page":"1633-1643","source":"Crossref","is-referenced-by-count":44,"title":["PDFDataExtractor: A Tool for Reading Scientific Text and Interpreting Metadata from the Typeset Literature in the Portable Document Format"],"prefix":"10.1021","volume":"62","author":[{"given":"Miao","family":"Zhu","sequence":"first","affiliation":[{"name":"Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K."}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1552-8743","authenticated-orcid":true,"given":"Jacqueline M.","family":"Cole","sequence":"additional","affiliation":[{"name":"Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K."},{"name":"ISIS Neutron and Muon Source, STFC Rutherford Appleton Laboratory, Harwell Science and Innovation Campus, Didcot, Oxfordshire OX11 0QX, U.K."},{"name":"Department of Chemical Engineering and Biotechnology, University of Cambridge, West Cambridge Site, Philippa Fawcett Drive, Cambridge CB3 0AS, U.K."}]}],"member":"316","published-online":{"date-parts":[[2022,3,29]]},"reference":[{"key":"ref1\/cit1","doi-asserted-by":"crossref","first-page":"177","DOI":"10.1145\/2494266.2494271","volume-title":"Proceedings of the 2013 ACM symposium on Document engineering - DocEng\u201913","author":"Constantin A.","year":"2013"},{"key":"ref2\/cit2","doi-asserted-by":"crossref","unstructured":"Manning, C.; Surdeanu, M.; Bauer, J.; Finkel, J.; Bethard, S.; McClosky, D. The Stanford Corenlp Natural Language Processing Toolkit. In  Proceedings of 52nd Annual Meeting of the Association for Computational Linguistics: System Demonstrations, 2014.","DOI":"10.3115\/v1\/P14-5010"},{"key":"ref3\/cit3","unstructured":"GitHub - explosion\/spaCy:\nIndustrial-strength Natural Language Processing (NLP) in Python. https:\/\/github.com\/explosion\/spaCy (accessed Mar 3, 2022)."},{"key":"ref4\/cit4","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.6b00207"},{"key":"ref5\/cit5","doi-asserted-by":"crossref","first-page":"129","DOI":"10.1007\/11669487_12","volume-title":"Document Analysis Systems VII","volume":"3872","author":"D\u00e9jean H.","year":"2006"},{"key":"ref6\/cit6","first-page":"1","volume-title":"2017 ACM\/IEEE Joint Conference on Digital Libraries (JCDL)","author":"Bast H.","year":"2017"},{"key":"ref7\/cit7","doi-asserted-by":"publisher","DOI":"10.1186\/1751-0473-7-7"},{"key":"ref8\/cit8","doi-asserted-by":"publisher","DOI":"10.1021\/acs.chemrev.6b00851"},{"key":"ref9\/cit9","doi-asserted-by":"publisher","DOI":"10.1007\/s10032-015-0249-8"},{"key":"ref10\/cit10","unstructured":"GitHub -\npdfminer\/pdfminer.six:\nCommunity maintained fork of pdfminer - we fathom PDF. https:\/\/github.com\/pdfminer\/pdfminer.six (accessed Mar 3, 2022)."},{"key":"ref11\/cit11","doi-asserted-by":"crossref","first-page":"35","DOI":"10.1007\/978-3-319-41264-1_5","volume-title":"Computers Helping People with Special Needs","volume":"9758","author":"Suzuki M.","year":"2016"},{"key":"ref12\/cit12","unstructured":"GitHub - jalan\/pdftotext:\nSimple PDF text extraction. https:\/\/github.com\/jalan\/pdftotext (accessed Mar 3, 2022)."},{"key":"ref13\/cit13","unstructured":"A Tool and Library That\nCan Extract Various Areas of Text from a PDF, Especially a Scholarly\nArticle PDF.: CrossRef\/Pdfextract. Crossref, 2019."},{"key":"ref14\/cit14","unstructured":"Councill, I. G.; Giles, C. L.; Kan, M.Y. P. C. An Open-Source CRF Reference String Parsing Package. In  LREC, 2008."},{"key":"ref15\/cit15","doi-asserted-by":"crossref","first-page":"473","DOI":"10.1007\/978-3-642-04346-8_62","volume-title":"Research and Advanced Technology for Digital Libraries","author":"Lopez P.","year":"2009"},{"key":"ref16\/cit16","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0127502"},{"key":"ref17\/cit17","unstructured":"GitHub -\natlanhq\/camelot:\nCamelot: PDF Table Extraction for Humans. https:\/\/github.com\/atlanhq\/camelot (accessed Mar 3, 2022)."}],"container-title":["Journal of Chemical Information and Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.1c01198","content-type":"application\/pdf","content-version":"vor","intended-application":"unspecified"},{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.1c01198","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,4,25]],"date-time":"2023-04-25T19:29:35Z","timestamp":1682450975000},"score":1,"resource":{"primary":{"URL":"https:\/\/pubs.acs.org\/doi\/10.1021\/acs.jcim.1c01198"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,3,29]]},"references-count":17,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2022,4,11]]}},"alternative-id":["10.1021\/acs.jcim.1c01198"],"URL":"https:\/\/doi.org\/10.1021\/acs.jcim.1c01198","relation":{},"ISSN":["1549-9596","1549-960X"],"issn-type":[{"value":"1549-9596","type":"print"},{"value":"1549-960X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,3,29]]}}}