{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T07:26:22Z","timestamp":1758266782331,"version":"3.37.3"},"reference-count":50,"publisher":"Oxford University Press (OUP)","license":[{"start":{"date-parts":[[2019,4,25]],"date-time":"2019-04-25T00:00:00Z","timestamp":1556150400000},"content-version":"vor","delay-in-days":114,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000092","name":"National Library of Medicine","doi-asserted-by":"publisher","award":["R56LM011354A","R01LM012527","R01LM011945"],"award-info":[{"award-number":["R56LM011354A","R01LM012527","R01LM011945"]}],"id":[{"id":"10.13039\/100000092","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000071","name":"National Institute of Child Health and Human Development","doi-asserted-by":"publisher","award":["P41 HD062499"],"award-info":[{"award-number":["P41 HD062499"]}],"id":[{"id":"10.13039\/100000071","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,1,1]]},"DOI":"10.1093\/database\/baz045","type":"journal-article","created":{"date-parts":[[2019,3,19]],"date-time":"2019-03-19T16:27:54Z","timestamp":1553012874000},"source":"Crossref","is-referenced-by-count":14,"title":["An effective biomedical document classification scheme in support of biocuration: addressing class imbalance"],"prefix":"10.1093","volume":"2019","author":[{"given":"Xiangying","family":"Jiang","sequence":"first","affiliation":[{"name":"Department of Computer and Information Sciences, University of Delaware, Newark, DE, USA"}]},{"given":"Martin","family":"Ringwald","sequence":"additional","affiliation":[{"name":"The Jackson Laboratory, 600 Main St., Bar Harbor, ME, USA"}]},{"given":"Judith A","family":"Blake","sequence":"additional","affiliation":[{"name":"The Jackson Laboratory, 600 Main St., Bar Harbor, ME, USA"}]},{"given":"Cecilia","family":"Arighi","sequence":"additional","affiliation":[{"name":"Department of Computer and Information Sciences, University of Delaware, Newark, DE, USA"},{"name":"Center of Bioinformatics and Computational Biology, Delaware Biotechnology Institute, Newark, DE, USA"}]},{"given":"Gongbo","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Computer and Information Sciences, University of Delaware, Newark, DE, USA"}]},{"given":"Hagit","family":"Shatkay","sequence":"additional","affiliation":[{"name":"Department of Computer and Information Sciences, University of Delaware, Newark, DE, USA"},{"name":"Center of Bioinformatics and Computational Biology, Delaware Biotechnology Institute, Newark, DE, USA"}]}],"member":"286","published-online":{"date-parts":[[2019,4,25]]},"reference":[{"key":"2019042510330884800_ref1","doi-asserted-by":"crossref","first-page":"370","DOI":"10.1186\/1471-2105-7-370","article-title":"Automatic document classification of biological literature","volume":"7","author":"Chen","year":"2006","journal-title":"BMC Bioinformatics"},{"key":"2019042510330884800_ref2","doi-asserted-by":"crossref","first-page":"16","DOI":"10.1186\/1471-2105-13-16","article-title":"Automatic categorization of diverse experimental information in the bioscience literature","volume":"13","author":"Fang","year":"2012","journal-title":"BMC Bioinformatics"},{"key":"2019042510330884800_ref3","doi-asserted-by":"crossref","DOI":"10.1093\/database\/bas047","article-title":"Building an efficient curation workflow for the Arabidopsis literature corpus","volume":"2012","author":"Li","year":"2012","journal-title":"Database"},{"key":"2019042510330884800_ref4","doi-asserted-by":"crossref","DOI":"10.1093\/database\/bas020","article-title":"Text mining for the biocuration workflow","volume":"2012","author":"Hirschman","year":"2012","journal-title":"Database"},{"key":"2019042510330884800_ref5","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pone.0115892","article-title":"Machine learning for biomedical literature triage","volume":"9","author":"Almeida","year":"2014","journal-title":"PloS One"},{"key":"2019042510330884800_ref6","doi-asserted-by":"crossref","DOI":"10.1093\/database\/bax040","article-title":"Triage by ranking to support the curation of protein interactions","volume":"2017","author":"Mottin","year":"2017","journal-title":"Database"},{"key":"2019042510330884800_ref7","doi-asserted-by":"crossref","DOI":"10.1093\/database\/bas043","article-title":"Biocuration workflows and text mining: overview of the BioCreative 2012 Workshop Track II","volume":"2012","author":"Lu","year":"2012","journal-title":"Database"},{"author":"Mouse Genome Informatics (MGI)","key":"2019042510330884800_ref8"},{"key":"2019042510330884800_ref9","doi-asserted-by":"crossref","first-page":"D723","DOI":"10.1093\/nar\/gkw1040","article-title":"Mouse Genome Database (MGD)-2017: community knowledge resource for the laboratory mouse","volume":"45","author":"Blake","year":"2017","journal-title":"Nucleic Acids Res"},{"key":"2019042510330884800_ref10","doi-asserted-by":"crossref","first-page":"D730","DOI":"10.1093\/nar\/gkw1073","article-title":"The mouse Gene Expression Database (GXD): 2017 update","volume":"45","author":"Finger","year":"2017","journal-title":"Nucleic Acids Res"},{"key":"2019042510330884800_ref11","doi-asserted-by":"crossref","first-page":"D818","DOI":"10.1093\/nar\/gku987","article-title":"Mouse Tumor Biology (MTB): a database of mouse models for human cancer","volume":"43","author":"Bult","year":"2014","journal-title":"Nucleic Acids Res"},{"author":"PubMed","key":"2019042510330884800_ref12"},{"key":"2019042510330884800_ref13","doi-asserted-by":"crossref","DOI":"10.1093\/database\/bax017","article-title":"Effective biomedical document classification for identifying publications relevant to the mouse Gene Expression Database (GXD)","volume":"2017","author":"Jiang","year":"2017","journal-title":"Database"},{"key":"2019042510330884800_ref14","doi-asserted-by":"crossref","first-page":"4451","DOI":"10.1109\/ICIP.2015.7351648","article-title":"Utilizing image-based features in biomedical document classification","volume-title":"2015 IEEE International Conference on Image Processing (ICIP)","author":"Ma","year":"2015"},{"key":"2019042510330884800_ref15","article-title":"Feature generation, feature selection, classifiers, and conceptual drift for biomedical document triage","volume-title":"Thirteenth Text REtrieval Conference (TREC)","author":"Cohen","year":"2004"},{"key":"2019042510330884800_ref16","doi-asserted-by":"crossref","DOI":"10.1186\/1471-2105-6-S1-S1","article-title":"Overview of BioCreAtIvE: critical assessment of information extraction for biology","volume":"6","author":"Hirschman","year":"2005","journal-title":"BMC Bioinformatics"},{"key":"2019042510330884800_ref17","doi-asserted-by":"crossref","first-page":"3454","DOI":"10.1093\/bioinformatics\/btx439","article-title":"On expert curation and scalability: UniProtKB\/Swiss-Prot as a case study","volume":"33","author":"Poux","year":"2017","journal-title":"Bioinformatics"},{"key":"2019042510330884800_ref18","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1016\/j.ins.2013.07.007","article-title":"An insight into classification with imbalanced data: empirical results and current trends on using data intrinsic characteristics","volume":"250","author":"L\u00f3pez","year":"2013","journal-title":"Inf Sci"},{"key":"2019042510330884800_ref19","doi-asserted-by":"crossref","first-page":"224","DOI":"10.7763\/IJMLC.2013.V3.307","article-title":"Addressing the class imbalance problem in medical datasets","volume":"3","author":"Rahman","year":"2013","journal-title":"Int J Mach Learn Computing"},{"key":"2019042510330884800_ref20","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1186\/1471-2105-11-55","article-title":"Semi-automated screening of biomedical citations for systematic reviews","volume":"11","author":"Wallace","year":"2010","journal-title":"BMC Bioinformatics"},{"key":"2019042510330884800_ref21","doi-asserted-by":"crossref","first-page":"S13","DOI":"10.1186\/1471-2105-12-S8-S13","article-title":"Detection of interaction articles and experimental methods in biomedical literature","volume":"12","author":"Schneider","year":"2011","journal-title":"BMC Bioinformatics"},{"author":"Medical Subject Headings (MeSH)","key":"2019042510330884800_ref22"},{"author":"U.S. National Library of Medicine (NLM)","key":"2019042510330884800_ref23"},{"key":"2019042510330884800_ref24","doi-asserted-by":"crossref","DOI":"10.1093\/database\/bav008","article-title":"mycoCLAP, the database for characterized lignocellulose-active proteins of fungal origin: resource and text mining curation support","volume":"2015","author":"Strasser","year":"2015","journal-title":"Database"},{"key":"2019042510330884800_ref25","first-page":"139","article-title":"One-class SVMs for document classification","volume":"2","author":"Manevitz","year":"2001","journal-title":"J Mach Lear Res"},{"key":"2019042510330884800_ref26","doi-asserted-by":"crossref","first-page":"60","DOI":"10.1145\/1007730.1007739","article-title":"Extreme re-balancing for SVMs: a case study","volume":"6","author":"Raskutti","year":"2004","journal-title":"SIGKDD Explor"},{"key":"2019042510330884800_ref27","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1007730.1007733","article-title":"Special issue on learning from imbalanced data sets","volume":"6","author":"Chawla","year":"2004","journal-title":"SIGKDD Explor"},{"key":"2019042510330884800_ref28","doi-asserted-by":"crossref","first-page":"D869","DOI":"10.1093\/nar\/gkx998","article-title":"WormBase 2017: molting into a new stage","volume":"46","author":"Lee","year":"2017","journal-title":"Nucleic Acids Res"},{"key":"2019042510330884800_ref29","doi-asserted-by":"crossref","first-page":"D663","DOI":"10.1093\/nar\/gkw1016","article-title":"FlyBase at 25: looking to the future","volume":"45","author":"Gramates","year":"2016","journal-title":"Nucleic Acids Res"},{"key":"2019042510330884800_ref30","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1186\/1751-0473-7-7","article-title":"Layout-aware text extraction from full-text PDF of scientific articles","volume":"7","author":"Ramakrishnan","year":"2012","journal-title":"Source Code Biol Med"},{"key":"2019042510330884800_ref31","article-title":"The role of title, metadata and abstract in identifying clinically relevant journal articles","volume":"191","author":"Demner-Fushman","year":"2005","journal-title":"Proc. of the Annual Symp. of the American Medical Informatics Association"},{"key":"2019042510330884800_ref32","doi-asserted-by":"crossref","first-page":"e547","DOI":"10.1093\/bioinformatics\/btl261","article-title":"Accessing bioscience images from abstract sentences","volume":"22","author":"Yu","year":"2006","journal-title":"Bioinformatics"},{"key":"2019042510330884800_ref33","first-page":"217","article-title":"Meta-classification: combining multimodal classifiers","volume-title":"Pacific-Asia Conference on Knowledge Discovery and Data Mining","author":"Lin","year":"2002"},{"key":"2019042510330884800_ref34","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1075\/li.30.1.03nad","article-title":"A survey of named entity recognition and classification","volume":"30","author":"Nadeau","year":"2007","journal-title":"Lingvist Investig"},{"key":"2019042510330884800_ref35","doi-asserted-by":"crossref","first-page":"496","DOI":"10.1017\/CBO9780511809071","volume-title":"Introduction to Information Retrieval","author":"Manning","year":"2008"},{"issue":"3","key":"2019042510330884800_ref36","first-page":"768","article-title":"Cluster analysis of multivariate data: efficiency versus interpretability models","volume":"61","author":"Forgy","year":"1965","journal-title":"Biometrics"},{"key":"2019042510330884800_ref37","first-page":"278","article-title":"Random decision forests","volume-title":"IEEE Proceedings of the Third International Conference on Document Analysis and Recognition","author":"Ho","year":"1995"},{"key":"2019042510330884800_ref38","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1007\/BF00994018","article-title":"Support-vector networks","volume":"20","author":"Cortes","year":"1995","journal-title":"Mach Learn"},{"key":"2019042510330884800_ref39","first-page":"148","article-title":"Inductive learning algorithms and representations for text categorization","volume-title":"Proceedings of the Seventh ACM International Conference on Information and Knowledge Management","author":"Dumais","year":"1998"},{"key":"2019042510330884800_ref40","first-page":"604","article-title":"EpiLoc: a (working) text-based system for predicting protein subcellular location","volume-title":"Pacific Symposium on Biocomputing","author":"Brady","year":"2008"},{"key":"2019042510330884800_ref41","doi-asserted-by":"crossref","first-page":"98","DOI":"10.1145\/2382936.2382949","article-title":"OCR-based image features for biomedical image and article classification: identifying documents relevant to cis-regulatory elements","volume-title":"Proceedings of the ACM Conference on Bioinformatics, Computational Biology and Biomedicine","author":"Shatkay","year":"2012"},{"key":"2019042510330884800_ref42","doi-asserted-by":"crossref","first-page":"W518","DOI":"10.1093\/nar\/gkt441","article-title":"PubTator: a web-based text mining tool for assisting biocuration","volume":"41","author":"Wei","year":"2013","journal-title":"Nucleic Acids Res"},{"key":"2019042510330884800_ref43","doi-asserted-by":"crossref","DOI":"10.1093\/database\/bas041","article-title":"Accelerating literature curation with text-mining tools: a case study of using PubTator to curate genes in PubMed abstracts","volume":"2012","author":"Wei","year":"2012","journal-title":"Database"},{"key":"2019042510330884800_ref44","first-page":"145","article-title":"PubTator:\nA PubMedlike interactive curation system for document triage and literature\ncuration","volume-title":"Proceedings of BioCreative 2012 Workshop","author":"Wei","year":"2012"},{"key":"2019042510330884800_ref45","doi-asserted-by":"crossref","first-page":"1915","DOI":"10.1093\/bioinformatics\/btt317","article-title":"BeCAS: biomedical concept recognition services and visualization","volume":"29","author":"Nunes","year":"2013","journal-title":"Bioinformatics"},{"volume-title":"Probability and Statistics for Engineers and Scientists","year":"1993","author":"Myers","key":"2019042510330884800_ref46"},{"key":"2019042510330884800_ref47","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1145\/1656274.1656278","article-title":"The WEKA data mining software: an update","volume":"11","author":"Hall","year":"2009","journal-title":"SIGKDD Explor"},{"key":"2019042510330884800_ref48","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1145\/1961189.1961199","article-title":"LIBSVM: a library for support vector machines","volume":"2","author":"Chang","year":"2011","journal-title":"ACM Trans Intell Syst Technol"},{"key":"2019042510330884800_ref49","doi-asserted-by":"crossref","first-page":"412","DOI":"10.1093\/bioinformatics\/16.5.412","article-title":"Assessing the accuracy of prediction algorithms for classification: an overview","volume":"16","author":"Baldi","year":"2000","journal-title":"Bioinformatics"},{"key":"2019042510330884800_ref50","doi-asserted-by":"crossref","first-page":"401","DOI":"10.1186\/1471-2105-9-401","article-title":"Developing and validating predictive decision tree models from mining chemical structural fingerprints and high-throughput screening data in PubChem","volume":"9","author":"Han","year":"2008","journal-title":"BMC Bioinformatics"}],"container-title":["Database"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/database\/article-pdf\/doi\/10.1093\/database\/baz045\/28526358\/baz045.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,4,25]],"date-time":"2019-04-25T10:33:33Z","timestamp":1556188413000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/database\/article\/doi\/10.1093\/database\/baz045\/5477783"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,1,1]]},"references-count":50,"URL":"https:\/\/doi.org\/10.1093\/database\/baz045","relation":{},"ISSN":["1758-0463"],"issn-type":[{"type":"electronic","value":"1758-0463"}],"subject":[],"published-other":{"date-parts":[[2019]]},"published":{"date-parts":[[2019,1,1]]}}}