{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,18]],"date-time":"2026-04-18T00:43:22Z","timestamp":1776473002701,"version":"3.51.2"},"reference-count":18,"publisher":"Oxford University Press (OUP)","issue":"2","license":[{"start":{"date-parts":[[2020,7,28]],"date-time":"2020-07-28T00:00:00Z","timestamp":1595894400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/open_access\/funder_policies\/chorus\/standard_publication_model"}],"funder":[{"DOI":"10.13039\/501100006769","name":"Russian Science Foundation","doi-asserted-by":"publisher","award":["18-11-00284"],"award-info":[{"award-number":["18-11-00284"]}],"id":[{"id":"10.13039\/501100006769","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,4,19]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Drugs and diseases play a central role in many areas of biomedical research and healthcare. Aggregating knowledge about these entities across a broader range of domains and languages is critical for information extraction (IE) applications. To facilitate text mining methods for analysis and comparison of patient\u2019s health conditions and adverse drug reactions reported on the Internet with traditional sources such as drug labels, we present a new corpus of Russian language health reviews.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>The Russian Drug Reaction Corpus (RuDReC) is a new partially annotated corpus of consumer reviews in Russian about pharmaceutical products for the detection of health-related named entities and the effectiveness of pharmaceutical products. The corpus itself consists of two parts, the raw one and the labeled one. The raw part includes 1.4 million health-related user-generated texts collected from various Internet sources, including social media. The labeled part contains 500 consumer reviews about drug therapy with drug- and disease-related information. Labels for sentences include health-related issues or their absence. The sentences with one are additionally labeled at the expression level for identification of fine-grained subtypes such as drug classes and drug forms, drug indications and drug reactions. Further, we present a baseline model for named entity recognition (NER) and multilabel sentence classification tasks on this corpus. The macro F1 score of 74.85% in the NER task was achieved by our RuDR-BERT model. For the sentence classification task, our model achieves the macro F1 score of 68.82% gaining 7.47% over the score of BERT model trained on Russian data.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>We make the RuDReC corpus and pretrained weights of domain-specific BERT models freely available at https:\/\/github.com\/cimm-kzn\/RuDReC.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Supplementary information<\/jats:title>\n                  <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btaa675","type":"journal-article","created":{"date-parts":[[2020,7,22]],"date-time":"2020-07-22T19:24:53Z","timestamp":1595445893000},"page":"243-249","source":"Crossref","is-referenced-by-count":26,"title":["The Russian Drug Reaction Corpus and neural models for drug reactions and effectiveness detection in user reviews"],"prefix":"10.1093","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7936-0284","authenticated-orcid":false,"given":"Elena","family":"Tutubalina","sequence":"first","affiliation":[{"name":"Chemoinformatics and Molecular Modeling Laboratory, The Alexander Butlerov Institute of Chemistry, Kazan Federal University , Kazan 420008, Russian Federation"}]},{"given":"Ilseyar","family":"Alimova","sequence":"additional","affiliation":[{"name":"Chemoinformatics and Molecular Modeling Laboratory, The Alexander Butlerov Institute of Chemistry, Kazan Federal University , Kazan 420008, Russian Federation"}]},{"given":"Zulfat","family":"Miftahutdinov","sequence":"additional","affiliation":[{"name":"Chemoinformatics and Molecular Modeling Laboratory, The Alexander Butlerov Institute of Chemistry, Kazan Federal University , Kazan 420008, Russian Federation"}]},{"given":"Andrey","family":"Sakhovskiy","sequence":"additional","affiliation":[{"name":"Chemoinformatics and Molecular Modeling Laboratory, The Alexander Butlerov Institute of Chemistry, Kazan Federal University , Kazan 420008, Russian Federation"}]},{"given":"Valentin","family":"Malykh","sequence":"additional","affiliation":[{"name":"Chemoinformatics and Molecular Modeling Laboratory, The Alexander Butlerov Institute of Chemistry, Kazan Federal University , Kazan 420008, Russian Federation"}]},{"given":"Sergey","family":"Nikolenko","sequence":"additional","affiliation":[{"name":"Chemoinformatics and Molecular Modeling Laboratory, The Alexander Butlerov Institute of Chemistry, Kazan Federal University , Kazan 420008, Russian Federation"},{"name":"Samsung-PDMI AI Center, Steklov Institute of Mathematics at St. Petersburg , St. Petersburg 191023, Russian Federation"}]}],"member":"286","published-online":{"date-parts":[[2020,7,28]]},"reference":[{"key":"2023051510593442000_btaa675-B1","first-page":"64","author":"Alimova","year":"2017"},{"key":"2023051510593442000_btaa675-B2","doi-asserted-by":"crossref","first-page":"e24","DOI":"10.2196\/publichealth.6396","article-title":"TwiMed: twitter and PubMed comparable corpus of drugs, diseases, symptoms, and their relations","volume":"3","author":"Alvaro","year":"2017","journal-title":"JMIR Public Health Surveill"},{"key":"2023051510593442000_btaa675-B3","doi-asserted-by":"crossref","first-page":"3539","DOI":"10.1093\/bioinformatics\/bty356","article-title":"D3NER: biomedical named entity recognition using CRF-biLSTM improved with fine-tuned embeddings of various linguistic information","volume":"34","author":"Dang","year":"2018","journal-title":"Bioinformatics"},{"key":"2023051510593442000_btaa675-B4","first-page":"4171","author":"Devlin","year":"2019"},{"key":"2023051510593442000_btaa675-B5","author":"Giorgi","year":"2019"},{"key":"2023051510593442000_btaa675-B6","doi-asserted-by":"crossref","first-page":"1274","DOI":"10.1093\/jamia\/ocy114","article-title":"Data and systems for medication-related text classification and concept normalization from Twitter: insights from the Social Media Mining for Health (SMM4H)-2017 shared task","volume":"25","author":"Gonzalez-Hernandez","year":"2018","journal-title":"J. Am. Med. Inf. Assoc"},{"key":"2023051510593442000_btaa675-B7","doi-asserted-by":"crossref","first-page":"baw042","DOI":"10.1093\/database\/baw042","article-title":"Chemical-induced disease relation extraction with various linguistic features","volume":"2016","author":"Gu","year":"2016","journal-title":"Database"},{"key":"2023051510593442000_btaa675-B8","doi-asserted-by":"crossref","first-page":"73","DOI":"10.1016\/j.jbi.2015.03.010","article-title":"CADEC: a corpus of adverse drug event annotations","volume":"55","author":"Karimi","year":"2015","journal-title":"J. Biomed. Inf"},{"key":"2023051510593442000_btaa675-B9","first-page":"5","author":"Klie","year":"2018"},{"key":"2023051510593442000_btaa675-B10","author":"Kuratov","year":"2019"},{"key":"2023051510593442000_btaa675-B11","doi-asserted-by":"crossref","first-page":"baw091","DOI":"10.1093\/database\/baw091","article-title":"AuDis: an automatic CRF-enhanced disease normalization in biomedical text","volume":"2016","author":"Lee","year":"2016","journal-title":"Database"},{"key":"2023051510593442000_btaa675-B12","doi-asserted-by":"crossref","first-page":"1234","DOI":"10.1093\/bioinformatics\/btz682","article-title":"BioBERT: pre-trained biomedical language representation model for biomedical text mining","author":"Lee","year":"2020","journal-title":"Bioinformatics."},{"key":"2023051510593442000_btaa675-B13","first-page":"155","author":"Miftahutdinov","year":"2017"},{"key":"2023051510593442000_btaa675-B14","first-page":"281","author":"Miftahutdinov","year":"2020"},{"key":"2023051510593442000_btaa675-B9649018","first-page":"560","volume":"1","author":"Shelmanov","year":"2015","journal-title":"Information extraction from clinical texts in Russian. In\u00a0 Komp'juternaja Lingvistika i Intellektual'nye Tehnologii,\u00a0 Conference Paper"},{"key":"2023051510593442000_btaa675-B15","doi-asserted-by":"crossref","first-page":"1397","DOI":"10.1007\/s40264-018-0707-6","article-title":"Methods to compare adverse events in twitter to FAERS, drug information databases, and systematic reviews: proof of concept with adalimumab","volume":"41","author":"Smith","year":"2018","journal-title":"Drug Safety"},{"key":"2023051510593442000_btaa675-B16","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1155\/2017\/9451342","article-title":"Combination of deep recurrent neural networks and conditional random fields for extracting adverse drug reactions from user reviews","volume":"2017","author":"Tutubalina","year":"2017","journal-title":"J. Healthc. Eng"},{"key":"2023051510593442000_btaa675-B17","doi-asserted-by":"crossref","first-page":"103091","DOI":"10.1016\/j.jbi.2018.12.005","article-title":"A systematic approach for developing a corpus of patient reported adverse drug events: a case study for SSRI and SNRI medications","volume":"90","author":"Zolnoori","year":"2019","journal-title":"J. Biomed. Inf"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btaa675\/33861669\/btaa675.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/37\/2\/243\/50321457\/btaa675.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/37\/2\/243\/50321457\/btaa675.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,15]],"date-time":"2023-05-15T11:00:14Z","timestamp":1684148414000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/37\/2\/243\/5877427"}},"subtitle":[],"editor":[{"given":"Jonathan","family":"Wren","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2020,7,28]]},"references-count":18,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2021,4,19]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btaa675","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2021,1,15]]},"published":{"date-parts":[[2020,7,28]]}}}