{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,27]],"date-time":"2026-02-27T04:28:34Z","timestamp":1772166514427,"version":"3.50.1"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2019,5,22]],"date-time":"2019-05-22T00:00:00Z","timestamp":1558483200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"},{"start":{"date-parts":[[2019,5,22]],"date-time":"2019-05-22T00:00:00Z","timestamp":1558483200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["5R01GM110597-03"],"award-info":[{"award-number":["5R01GM110597-03"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003141","name":"Consejo Nacional de Ciencia y Tecnolog\u00eda","doi-asserted-by":"crossref","award":["576333"],"award-info":[{"award-number":["576333"]}],"id":[{"id":"10.13039\/501100003141","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100003141","name":"Consejo Nacional de Ciencia y Tecnolog\u00eda","doi-asserted-by":"crossref","award":["FOINS Fronteras de la Ciencia [project 15]"],"award-info":[{"award-number":["FOINS Fronteras de la Ciencia [project 15]"]}],"id":[{"id":"10.13039\/501100003141","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Biomed Semant"],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1186\/s13326-019-0200-x","type":"journal-article","created":{"date-parts":[[2019,5,22]],"date-time":"2019-05-22T11:06:37Z","timestamp":1558523197000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Similarity corpus on microbial transcriptional regulation"],"prefix":"10.1186","volume":"10","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1995-1669","authenticated-orcid":false,"given":"Oscar","family":"Lithgow-Serrano","sequence":"first","affiliation":[]},{"given":"Socorro","family":"Gama-Castro","sequence":"additional","affiliation":[]},{"given":"Cecilia","family":"Ishida-Guti\u00e9rrez","sequence":"additional","affiliation":[]},{"given":"Citlalli","family":"Mej\u00eda-Almonte","sequence":"additional","affiliation":[]},{"given":"V\u00edctor H.","family":"Tierrafr\u00eda","sequence":"additional","affiliation":[]},{"given":"Sara","family":"Mart\u00ednez-Luna","sequence":"additional","affiliation":[]},{"given":"Alberto","family":"Santos-Zavaleta","sequence":"additional","affiliation":[]},{"given":"David","family":"Vel\u00e1zquez-Ram\u00edrez","sequence":"additional","affiliation":[]},{"given":"Julio","family":"Collado-Vides","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,5,22]]},"reference":[{"issue":"D1","key":"200_CR1","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1093\/nar\/gkv1156","volume":"44","author":"S Gama-Castro","year":"2016","unstructured":"Gama-Castro S, Salgado H, Santos-Zavaleta A, Ledezma-Tejeida D, Mu\u00f1iz-Rascado L, Garc\u00eda-Sotelo JS, Alquicira-Hern\u00e1ndez K, Mart\u00ednez-Flores I, Pannier L, Castro-Mondrag\u00f3n JA, Medina-Rivera A, Solano-Lira H, Bonavides-Mart\u00ednez C, P\u00e9rez-Rueda E, Alquicira-Hern\u00e1ndez S, Porr\u00f3n-Sotelo L, L\u00f3pez-Fuentes A, Hern\u00e1ndez-Koutoucheva A, Del Moral-Chavez V, Rinaldi F, Collado-Vides J. RegulonDB version 9.0: High-level integration of gene regulation, coexpression, motif clustering and beyond. Nucleic Acids Res. 2016; 44(D1):133\u201343. \n                    https:\/\/doi.org\/10.1093\/nar\/gkv1156\n                    \n                  .","journal-title":"Nucleic Acids Res"},{"key":"200_CR2","doi-asserted-by":"publisher","unstructured":"Santos-Zavaleta A, Salgado H, Gama-Castro S, S\u00e1nchez-P\u00e9rez M, G\u00f3mez-Romero L, Ledezma-Tejeida D, Garc\u00eda-Sotelo JS, Alquicira-Hern\u00e1ndez K, Mu\u00f1iz-Rascado LJ, Pe\u00f1a-Loredo P, Ishida-Guti\u00e9rrez C, Vel\u00e1zquez-Ram\u00edrez DA, Del Moral-Ch\u00e1vez V, Bonavides-Mart\u00ednez C, M\u00e9ndez-Cruz C-F, Galagan J, Collado-Vides J. RegulonDB v 10.5: tackling challenges to unify classic and high throughput knowledge of gene regulation in E. coli K-12. Nucleic Acids Res. 2018:1\u20139. \n                    https:\/\/doi.org\/10.1093\/nar\/gky1077\n                    \n                  .","DOI":"10.1093\/nar\/gky1077"},{"key":"200_CR3","first-page":"32","volume":"1","author":"E Agirre","year":"2013","unstructured":"Agirre E, Cer D, Diab M, Gonzalez-Agirre A, Guo W. SEM 2013 shared task : Semantic Textual Similarity. Second Jt Conf Lexical Comput Semant (SEM 2013). 2013; 1:32\u201343.","journal-title":"Second Jt Conf Lexical Comput Semant (SEM 2013)"},{"issue":"2","key":"200_CR4","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham M, Van Gool L, Williams CKI, Winn J, Zisserman A. The Pascal visual object classes (VOC) challenge. Int J Comput Vis. 2010; 88(2):303\u201338. \n                    https:\/\/doi.org\/10.1007\/s11263-009-0275-4\n                    \n                  .","journal-title":"Int J Comput Vis"},{"key":"200_CR5","doi-asserted-by":"publisher","DOI":"10.4018\/978-1-61350-447-5.ch006","volume-title":"The User-Language Paraphrase Corpus","author":"PM McCarthy","year":"2012","unstructured":"McCarthy PM, McNamara DS. The User-Language Paraphrase Corpus. Cross-Disciplinary Adv Appl Nat Lang Process [Internet]. Hershey: IGI Global; 2012, pp. 73\u201389. Available from: \n                    http:\/\/services.igi-global.com\/resolvedoi\/resolve.aspx?doi=10.4018\/978-1-61350-447-5.ch006\n                    \n                  ."},{"key":"200_CR6","unstructured":"Rus V, Lintean M, Moldovan C, Baggett W. The SIMILAR Corpus: A Resource to Foster the Qualitative Understanding of Semantic Similarity of Texts. Semant Relations II Enhancing Resour Appl 8th Lang Resour Eval Conf (LREC 2012). 2012.: p. 23\u20135."},{"key":"200_CR7","unstructured":"Dolan WB, Brockett C. Automatically Constructing a Corpus of Sentential Paraphrases. In: Proc Third Int Work Paraphrasing [Internet]. Asia Federation of Natural Language Processing: 2005. p. 9\u201316. Available from: \n                    https:\/\/www.microsoft.com\/en-us\/research\/publication\/automaticallyconstructing-a-corpus-of-sentential-paraphrases\/\n                    \n                  ."},{"key":"200_CR8","doi-asserted-by":"publisher","unstructured":"Bernhard D, Gurevych I. Answering learners\u2019 questions by retrieving question paraphrases from social Q&A sites. Proc Third Work Innov Use NLP Build Educ Appl - EANL \u201908 (June). 2008:44\u201352. \n                    https:\/\/doi.org\/10.3115\/1631836.1631842\n                    \n                  .","DOI":"10.3115\/1631836.1631842"},{"key":"200_CR9","doi-asserted-by":"publisher","unstructured":"Sogancloglu G, \u00d6zt\u00fcrk H, \u00d6zg\u00fcr A. BIOSSES: A semantic sentence similarity estimation system for the biomedical domain. In: Bioinformatics: 2017. p. 49\u201358. \n                    https:\/\/doi.org\/10.1093\/bioinformatics\/btx238\n                    \n                  .","DOI":"10.1093\/bioinformatics\/btx238"},{"key":"200_CR10","unstructured":"Sinclair J. Developing linguistic corpora: a guide to good practice. 2004. \n                    https:\/\/ota.ox.ac.uk\/documents\/creating\/dlc\/chapter1.htm\n                    \n                   Accessed 16 May 2017."},{"key":"200_CR11","doi-asserted-by":"crossref","first-page":"75","DOI":"10.13053\/rcs-117-1-6","volume":"117","author":"B Karaoglan","year":"2016","unstructured":"Karaoglan B, Kisla T, Metin SK, H\u00fcrriyetoglu U, Soleymanzadeh K. Using Multiple Metrics in Automatically Building Turkish Paraphrase Corpus. Res Comput Sci. 2016; 117:75\u201383.","journal-title":"Res Comput Sci"},{"issue":"1","key":"200_CR12","first-page":"7","volume":"48","author":"P Paroubek","year":"2007","unstructured":"Paroubek P, Chaudiron S, Hirschman L. Principles of evaluation in natural language processing. Traitement Automatique des Langues. 2007; 48(1):7\u201331.","journal-title":"Traitement Automatique des Langues"},{"issue":"3","key":"200_CR13","doi-asserted-by":"publisher","first-page":"460","DOI":"10.1016\/j.jbi.2011.12.010","volume":"45","author":"D Juckett","year":"2012","unstructured":"Juckett D. A method for determining the number of documents needed for a gold standard corpus. J Biomed Inform. 2012; 45(3):460\u201370. \n                    https:\/\/doi.org\/10.1016\/j.jbi.2011.12.010\n                    \n                  .","journal-title":"J Biomed Inform"},{"key":"200_CR14","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1037\/0033-2909.112.1.155","volume":"112","author":"J Cohen","year":"1992","unstructured":"Cohen J. A power primer. Psychol Bull. 1992; 112:155\u20139. Available from: \n                    http:\/\/www.ncbi.nlm.nih.gov\/pubmed\/19565683\n                    \n                  .","journal-title":"Psychol Bull"},{"key":"200_CR15","doi-asserted-by":"publisher","first-page":"124","DOI":"10.20982\/tqmp.10.2.p0124","volume":"10","author":"M Moinester","year":"2014","unstructured":"Moinester M, Gottfried R. Sample size estimation for correlations with pre-specified confidence interval. The Quantitative Methods for Psychology. 2014; 10:124\u201330. Available from: \n                    http:\/\/www.tqmp.org\/RegularArticles\/vol10-2\/p124\n                    \n                  .","journal-title":"The Quantitative Methods for Psychology"},{"issue":"1","key":"200_CR16","first-page":"78","volume":"7","author":"CL Chuan","year":"2006","unstructured":"Chuan CL, Penyelidikan J. Sample size estimation using Krejcie and Morgan and Cohen statistical power analysis: A comparison. Jurnal Penyelidikan IPBL. 2006; 7(1):78\u201386.","journal-title":"Jurnal Penyelidikan IPBL"},{"issue":"1","key":"200_CR17","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1007\/s10579-015-9318-3","volume":"50","author":"D Jurgens","year":"2016","unstructured":"Jurgens D, Pilehvar MT, Navigli R. Cross level semantic similarity: an evaluation framework for universal measures of similarity. Lang Resour Eval. 2016; 50(1):5\u201333. \n                    https:\/\/doi.org\/10.1007\/s10579-015-9318-3\n                    \n                  .","journal-title":"Lang Resour Eval"},{"issue":"5","key":"200_CR18","doi-asserted-by":"publisher","first-page":"4777","DOI":"10.3233\/JIFS-179026","volume":"36","author":"O Lithgow-serrano","year":"2019","unstructured":"Lithgow-serrano O, Collado-Vides J. In the pursuit of semantic similarity for literature on microbial transcriptional regulation. J Intell Fuzzy Syst. 2019; 36(5):4777\u201386. \n                    https:\/\/www.doi.org\/10.3233\/JIFS-179026\n                    \n                  .","journal-title":"J Intell Fuzzy Syst"},{"key":"200_CR19","first-page":"144","volume":"2012","author":"L Deleger","year":"2012","unstructured":"Deleger L, Li Q, Lingren T, Kaiser M, Molnar K, Stoutenborough L, Kouril M, Marsolo K, Solti I. Building gold standard corpora for medical natural language processing tasks. AMIA... Ann Symp Proc \/ AMIA Symp. AMIA Symp. 2012; 2012:144\u201353.","journal-title":"AMIA... Ann Symp Proc \/ AMIA Symp. AMIA Symp"},{"key":"200_CR20","unstructured":"Torres-Moreno J-M, Sierra G, Peinl P. A German Corpus for Text Similarity Detection Tasks. 2017; 5(2). http:\/\/arxiv.org\/abs\/1703.03923."},{"issue":"1","key":"200_CR21","doi-asserted-by":"publisher","first-page":"23","DOI":"10.20982\/tqmp.08.1.p023","volume":"8","author":"KA Hallgren","year":"2012","unstructured":"Hallgren KA. Computing Inter-Rater Reliability for Observational Data: An Overview and Tutorial. Tutor Quant Methods Psychol. 2012; 8(1):23\u201334. \n                    https:\/\/doi.org\/10.20982\/tqmp.08.1.p023\n                    \n                  .","journal-title":"Tutor Quant Methods Psychol"},{"key":"200_CR22","first-page":"1","volume":"2","author":"K Gwet","year":"2002","unstructured":"Gwet K. Inter-Rater Reliability : Dependency on trait prevalence and marginal homogeneity. Stat Methods Inter-Reliability Assess. 2002; 2:1\u20139.","journal-title":"Stat Methods Inter-Reliability Assess"},{"issue":"1","key":"200_CR23","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1007\/s10579-014-9272-5","volume":"49","author":"M Vila","year":"2014","unstructured":"Vila M, Bertran M, Mart\u00ed MA, Rodr\u00edguez H. Corpus annotation with paraphrase types: new annotation scheme and inter-annotator agreement measures. Lang Resour Eval. 2014; 49(1):77\u2013105. \n                    https:\/\/doi.org\/10.1007\/s10579-014-9272-5\n                    \n                  .","journal-title":"Lang Resour Eval"},{"key":"200_CR24","doi-asserted-by":"publisher","first-page":"58","DOI":"10.3115\/1611628.1611637","volume":"August","author":"PK Bhowmick","year":"2008","unstructured":"Bhowmick PK, Mitra P, Basu A. An agreement measure for determining inter-annotator reliability of human judgements on affective text. Proc Work Hum Judgements Comput Linguist - HumanJudge \u201908. 2008; August:58\u201365. \n                    https:\/\/doi.org\/10.3115\/1611628.1611637\n                    \n                  .","journal-title":"Proc Work Hum Judgements Comput Linguist - HumanJudge \u201908"},{"key":"200_CR25","doi-asserted-by":"publisher","first-page":"276","DOI":"10.11613\/BM.2012.031","volume":"22","author":"ML Mchugh","year":"2012","unstructured":"Mchugh ML. Interrater reliability : the kappa statistic Importance of measuring interrater reliability Measurement of interrater reliability. Biochem Med (Zagreb). 2012; 22:276\u201382.","journal-title":"Biochem Med (Zagreb)"},{"issue":"5","key":"200_CR26","doi-asserted-by":"publisher","first-page":"378","DOI":"10.1037\/h0031619","volume":"76","author":"JL Fleiss","year":"1971","unstructured":"Fleiss JL. Measuring nominal scale agreement among many raters. Psychol Bull. 1971; 76(5):378\u201382. \n                    https:\/\/doi.org\/10.1037\/h0031619\n                    \n                  .","journal-title":"Psychol Bull"},{"issue":"1","key":"200_CR27","doi-asserted-by":"publisher","first-page":"3","DOI":"10.2466\/pr0.1966.19.1.3","volume":"19","author":"JJ Bartko","year":"1966","unstructured":"Bartko JJ. The Intraclass Correlation Coefficient as a Measure of Reliability. Psychol Rep. 1966; 19(1):3\u201311. \n                    https:\/\/doi.org\/10.2466\/pr0.1966.19.1.3\n                    \n                  .","journal-title":"Psychol Rep"},{"key":"200_CR28","volume-title":"Rank Correlation Methods","author":"MG Kendall","year":"1948","unstructured":"Kendall MG. Rank Correlation Methods. Oxford, England: Griffin; 1948."},{"issue":"1","key":"200_CR29","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1177\/001316446002000104","volume":"20","author":"J Cohen","year":"1960","unstructured":"Cohen J. A coefficient of agreement for nominal scales. Educ Psychol Meas. 1960; 20(1):37\u201346. \n                    https:\/\/doi.org\/10.1177\/001316446002000104\n                    \n                  .","journal-title":"Educ Psychol Meas"},{"key":"200_CR30","first-page":"1","volume":"1","author":"K Gwet","year":"2002","unstructured":"Gwet K. Kappa statistic is not satisfactory for assessing the extent of agreement between raters. Stat Methods Inter-Reliability Assess. 2002; 1:1\u20135.","journal-title":"Stat Methods Inter-Reliability Assess"},{"issue":"1","key":"200_CR31","doi-asserted-by":"publisher","first-page":"159","DOI":"10.2307\/2529310","volume":"33","author":"JR Landis","year":"1977","unstructured":"Landis JR, Koch GG. The measurement of observer agreement for categorical data. Biometrics. 1977; 33(1):159. \n                    https:\/\/doi.org\/10.2307\/2529310\n                    \n                  .","journal-title":"Biometrics"},{"key":"200_CR32","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1186\/1471-2288-13-61","volume":"13","author":"N Wongpakaran","year":"2013","unstructured":"Wongpakaran N, Wongpakaran T, Wedding D, Gwet KL. A comparison of Cohen\u2019s Kappa and Gwet\u2019s AC1 when calculating inter-rater reliability coefficients: a study conducted with personality disorder samples. BMC Med Res Methodol. 2013; 13:61. Available from: \n                    https:\/\/bmcmedresmethodol.biomedcentral.com\/articles\/10.1186\/1471-2288-13-61\n                    \n                  .","journal-title":"BMC Med Res Methodol"},{"key":"200_CR33","doi-asserted-by":"publisher","first-page":"430","DOI":"10.1016\/0010-0285(72)90016-3","volume":"3","author":"D Kahneman","year":"1972","unstructured":"Kahneman D, Tversky A. Subjective probability: A judgment of representativeness. Cogn Psychol. 1972; 3:430\u201354. Available from: \n                    https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/0010028572900163\n                    \n                  .","journal-title":"Cogn Psychol"},{"key":"200_CR34","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1037\/h0055737","volume":"49","author":"CE Osgood","year":"1952","unstructured":"Osgood CE. The nature and measurement of meaning. Psychol Bull. 1952; 49:197\u2013237. Available from: \n                    https:\/\/doi.org\/10.1037\/h0055737\n                    \n                  .","journal-title":"Psychol Bull"},{"key":"200_CR35","doi-asserted-by":"publisher","first-page":"683","DOI":"10.1080\/00048402.2012.728233","volume":"91","author":"AMC Isaac","year":"2013","unstructured":"Isaac AMC. Objective Similarity and Mental Representation. Australas J Philos. 2013; 91:683\u2013704. Available from: \n                    http:\/\/www.tandfonline.com\/doi\/abs\/10.1080\/00048402.2012.728233\n                    \n                  .","journal-title":"Australas J Philos"},{"key":"200_CR36","doi-asserted-by":"crossref","unstructured":"Rubenstein H, Goodenoug JB. Contextual correlates of synonymy. Commun ACM. 1965; 8(10).","DOI":"10.1145\/365628.365657"}],"container-title":["Journal of Biomedical Semantics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13326-019-0200-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s13326-019-0200-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13326-019-0200-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,20]],"date-time":"2020-05-20T19:40:50Z","timestamp":1590003650000},"score":1,"resource":{"primary":{"URL":"https:\/\/jbiomedsem.biomedcentral.com\/articles\/10.1186\/s13326-019-0200-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,5,22]]},"references-count":36,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2019,12]]}},"alternative-id":["200"],"URL":"https:\/\/doi.org\/10.1186\/s13326-019-0200-x","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/219014","asserted-by":"object"}]},"ISSN":["2041-1480"],"issn-type":[{"value":"2041-1480","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,5,22]]},"assertion":[{"value":"9 February 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 April 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 May 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Not applicable.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare that they have no competing interests.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Publisher\u2019s Note"}}],"article-number":"8"}}