{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T09:35:03Z","timestamp":1774517703509,"version":"3.50.1"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"S17","license":[{"start":{"date-parts":[[2018,12,1]],"date-time":"2018-12-01T00:00:00Z","timestamp":1543622400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BMC Bioinformatics"],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1186\/s12859-018-2466-x","type":"journal-article","created":{"date-parts":[[2018,12,20]],"date-time":"2018-12-20T07:45:44Z","timestamp":1545291944000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":70,"title":["Using natural language processing and machine learning to identify breast cancer local recurrence"],"prefix":"10.1186","volume":"19","author":[{"given":"Zexian","family":"Zeng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sasa","family":"Espino","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ankita","family":"Roy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoyu","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Seema A.","family":"Khan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Susan E.","family":"Clare","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xia","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Richard","family":"Neapolitan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuan","family":"Luo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,12,28]]},"reference":[{"issue":"9503","key":"2466_CR1","first-page":"2087","volume":"366","author":"Group EBCTC","year":"2006","unstructured":"Group EBCTC. Effects of radiotherapy and of differences in the extent of surgery for early breast cancer on local recurrence and 15-year survival: an overview of the randomised trials. Lancet. 2006;366(9503):2087\u2013106.","journal-title":"Lancet"},{"issue":"5","key":"2466_CR2","doi-asserted-by":"publisher","first-page":"e0197064","DOI":"10.1371\/journal.pone.0197064","volume":"13","author":"Z Zeng","year":"2018","unstructured":"Zeng Z, Jiang X, Li X, Wells A, Luo Y, Neapolitan R. Conjugated equine estrogen and medroxyprogesterone acetate are associated with decreased risk of breast cancer relative to bioidentical hormone therapy and controls. PLoS One. 2018;13(5):e0197064.","journal-title":"PLoS One"},{"issue":"10","key":"2466_CR3","doi-asserted-by":"publisher","first-page":"1684","DOI":"10.1200\/JCO.2009.24.9284","volume":"28","author":"KD Voduc","year":"2010","unstructured":"Voduc KD, Cheang MC, Tyldesley S, Gelmon K, Nielsen TO, Kennecke H. Breast cancer subtypes and the risk of local and regional relapse. J Clin Oncol. 2010;28(10):1684\u201391.","journal-title":"J Clin Oncol"},{"issue":"1","key":"2466_CR4","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1093\/oxfordjournals.epirev.a036102","volume":"15","author":"MC Pike","year":"1993","unstructured":"Pike MC, Spicer DV, Dahmoush L, Press MF. Estrogens progestogens normal breast cell proliferation and breast cancer risk. Epidemiol Rev. 1993;15(1):17\u201335.","journal-title":"Epidemiol Rev"},{"issue":"7261","key":"2466_CR5","doi-asserted-by":"publisher","first-page":"624","DOI":"10.1136\/bmj.321.7261.624","volume":"321","author":"K McPherson","year":"2000","unstructured":"McPherson K, Steel C, Dixon J. Breast cancer-epidemiology, risk factors, and genetics. BMJ: Br Med J. 2000;321(7261):624.","journal-title":"BMJ: Br Med J"},{"issue":"33","key":"2466_CR6","doi-asserted-by":"publisher","first-page":"3744","DOI":"10.1200\/JCO.2014.55.5730","volume":"32","author":"EA Perez","year":"2014","unstructured":"Perez EA, Romond EH, Suman VJ, Jeong J-H, Sledge G, Geyer CE Jr, Martino S, Rastogi P, Gralow J, Swain SM. Trastuzumab plus adjuvant chemotherapy for human epidermal growth factor receptor 2\u2013positive breast cancer: planned joint analysis of overall survival from NSABP B-31 and NCCTG N9831. J Clin Oncol. 2014;32(33):3744\u201352.","journal-title":"J Clin Oncol"},{"issue":"6","key":"2466_CR7","doi-asserted-by":"publisher","first-page":"1746","DOI":"10.1002\/1097-0142(19940915)74:6<1746::AID-CNCR2820740617>3.0.CO;2-Y","volume":"74","author":"SJ Schnitt","year":"1994","unstructured":"Schnitt SJ, Abner A, Gelman R, Connolly JL, Recht A, Duda RB, Eberlein TJ, Mayzel K, Silver B, Harris JR. The relationship between microscopic margins of resection and the risk of local recurrence in patients with breast cancer treated with breast-conserving surgery and radiation therapy. Cancer. 1994;74(6):1746\u201351.","journal-title":"Cancer"},{"issue":"29","key":"2466_CR8","doi-asserted-by":"publisher","first-page":"3885","DOI":"10.1200\/JCO.2011.36.1105","volume":"29","author":"ND Arvold","year":"2011","unstructured":"Arvold ND, Taghian AG, Niemierko A, Abi Raad RF, Sreedhara M, Nguyen PL, Bellon JR, Wong JS, Smith BL, Harris JR. Age, breast cancer subtype approximation, and local recurrence after breast-conserving therapy. J Clin Oncol. 2011;29(29):3885\u201391.","journal-title":"J Clin Oncol"},{"issue":"15","key":"2466_CR9","doi-asserted-by":"publisher","first-page":"2127","DOI":"10.1200\/JCO.2006.10.3523","volume":"25","author":"CA Hudis","year":"2007","unstructured":"Hudis CA, Barlow WE, Costantino JP, Gray RJ, Pritchard KI, Chapman JA, Sparano JA, Hunsberger S, Enos RA, Gelber RD. Proposal for standardized definitions for efficacy end points in adjuvant breast cancer trials: the STEEP system. J Clin Oncol. 2007;25(15):2127\u201332.","journal-title":"J Clin Oncol"},{"issue":"1","key":"2466_CR10","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1186\/s12859-016-1084-8","volume":"17","author":"Z Zeng","year":"2016","unstructured":"Zeng Z, Jiang X, Neapolitan R. Discovering causal interactions using Bayesian network scoring and information gain. BMC Bioinform. 2016;17(1):221.","journal-title":"BMC Bioinform"},{"issue":"16","key":"2466_CR11","doi-asserted-by":"publisher","first-page":"1872","DOI":"10.1200\/JCO.2015.64.0334","volume":"34","author":"C Desmedt","year":"2016","unstructured":"Desmedt C, Zoppoli G, Gundem G, Pruneri G, Larsimont D, Fornili M, Fumagalli D, Brown D, Roth\u00e9 F, Vincent D. Genomic characterization of primary invasive lobular breast cancer. J Clin Oncol. 2016;34(16):1872\u201381.","journal-title":"J Clin Oncol"},{"issue":"9","key":"2466_CR12","doi-asserted-by":"publisher","first-page":"1205","DOI":"10.1093\/bioinformatics\/btq126","volume":"26","author":"JC Denny","year":"2010","unstructured":"Denny JC, Ritchie MD, Basford MA, Pulley JM, Bastarache L, Brown-Gentry K, Wang D, Masys DR, Roden DM, Crawford DC. PheWAS: demonstrating the feasibility of a phenome-wide scan to discover gene\u2013disease associations. Bioinform (Oxford, England). 2010;26(9):1205\u201310.","journal-title":"Bioinform (Oxford, England)"},{"key":"2466_CR13","volume-title":"The computer-based patient record. Institute of Medicine","author":"R Dick","year":"1991","unstructured":"Dick R, Steen E. The computer-based patient record. Institute of Medicine. Washington, DC: National Academy Press; 1991."},{"key":"2466_CR14","doi-asserted-by":"publisher","unstructured":"Zeng Z, Deng Y, Li X, Naumann T, Luo Y. Natural language processing for EHR-based computational phenotyping. IEEE\/ACM Trans Comput Biol Bioinform. 2018. p. 1\u20131. \n                    https:\/\/doi.org\/10.1109\/TCBB.2018.2849968\n                    \n                  .","DOI":"10.1109\/TCBB.2018.2849968"},{"key":"2466_CR15","doi-asserted-by":"crossref","unstructured":"Luo Y, Thompson WK, Herr TM, Zeng Z, Berendsen MA, Jonnalagadda SR, Carson MB, Starren J. Natural language processing for EHR-based pharmacovigilance: a structured review. Drug Saf. 2017;40(11):1075\u201389.","DOI":"10.1007\/s40264-017-0558-6"},{"issue":"5","key":"2466_CR16","doi-asserted-by":"publisher","first-page":"993","DOI":"10.1093\/jamia\/ocv034","volume":"22","author":"S Yu","year":"2015","unstructured":"Yu S, Liao KP, Shaw SY, Gainer VS, Churchill SE, Szolovits P, Murphy SN, Kohane IS, Cai T. Toward high-throughput phenotyping: unbiased automated feature extraction and selection from knowledge sources. J Am Med Inform Assoc. 2015;22(5):993\u20131000.","journal-title":"J Am Med Inform Assoc"},{"issue":"e2","key":"2466_CR17","doi-asserted-by":"publisher","first-page":"e334","DOI":"10.1136\/amiajnl-2013-001999","volume":"20","author":"MF Davis","year":"2013","unstructured":"Davis MF, Sriram S, Bush WS, Denny JC, Haines JL. Automated extraction of clinical traits of multiple sclerosis in electronic medical records. J Am Med Inform Assoc. 2013;20(e2):e334\u201340.","journal-title":"J Am Med Inform Assoc"},{"key":"2466_CR18","doi-asserted-by":"crossref","unstructured":"Luo Y, Xin Y, Hochberg E, Joshi R, Uzuner O, Szolovits P. Subgraph augmented non-negative tensor factorization (SANTF) for modeling clinical narrative text. J Am Med Inform Assoc. 2015;22(5):1009\u201319.","DOI":"10.1093\/jamia\/ocv016"},{"issue":"5","key":"2466_CR19","doi-asserted-by":"publisher","first-page":"824","DOI":"10.1136\/amiajnl-2013-002443","volume":"21","author":"Y Luo","year":"2014","unstructured":"Luo Y, Sohani AR, Hochberg EP, Szolovits P. Automatic lymphoma classification with sentence subgraph mining from pathology reports. J Am Med Inform Assoc. 2014;21(5):824\u201332.","journal-title":"J Am Med Inform Assoc"},{"key":"2466_CR20","series-title":"arXiv preprint arXiv:180604818","volume-title":"Using clinical narratives and structured data to identify distant recurrences in breast Cancer","author":"Z Zeng","year":"2018","unstructured":"Zeng Z, Roy A, Li X, Espino S, Clare S, Khan S, Luo Y: Using clinical narratives and structured data to identify distant recurrences in breast Cancer. arXiv preprint arXiv:180604818 2018."},{"key":"2466_CR21","first-page":"1885","volume":"2017","author":"Z Zeng","year":"2017","unstructured":"Zeng Z, Li X, Espino S, Roy A, Kitsch K, Clare S, Khan S, Luo Y. Contralateral breast Cancer event detection using nature language processing. AMIA Ann Symp Proc. 2017;2017:1885\u201392.","journal-title":"AMIA Ann Symp Proc"},{"issue":"3","key":"2466_CR22","doi-asserted-by":"publisher","first-page":"328","DOI":"10.1197\/jamia.M3028","volume":"16","author":"X Wang","year":"2009","unstructured":"Wang X, Hripcsak G, Markatou M, Friedman C. Active computerized pharmacovigilance using natural language processing, statistics, and electronic health records: a feasibility study. J Am Med Inform Assoc. 2009;16(3):328\u201337.","journal-title":"J Am Med Inform Assoc"},{"issue":"2","key":"2466_CR23","doi-asserted-by":"publisher","first-page":"228","DOI":"10.1038\/clpt.2012.54","volume":"92","author":"K Haerian","year":"2012","unstructured":"Haerian K, Varn D, Vaidya S, Ena L, Chase H, Friedman C. Detection of pharmacovigilance-related adverse events using electronic health records and automated methods. Clin Pharmacol Ther. 2012;92(2):228\u201334.","journal-title":"Clin Pharmacol Ther"},{"issue":"18","key":"2466_CR24","doi-asserted-by":"publisher","first-page":"1335","DOI":"10.1093\/jnci\/djj363","volume":"98","author":"EB Lamont","year":"2006","unstructured":"Lamont EB, Herndon JE, Weeks JC, Henderson IC, Earle CC, Schilsky RL, Christakis NA, Cancer BLG. Measuring disease-free survival and cancer relapse using Medicare claims from CALGB breast cancer trial participants (companion to 9344). J Natl Cancer Inst. 2006;98(18):1335\u20138.","journal-title":"J Natl Cancer Inst"},{"issue":"9","key":"2466_CR25","doi-asserted-by":"publisher","first-page":"666","DOI":"10.1016\/j.annepidem.2014.06.099","volume":"24","author":"N Chawla","year":"2014","unstructured":"Chawla N, Yabroff KR, Mariotto A, McNeel TS, Schrag D, Warren JL. Limited validity of diagnosis codes in Medicare claims for identifying cancer metastases and inferring stage. Ann Epidemiol. 2014;24(9):666\u2013672. e662.","journal-title":"Ann Epidemiol"},{"issue":"2","key":"2466_CR26","doi-asserted-by":"publisher","first-page":"349","DOI":"10.1136\/amiajnl-2012-000928","volume":"20","author":"JA Strauss","year":"2013","unstructured":"Strauss JA, Chao CR, Kwan ML, Ahmed SA, Schottinger JE, Quinn VP. Identifying primary and recurrent cancers using a SAS-based natural language processing algorithm. J Am Med Inform Assoc. 2013;20(2):349\u201355.","journal-title":"J Am Med Inform Assoc"},{"issue":"4","key":"2466_CR27","doi-asserted-by":"publisher","first-page":"380","DOI":"10.1097\/MLR.0000000000000327","volume":"53","author":"R Haque","year":"2015","unstructured":"Haque R, Shi J, Schottinger JE, Ahmed SA, Chung J, Avila C, Lee VS, Cheetham TC, Habel LA, Fletcher SW. A hybrid approach to identify subsequent breast cancer using pathology and automated health information data. Med Care. 2015;53(4):380\u20135.","journal-title":"Med Care"},{"key":"2466_CR28","doi-asserted-by":"crossref","unstructured":"Carrell DS, Halgrim S, Tran D-T, Buist DS, Chubak J, Chapman WW, Savova G. Using natural language processing to improve efficiency of manual chart abstraction in research: the case of breast cancer recurrence. Am J Epidemiol. 2014;179(6):749\u201358.","DOI":"10.1093\/aje\/kwt441"},{"issue":"11","key":"2466_CR29","doi-asserted-by":"publisher","first-page":"2979","DOI":"10.1158\/1055-9965.EPI-09-0607","volume":"18","author":"JL Bosco","year":"2009","unstructured":"Bosco JL, Lash TL, Prout MN, Buist DS, Geiger AM, Haque R, Wei F, Silliman RA, Investigators B. Breast cancer recurrence in older women five to ten years after diagnosis. Cancer Epidemiol Prev Biomarkers. 2009;18(11):2979\u201383.","journal-title":"Cancer Epidemiol Prev Biomarkers"},{"issue":"6","key":"2466_CR30","doi-asserted-by":"publisher","first-page":"R85","DOI":"10.1186\/bcr2453","volume":"11","author":"LA Habel","year":"2009","unstructured":"Habel LA, Achacoso NS, Haque R, Nekhlyudov L, Fletcher SW, Schnitt SJ, Collins LC, Geiger AM, Puligandla B, Acton L. Declining recurrence among ductal carcinoma in situ patients treated with breast-conserving surgery in the community setting. Breast Cancer Res. 2009;11(6):R85.","journal-title":"Breast Cancer Res"},{"issue":"10","key":"2466_CR31","doi-asserted-by":"publisher","first-page":"2738","DOI":"10.1200\/JCO.1996.14.10.2738","volume":"14","author":"T Saphner","year":"1996","unstructured":"Saphner T, Tormey DC, Gray R. Annual hazard rates of recurrence for breast cancer after primary therapy. J Clin Oncol. 1996;14(10):2738\u201346.","journal-title":"J Clin Oncol"},{"issue":"9","key":"2466_CR32","doi-asserted-by":"publisher","first-page":"927","DOI":"10.1200\/JCO.2015.62.3504","volume":"34","author":"M Colleoni","year":"2016","unstructured":"Colleoni M, Sun Z, Price KN, Karlsson P, Forbes JF, Th\u00fcrlimann B, Gianni L, Castiglione M, Gelber RD, Coates AS. Annual hazard rates of recurrence for breast cancer during 24 years of follow-up: results from the international breast cancer study group trials I to V. J Clin Oncol. 2016;34(9):927\u201335.","journal-title":"J Clin Oncol"},{"issue":"5","key":"2466_CR33","doi-asserted-by":"publisher","first-page":"452","DOI":"10.1097\/PHH.0000000000000349","volume":"22","author":"FK Tangka","year":"2016","unstructured":"Tangka FK, Subramanian S, Beebe MC, Weir HK, Trebino D, Babcock F, Ewing J. Cost of operating central cancer registries and factors that affect cost: findings from an economic evaluation of Centers for Disease Control and Prevention National Program of Cancer registries. J Public Health Manag Pract. 2016;22(5):452\u201360.","journal-title":"J Public Health Manag Pract"},{"key":"2466_CR34","first-page":"1","volume-title":"Metamap: mapping text to the umls metathesaurus","author":"AR Aronson","year":"2006","unstructured":"Aronson AR. Metamap: mapping text to the umls metathesaurus. Bethesda: NLM, NIH, DHHS; 2006. p. 1\u201326."},{"key":"2466_CR35","first-page":"677","volume":"192","author":"WW Chapman","year":"2013","unstructured":"Chapman WW, Hilert D, Velupillai S, Kvist M, Skeppstedt M, Chapman BE, Conway M, Tharp M, Mowery DL, Deleger L. Extending the NegEx lexicon for multiple languages. Studies Health Technol Inform. 2013;192:677.","journal-title":"Studies Health Technol Inform"},{"key":"2466_CR36","doi-asserted-by":"crossref","unstructured":"Landis JR, Koch GG. The measurement of observer agreement for categorical data. Biometrics. 1977;33(1):159\u201374.","DOI":"10.2307\/2529310"},{"issue":"4","key":"2466_CR37","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1093\/labmed\/28.4.263","volume":"28","author":"DF Cowan","year":"2015","unstructured":"Cowan DF. How templates improve quality and efficiency in surgical pathology. Lab Med. 2015;28(4):263\u20137.","journal-title":"Lab Med"},{"issue":"Suppl 1","key":"2466_CR38","first-page":"69","volume":"13","author":"Y Luo","year":"2014","unstructured":"Luo Y, Riedlinger G, Szolovits P. Text mining in cancer gene and pathway prioritization. Cancer Informat. 2014;13(Suppl 1):69.","journal-title":"Cancer Informat"}],"container-title":["BMC Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-018-2466-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s12859-018-2466-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-018-2466-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,12,27]],"date-time":"2019-12-27T19:05:40Z","timestamp":1577473540000},"score":1,"resource":{"primary":{"URL":"https:\/\/bmcbioinformatics.biomedcentral.com\/articles\/10.1186\/s12859-018-2466-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12]]},"references-count":38,"journal-issue":{"issue":"S17","published-print":{"date-parts":[[2018,12]]}},"alternative-id":["2466"],"URL":"https:\/\/doi.org\/10.1186\/s12859-018-2466-x","relation":{},"ISSN":["1471-2105"],"issn-type":[{"value":"1471-2105","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,12]]},"assertion":[{"value":"28 December 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"This project is approved by Northwestern university IRB number STU00202167 and STU00200923-MOD0006.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare that they have no competing interests.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Publisher\u2019s Note"}}],"article-number":"498"}}