{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T16:24:26Z","timestamp":1778343866576,"version":"3.51.4"},"reference-count":94,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/access.2021.3124163","type":"journal-article","created":{"date-parts":[[2021,10,28]],"date-time":"2021-10-28T19:30:16Z","timestamp":1635449416000},"page":"147600-147612","source":"Crossref","is-referenced-by-count":31,"title":["Review: Privacy-Preservation in the Context of Natural Language Processing"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4950-6876","authenticated-orcid":false,"given":"Darshini","family":"Mahendran","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0875-9380","authenticated-orcid":false,"given":"Changqing","family":"Luo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bridget T.","family":"Mcinnes","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref73","first-page":"3853","article-title":"Differential privacy for text analytics via natural text sanitization","author":"yue","year":"2021","journal-title":"Proc Findings Assoc Comput Linguistics"},{"key":"ref72","first-page":"82","article-title":"Broadening the scope of differential privacy using metrics","author":"konstantinos","year":"2013","journal-title":"Proc Int Symp Privacy Enhancing Technol Symp"},{"key":"ref71","first-page":"123","article-title":"Generalised differential privacy for text document processing","author":"natasha","year":"2019","journal-title":"Proc Int Conf Princ Secur Trust"},{"key":"ref70","first-page":"2355","article-title":"Differentially private representation for NLP: Formal guarantee and an empirical study on privacy and fairness","author":"lyu","year":"2020","journal-title":"Proc Findings Assoc Comput Linguistics"},{"key":"ref76","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","volume":"1","author":"devlin","year":"2019","journal-title":"Proc Conf North Amer Chapter Assoc Comput Linguistics Hum Lang Technol"},{"key":"ref77","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"alec","year":"2019","journal-title":"Proc OpenAI Blog"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/FOCS.2007.66"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1136\/amiajnl-2013-001628"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1136\/amiajnl-2011-000203"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/SP40000.2020.00095"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P15-2079"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1136\/jamia.2009.002691"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2015.07.001"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1197\/jamia.M2444"},{"key":"ref30","first-page":"457","article-title":"Getting the data in: Three year experience with a pediatric electronic medical record system","author":"kohane","year":"1994","journal-title":"Proc Annu Symp Comput Appl Med Care"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3054479"},{"key":"ref36","article-title":"MIMIC-III, a freely accessible critical care database","volume":"3","author":"alistair","year":"2016","journal-title":"Data Science Journal"},{"key":"ref35","first-page":"142","article-title":"Introduction to the CoNLL-2003 shared task: Language-independent named entity recognition","volume":"4","author":"sang","year":"2003","journal-title":"Proceeding of the 7th Conference on Natural Language Learning"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2017.06.011"},{"key":"ref60","first-page":"884","article-title":"A step towards usable privacy policy: Automatic alignment of privacy statements","author":"fei","year":"2014","journal-title":"Proc COLING"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.privatenlp-1.3"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1500"},{"key":"ref63","first-page":"3111","article-title":"Distributed representations of words and phrases and their compositionality","author":"mikolov","year":"2013","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2021.3099755"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/RE48521.2020.00025"},{"key":"ref27","first-page":"1","article-title":"BPEmb: Tokenization-free pre-trained subword embeddings in 275 languages","author":"benjamin","year":"2018","journal-title":"Proc 11th Int Conf Lang Resour Eval"},{"key":"ref65","first-page":"1","article-title":"Analyzing vocabulary intersections of expert annotations and topic models for data practices in privacy policies","author":"frederick","year":"2016","journal-title":"Proc AAAI"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1001"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/2808719.2808752"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1145\/1401890.1401959"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/TETC.2020.2983404"},{"key":"ref69","first-page":"2030","article-title":"Domain-adversarial training of neural networks","volume":"17","author":"ganin","year":"2015","journal-title":"J Mach Learn Res"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOMWKSHPS50562.2020.9162683"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3336191.3371881"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1093\/jamia\/ocw156"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106649"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2017.10.003"},{"key":"ref24","first-page":"1","article-title":"Performance of automatic de-identification across different note types","volume":"abs 2102 11032","author":"nicholas","year":"2021","journal-title":"CoRR"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-2017"},{"key":"ref26","first-page":"1638","article-title":"Contextual string embeddings for sequence labeling","author":"alan","year":"2018","journal-title":"Proc 27th Int Conf Comput Linguistics"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2020.106779"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1609\/aimag.v29i3.2157"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1023\/A:1009953814988"},{"key":"ref94","article-title":"Solutions to big data privacy and security challenges associated with COVID-19 surveillance systems","author":"vibhushinie","year":"0","journal-title":"Frontiers in Big Data"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1145\/3343038"},{"key":"ref92","first-page":"1","article-title":"Publicly available clinical BERT embeddings","volume":"abs 1904 3323","author":"emily","year":"2019","journal-title":"CoRR"},{"key":"ref91","doi-asserted-by":"crossref","first-page":"1234","DOI":"10.1093\/bioinformatics\/btz682","article-title":"BioBERT: A pre-trained biomedical language representation model for biomedical text mining","volume":"36","author":"lee","year":"2020","journal-title":"Bioinformatics"},{"key":"ref90","doi-asserted-by":"crossref","first-page":"281","DOI":"10.1007\/s00766-013-0190-7","article-title":"Eddy, a formal language for specifying and analyzing data flow specifications for conflicting privacy requirements","volume":"19","author":"travisd","year":"2014","journal-title":"Requirements Eng"},{"key":"ref59","first-page":"19","article-title":"Towards an information type lexicon for privacy policies","author":"jaspreet","year":"2015","journal-title":"Proc IEEE Int Workshop Requirement Eng Law (RELAW)"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/2393596.2393608"},{"key":"ref57","article-title":"Automatic categorization of privacy policies: A pilot study","author":"waleed","year":"2012"},{"key":"ref56","first-page":"9","article-title":"Towards usable privacy policies: Semi-automatically extracting data practices from websites&#x2019; privacy policies","author":"norman","year":"2014","journal-title":"Proc SOUPS"},{"key":"ref55","article-title":"The usable privacy policy project","author":"norman","year":"2013"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1294"},{"key":"ref53","first-page":"6","article-title":"Data analysis in public social networks","volume":"1","author":"lubos","year":"2012","journal-title":"Proc Conf Int Workshop Trends Innov"},{"key":"ref52","first-page":"1025","article-title":"Inductive representation learning on large graphs","author":"williaml","year":"2017","journal-title":"Proc 31st Int Conf Neural Inf Process Syst"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"680","DOI":"10.5858\/2003-127-680-CMDS","article-title":"Concept-match medical data scrubbing: How pathology text can be used in research","volume":"127","author":"jules","year":"2003","journal-title":"Arch Pathol Lab Med"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1186\/1472-6947-6-12"},{"key":"ref40","first-page":"25","article-title":"Towards robust and privacy-preserving text representations","volume":"2","author":"yitong","year":"2018","journal-title":"Proc Annual Meeting of the Assoc Computational Linguistics"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.comcom.2020.07.032"},{"key":"ref13","first-page":"1300","article-title":"De-identification in natural language processing","author":"veronika","year":"2014","journal-title":"Proc 37th Int Conv Inf Commun Technol Electron Microelectron"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1197\/jamia.M2441"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2015.06.007"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1145\/1060745.1060764"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2015.08.012"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-1130"},{"key":"ref17","article-title":"Clinical records de-identification using CRF and rule-based approaches","author":"cyril","year":"0"},{"key":"ref84","first-page":"275","article-title":"Overview of PAN&#x2019;17","author":"martin","year":"2017","journal-title":"Proc Int Conf Cross-Lang Eval Forum Eur Lang"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2015.06.009"},{"key":"ref83","first-page":"1015","article-title":"Multilingual hierarchical attention networks for document classification","author":"nikolaos","year":"2017","journal-title":"Proc 8th Int Joint Conf Natural Lang Process"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2015.06.015"},{"key":"ref80","article-title":"A universal part-of-speech tagset","author":"petrov","year":"2011","journal-title":"ArXiv 1104 2086"},{"key":"ref89","first-page":"1631","article-title":"Recursive deep models for semantic compositionality over a sentiment treebank","author":"richard","year":"2013","journal-title":"Proc Conf Empirical Methods Natural Lang Process"},{"key":"ref4","first-page":"1051","article-title":"An introduction to NLP-based textual anonymisation","author":"ben","year":"2006","journal-title":"Proc 5th Int Conf Lang Resour Eval"},{"key":"ref3","first-page":"1","article-title":"Better privacy indicators: A new approach to quantification of privacy policies","author":"manar","year":"2016","journal-title":"Proc Symp Usable Privacy Secur"},{"key":"ref6","first-page":"7038","article-title":"Hitzalmed: Anonymisation of clinical text in Spanish","author":"salvadorlima","year":"2020","journal-title":"Proc 12nd Lang Resour Eval Conf"},{"key":"ref5","first-page":"333","article-title":"Replacing personally-identifying information in medical records, the scrub system","author":"latanya","year":"1996","journal-title":"Proc AMIA Annu Fall Symp Amer Med Inform Assoc"},{"key":"ref85","first-page":"199","article-title":"Effects of age and gender on blogging","volume":"6","author":"jonathan","year":"2006","journal-title":"Proc AAAI Spring Symp Comput Approaches Analyzing Weblogs"},{"key":"ref8","first-page":"827","article-title":"Rule-based information extraction is dead long live rule-based information extraction systems","author":"laura","year":"2013","journal-title":"Proc Conf Empirical Methods Natural Lang Process"},{"key":"ref86","article-title":"HS$^{3}\\text{D}$ : Homo sapiens splice site data set","author":"pasquale","year":"2003"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1186\/s12911-019-0867-z"},{"key":"ref49","first-page":"40","article-title":"Revisiting semi-supervised learning with graph embeddings","author":"zhilin","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-018-9431-1"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5446"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1023\/A:1010933404324"},{"key":"ref46","first-page":"107","article-title":"SpaML: A bimodal ensemble learning spam detector based on NLP techniques","author":"fattahi","year":"2021","journal-title":"Proc IEEE 5th Int Conf Cryptogr Secur Privacy (CSP)"},{"key":"ref45","first-page":"1","article-title":"Finding clues for your secrets: Semantics-driven, learning-based privacy discovery in mobile apps","author":"yuhong","year":"2018","journal-title":"Proc NDSS"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/2034691.2034742"},{"key":"ref47","first-page":"1","article-title":"Privacy-preserving graph convolutional networks for text classification","volume":"abs 2102 9604","author":"timour","year":"2021","journal-title":"CoRR"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/3085228.3085259"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/3230833.3230845"},{"key":"ref44","first-page":"55","article-title":"The Stanford CoreNLP natural language processing toolkit","author":"christopher","year":"2014","journal-title":"Proc 52nd Annu Meeting Assoc Comput Linguistics Syst Demonstrations"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/219717.219748"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/9312710\/09592788.pdf?arnumber=9592788","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,11]],"date-time":"2023-11-11T22:08:02Z","timestamp":1699740482000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9592788\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":94,"URL":"https:\/\/doi.org\/10.1109\/access.2021.3124163","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]}}}