{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:35:09Z","timestamp":1772908509267,"version":"3.50.1"},"reference-count":82,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,9]],"date-time":"2025-10-09T00:00:00Z","timestamp":1759968000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,9]],"date-time":"2025-10-09T00:00:00Z","timestamp":1759968000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,9]]},"DOI":"10.1109\/dsaa65442.2025.11247969","type":"proceedings-article","created":{"date-parts":[[2025,11,24]],"date-time":"2025-11-24T18:56:45Z","timestamp":1764010605000},"page":"1-9","source":"Crossref","is-referenced-by-count":2,"title":["A Survey on Current Trends and Recent Advances in Text Anonymization"],"prefix":"10.1109","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4685-0847","authenticated-orcid":false,"given":"Tobias","family":"Deu\u00dfer","sequence":"first","affiliation":[{"name":"University of Bonn,Bonn,Germany"}]},{"given":"Lorenz","family":"Sparrenberg","sequence":"additional","affiliation":[{"name":"University of Bonn,Bonn,Germany"}]},{"given":"Armin","family":"Berger","sequence":"additional","affiliation":[{"name":"University of Bonn,Bonn,Germany"}]},{"given":"Max","family":"Hahnb\u00fcck","sequence":"additional","affiliation":[{"name":"University of Bonn,Bonn,Germany"}]},{"given":"Christian","family":"Bauckhage","sequence":"additional","affiliation":[{"name":"University of Bonn,Bonn,Germany"}]},{"given":"Rafet","family":"Sifa","sequence":"additional","affiliation":[{"name":"University of Bonn,Bonn,Germany"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Big data statistics - how much data is there in the world?","author":"Bartley","year":"2025"},{"key":"ref2","article-title":"An anonymization tool for open data publication of legal documents","volume-title":"International Workshop on Artificial Intelligence Technologies for Legal Documents","author":"Oksanen"},{"key":"ref3","article-title":"PSILENCE: A pseudonymization tool for international law","volume-title":"Proc. Workshop on CALD-Pseudo","author":"Cabrera-Diego"},{"key":"ref4","article-title":"Uncovering in-consistencies and contradictions in financial reports using large language models","volume-title":"Proc. BigData","author":"Deu\u00dfer"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-industry.69"},{"key":"ref6","article-title":"Fusing speech and language models for dementia detection","volume-title":"Proc. BigData","author":"DeuBer"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/BigData59044.2023.10386518"},{"key":"ref8","article-title":"LAW: Legal agentic workflows for custody and fund services contracts","volume-title":"Proc. CDLING","author":"Watson"},{"key":"ref9","article-title":"Informed named entity recognition decoding for generative language models","volume-title":"Proc. BigData","author":"DeuBer"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.wnut-1.1"},{"key":"ref11","article-title":"Hello, [REDACTED]\u201d: Protecting student privacy in analyses of online dis-cussion forums","volume-title":"Proc. EDM 2020","author":"Bosch"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3576050.3576070"},{"key":"ref13","volume-title":"Textwash - automated open-source text anonymisation","author":"Kleinberg","year":"2022"},{"key":"ref14","article-title":"Large language models are anonymizers","volume-title":"ICLR 2024 Workshop on Reliable and Responsible Foundation Models","author":"Staab"},{"key":"ref15","article-title":"Language models are advanced anonymizers","volume-title":"Proc. ICLR"},{"key":"ref16","article-title":"Resource-efficient anonymization of textual data via knowledge distillation from large language models","volume-title":"Proc. COLING","author":"Deu\u00dfer"},{"key":"ref17","volume-title":"DeID-GPT: Zero-shot medical text de-identification by GPT-4","author":"Liu","year":"2023"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-025-86890-3"},{"key":"ref19","volume-title":"Robust utility-preserving text anonymization based on large language models","author":"Yang","year":"2024"},{"key":"ref20","article-title":"Incognitext: Privacy-enhancing conditional text anonymization via LLM-based private attribute randomization","volume-title":"Neurips Safe Generative AI Workshop 2024","author":"Frikha"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-023-42977-3"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.artmed.2024.102845"},{"key":"ref23","article-title":"Transforrner-deid: Deidentification of free-text clinical notes with transformers","author":"Moore","year":"2023","journal-title":"PhysioNet"},{"key":"ref24","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","volume-title":"Proc. NAACL-HLT","author":"Devlin"},{"key":"ref25","author":"Liu","year":"2019","journal-title":"RoBERTa: A robustly optimized BERT pretraining approach"},{"key":"ref26","volume-title":"A comparative evaluation of transformer models for de-identification of clinical text data","author":"Meaney","year":"2022"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1055\/a-2282-4340"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.eacl-demo.22"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/j.jval.2023.09.2860"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/j.patter.2021.100255"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.privatenlp-1.8"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.1181"},{"key":"ref33","article-title":"Anonymizing medical documents with local, privacy preserving large language models: The llm-anonymizer","author":"Wiest","year":"2024","journal-title":"medRxiv"},{"key":"ref34","volume-title":"Llms-in-the-loop part 2: Expert small ai models for anonymization and de-identification of phi across multiple languages","author":"Gunay","year":"2024"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-024-81170-y"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1186\/s12911-024-02546-8"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.eacl-srw.11"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3594536.3595151"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.nllp-1.9"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3462757.3466087"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.nllp-1.16"},{"key":"ref42","doi-asserted-by":"crossref","DOI":"10.1109\/CISDB64969.2025.11010534","volume-title":"Anonymization of documents for law enforcement with machine learning","author":"Eberhardinger","year":"2025"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.3390\/sym13081490"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-naacl.157"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-70890-9_23"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN55064.2022.9892285"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096816"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.wnut-1.40"},{"key":"ref49","article-title":"Data anonymization for privacy-preserving large language model fine-tuning on call transcripts","volume-title":"Proc CALD-pseudo 2024","author":"Gardiner","year":"2024"},{"key":"ref50","article-title":"Indonesian speech content de-identification in low resource transcripts","volume-title":"Proc. Workshop in South East Asian Language Processing","author":"Abdjul"},{"key":"ref51","article-title":"De-identifying student writing with rules and transformers","volume-title":"Proc. AIED","author":"Holmes"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/3378184.3378229"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/3706468.3706493"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCoSITE57641.2023.10127762"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16290-9_29"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/s41060-021-00285-x"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.econlp-1.7"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.aacl-main.18"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.112945"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.323"},{"key":"ref61","volume-title":"Truthful text sanitization guided by inference attacks","author":"Pilan","year":"2024"},{"key":"ref62","article-title":"DP-Rewrite: Towards reproducibility and transparency in differentially private text rewriting","volume-title":"Proc. COLING 2022","author":"Igamberdiev"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.874"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.554"},{"key":"ref65","article-title":"DP- VAE: Human-readable text anonymization for online reviews with differentially private VAE","volume-title":"Proc. WWW","author":"Weggenmann"},{"key":"ref66","article-title":"Bootstrapping text anonymization models with distant supervision","volume-title":"Proc. LREC","author":"Papadopoulou"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3076632"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2022.11.016"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.privatenlp-main.2"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.87"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1016\/j.nlp.2024.100107"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1162\/coli_a_00458"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1007\/s10618-024-01066-3"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-13945-1_12"},{"key":"ref75","volume-title":"No intruder, no validity: Evaluation criteria for privacy-preserving text anonymization","author":"Mozes","year":"2021"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.trustnlp-1.20"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/MOCAST61810.2024.10615642"},{"key":"ref78","volume-title":"Unmasking the reality of pii masking models: Performance gaps and the call for accountability","author":"Singh","year":"2025"},{"key":"ref79","volume-title":"Microsoft Presidio: Context aware, pluggable and customizable pii anonymization service for text and images","author":"Mendels","year":"2018"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.21105\/joss.05402"},{"key":"ref81","volume-title":"Gemini: A family of highly capable multimodal models","author":"Team","year":"2025"},{"key":"ref82","volume-title":"Gpt-4o system card","author":"OpenAI","year":"2024"}],"event":{"name":"2025 IEEE 12th International Conference on Data Science and Advanced Analytics (DSAA)","location":"Birmingham, United Kingdom","start":{"date-parts":[[2025,10,9]]},"end":{"date-parts":[[2025,10,12]]}},"container-title":["2025 IEEE 12th International Conference on Data Science and Advanced Analytics (DSAA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11247920\/11247921\/11247969.pdf?arnumber=11247969","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T05:58:52Z","timestamp":1764050332000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11247969\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,9]]},"references-count":82,"URL":"https:\/\/doi.org\/10.1109\/dsaa65442.2025.11247969","relation":{},"subject":[],"published":{"date-parts":[[2025,10,9]]}}}