{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T02:25:43Z","timestamp":1763346343906,"version":"3.45.0"},"reference-count":30,"publisher":"Tech Science Press","issue":"3","license":[{"start":{"date-parts":[[2025,10,26]],"date-time":"2025-10-26T00:00:00Z","timestamp":1761436800000},"content-version":"vor","delay-in-days":298,"URL":"https:\/\/doi.org\/10.32604\/TSP-CROSSMARKPOLICY"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["CMC"],"published-print":{"date-parts":[[2025]]},"DOI":"10.32604\/cmc.2025.068221","type":"journal-article","created":{"date-parts":[[2025,9,22]],"date-time":"2025-09-22T08:09:24Z","timestamp":1758528564000},"page":"4629-4643","update-policy":"https:\/\/doi.org\/10.32604\/tsp-crossmarkpolicy","source":"Crossref","is-referenced-by-count":0,"title":["A Study on Re-Identification of Natural Language Data Considering Korean Attributes"],"prefix":"10.32604","volume":"85","author":[{"given":"Segyeong","family":"Bang","sequence":"first","affiliation":[]},{"given":"Soeun","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Gaeun","family":"Ahn","sequence":"additional","affiliation":[]},{"given":"Hyemin","family":"Hong","sequence":"additional","affiliation":[]},{"given":"Junhyoung","family":"Oh","sequence":"additional","affiliation":[]}],"member":"17807","published-online":{"date-parts":[[2025]]},"reference":[{"key":"ref1","unstructured":"Villalobos P, Ho A, Sevilla J, Besiroglu T, Heim L, Hobbhahn M. Will we run out of data? Limits of LLM scaling based on human-generated data. arXiv:2211.04325. 2022."},{"key":"ref2","first-page":"211","article-title":"Large language model and personal data protection: Korean cases and policies","volume":"28","author":"Chung","year":"2024","journal-title":"IT Law Res"},{"key":"ref3","unstructured":"Shin JH. Regulation on the collection and use of artificial intelligence (AI) training data: focusing on disclosed personal information [Ph.D. thesis]. Seoul, Republic of Korea: Seoul National University; 2024. (In Korean)."},{"key":"ref4","unstructured":"Personal Information Protection Commission (PIPC). PIPC releases results of preemptive inspection of some artificial intelligence services; 2024 [Internet]. [cited 2025 Jan 3]. Available from: https:\/\/www.pipc.go.kr\/eng\/user\/ltn\/new\/noticeDetail.do?bbsId=BBSMSTR_000000000001&nttId=2476."},{"key":"ref5","unstructured":"Personal Information Protection Commission (PIPC). Guidelines for pseudonymizing unstructured data; 2024 [Internet]. [cited 2025 Jan 3]. Available from: https:\/\/www.pipc.go.kr\/eng\/user\/lgp\/law\/ordinancesDetail.do?bbsId=BBSMSTR_000000000005&nttId=2699#none."},{"key":"ref6","doi-asserted-by":"crossref","first-page":"135626","DOI":"10.1109\/ACCESS.2024.3461804","article-title":"KDPII: a new Korean dialogic dataset for the deidentification of personally identifiable information","volume":"12","author":"Fei","year":"2024","journal-title":"IEEE Access"},{"key":"ref7","doi-asserted-by":"crossref","unstructured":"Hahm S, Kim H, Lee G, Park H, Lee J. Thunder-DeID: accurate and efficient de-identification framework for korean court judgments. arXiv:2506.15266. 2025.","DOI":"10.18653\/v1\/2025.findings-emnlp.682"},{"key":"ref8","unstructured":"Christen P, Schnell R, Vidanage A. Information leakage in data linkage. arXiv:2505.08596. 2025."},{"key":"ref9","doi-asserted-by":"crossref","unstructured":"Yang T, Zhu X, Gurevych I. Robust utility-preserving text anonymization based on large language models. arXiv:2407.11770. 2024.","DOI":"10.18653\/v1\/2025.acl-long.1404"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"3286623","DOI":"10.1155\/2022\/3286623","article-title":"Detecting illegal online gambling (IOG) services in the mobile environment","volume":"2022","author":"Min","year":"2022","journal-title":"Secur Commun Netw"},{"key":"ref11","series-title":"2020 IEEE International Conference on Big Data (Big Data)","first-page":"2384","article-title":"Accelerating text mining using domain-specific stop word lists","author":"Alshanik","year":"2020 Dec 10\u201313"},{"key":"ref12","series-title":"Emerging Trends in ICT for Sustainable Development: The Proceedings of NICE2020 International Conference","first-page":"3","article-title":"An intelligent chatbot using NLP and TF-IDF algorithm for text understanding applied to the medical field","author":"Soufyane","year":"2021"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"60001","DOI":"10.1063\/5.0037283","article-title":"Paragraph summarization based on word frequency","volume":"2317","author":"Kumar","year":"2021","journal-title":"AIP Conf Proc"},{"key":"ref14","series-title":"Advances in Manufacturing, Automation, Design and Energy Technologies (ICoFT 2020)","first-page":"345","article-title":"Design of smart glove for sign language interpretation using NLP and RNN","author":"Nayak","year":"2023"},{"key":"ref15","first-page":"37979","article-title":"A detailed review on word embedding techniques with emphasis on word2vec","volume":"38","author":"Johnson","year":"2024","journal-title":"Multimed Tools Appl"},{"key":"ref16","series-title":"2020 6th International Conference on Advanced Computing and Communication Systems (ICACCS)","first-page":"466","article-title":"Stopword identification and removal techniques on tc and ir applications: a survey","author":"Ladani","year":"2020 Mar 6\u20137"},{"key":"ref17","doi-asserted-by":"crossref","unstructured":"Park K, Lee J, Jang S, Jung D. An empirical study of tokenization strategies for various Korean NLP tasks. arXiv:2010.02534. 2020.","DOI":"10.18653\/v1\/2020.aacl-main.17"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"1400","DOI":"10.1148\/rg.2021210025","article-title":"Bag-of-words technique in natural language processing: a primer for radiologists","volume":"41","author":"Juluru","year":"2021","journal-title":"RadioGraphics"},{"key":"ref19","series-title":"2021 First International Conference on Advances in Computing and Future Communication Technologies (ICACFCT); 2021 Dec 16\u201317; Meerut, India","first-page":"52","article-title":"LSTM and NLP based forecasting model for stock market analysis","author":"Patel"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"4711","DOI":"10.3390\/app10144711","article-title":"Social media rumor refuter feature analysis and crowd identification based on XGBoost and NLP","volume":"10","author":"Li","year":"2020","journal-title":"Appl Sci"},{"key":"ref21","first-page":"91","article-title":"A novel framework for text preprocessing using NLP approaches and classification using random forest grid search technique for sentiment analysis","volume":"59","author":"Shrivash","year":"2025","journal-title":"Econom Comput Econom Cyberne Stud Res"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"486","DOI":"10.11591\/ijece.v10i1.pp486-493","article-title":"A novel hybrid approach of SVM combined with NLP and probabilistic neural network for email phishing","volume":"10","author":"Kumar","year":"2020","journal-title":"Int J Elect Comput Eng"},{"key":"ref23","first-page":"69","article-title":"Application of logistic regression in natural language processing","volume":"9","author":"Vimal","year":"2020","journal-title":"Int J Eng Res Technol (IJERT)"},{"key":"ref24","doi-asserted-by":"crossref","first-page":"101659","DOI":"10.1016\/j.softx.2024.101659","article-title":"Morpheme-based Korean text cohesion analyzer","volume":"26","author":"Kim","year":"2024","journal-title":"SoftwareX"},{"key":"ref25","first-page":"99","article-title":"The influence of the syllable frequency on transposed letter effect of Korean word recognition","volume":"32","author":"Lee","year":"2021","journal-title":"Korean J Cog Sci"},{"key":"ref26","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1515\/pr-2019-0008","article-title":"Politeness as normative, evaluative and discriminatory: the case of verbal hygiene discourses on correct honorifics use in South Korea","volume":"18","author":"Brown","year":"2022","journal-title":"J Polite Res"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"387","DOI":"10.1080\/00437956.2021.1957549","article-title":"Korean nominal groups: system and structure","volume":"67","author":"Martin","year":"2021","journal-title":"WORD"},{"key":"ref28","unstructured":"Covington C, He X, Honaker J, Kamath G. Unbiased statistical estimation and valid confidence intervals under differential privacy. arXiv:2110.14465. 2021."},{"key":"ref29","first-page":"28","article-title":"Application of three probability distributions to justify central limit theorem","volume":"6","author":"Okoro","year":"2023","journal-title":"Af J Math Stat Stud"},{"key":"ref30","unstructured":"Park S, Moon J, Kim S, Cho WI, Han J, Park J, et al. KLUE: Korean language understanding evaluation. arXiv:2105.09680. 2021."}],"container-title":["Computers, Materials &amp; Continua"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/cdn.techscience.cn\/files\/cmc\/2025\/TSP_CMC-85-3\/TSP_CMC_68221\/TSP_CMC_68221.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T02:22:04Z","timestamp":1763346124000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.techscience.com\/cmc\/v85n3\/64173"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":30,"journal-issue":{"issue":"3","published-online":{"date-parts":[[2025]]},"published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.32604\/cmc.2025.068221","relation":{},"ISSN":["1546-2226"],"issn-type":[{"type":"electronic","value":"1546-2226"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"2025-05-23","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-08-26","order":1,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-10-23","order":2,"name":"published","label":"Published Online","group":{"name":"publication_history","label":"Publication History"}}]}}