{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T16:25:44Z","timestamp":1761582344874,"version":"3.40.3"},"publisher-location":"Cham","reference-count":37,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031579158"},{"type":"electronic","value":"9783031579165"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-57916-5_13","type":"book-chapter","created":{"date-parts":[[2024,4,8]],"date-time":"2024-04-08T03:10:10Z","timestamp":1712545810000},"page":"145-158","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Discovering Personally Identifiable Information in\u00a0Textual Data - A Case Study with\u00a0Automated Concatenation of\u00a0Embeddings"],"prefix":"10.1007","author":[{"given":"Md Hasan","family":"Shahriar","sequence":"first","affiliation":[]},{"given":"Abrar Hasin","family":"Kamal","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6587-5313","authenticated-orcid":false,"given":"Anne V. D. M.","family":"Kayem","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,4,9]]},"reference":[{"key":"13_CR1","doi-asserted-by":"publisher","unstructured":"Ahmad, F., et al.: A deep learning architecture for psychometric natural language processing. In: ACM Trans. Inf. Syst. 38(1) ISSn: 1046-8188 (2020). https:\/\/doi.org\/10.1145\/3365211 URL: https:\/\/doi.org\/10.1145\/3365211","DOI":"10.1145\/3365211"},{"key":"13_CR2","unstructured":"Allen, J.F.: Natural language processing. In: Encyclopedia of Computer Science. GBR: John Wiley and Sons Ltd., pp. 1218\u20131222. isbn: 0470864125 (2003)"},{"key":"13_CR3","unstructured":"Cohen, W.,: Enron email dataset. carnegie mellon university, 2015. url: https:\/\/www.cs.cmu.edu\/~enron. (Accessed: 8 Mar 2023)"},{"key":"13_CR4","unstructured":"Devlin, J., et al.: BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In: CoRR abs\/1810.04805 (2018). arXiv: 1810.04805. URL: http:\/\/arxiv.org\/abs\/1810.04805"},{"key":"13_CR5","doi-asserted-by":"publisher","unstructured":"Dong, J.: Natural language processing pretraining language model for computer intelligent recognition technology\u201d. In: ACM Trans. Asian Low-Resour. Lang. Inf. Process. (2023). issn: 2375-4699. https:\/\/doi.org\/10.1145\/3605210. URL: https:\/\/doi.org\/10.1145\/3605210","DOI":"10.1145\/3605210"},{"key":"13_CR6","doi-asserted-by":"publisher","unstructured":"Garfinkel, S.: De-identification of personal information. (2015), p. 30. https:\/\/doi.org\/10.6028\/NIST.IR.8053","DOI":"10.6028\/NIST.IR.8053"},{"key":"13_CR7","unstructured":"DSGVO Germany. General data protection regulation (GDPR) (2018). URL: https:\/\/gdpr-info.eu"},{"key":"13_CR8","doi-asserted-by":"publisher","unstructured":"Gillette, J.B., et al.: Data protections for minors with named entity recognition. In: 2022 IEEE International Conference on Big Data (Big Data). (2022), pp. 3315-3323. https:\/\/doi.org\/10.1109\/BigData55660.2022.10021086","DOI":"10.1109\/BigData55660.2022.10021086"},{"key":"13_CR9","doi-asserted-by":"publisher","unstructured":"Hamza, R., Zettsu, K.: Investigation on privacy-preserving techniques for personal data. In: Proceedings of the 2021 Workshop on Intelligent Cross-Data Analysis and Retrieval. ICDAR \u201921. Taipei, Taiwan: Association for Computing Machinery (2021), pp. 62\u201366. isbn: 9781450385299. https:\/\/doi.org\/10.1145\/3463944.3469267","DOI":"10.1145\/3463944.3469267"},{"key":"13_CR10","doi-asserted-by":"publisher","unstructured":"Hassan, S.U., Ahamed, J., Ahmad, K.: Analytics of machine learning-based algorithms for text classification. In: Sustainable Operations and Computers 3 (2022), pp. 238\u2013248. issn: 2666-4127. https:\/\/doi.org\/10.1016\/j.susoc.2022.03.001. URL: https:\/\/www.sciencedirect.com\/science\/article\/pii\/S2666412722000101","DOI":"10.1016\/j.susoc.2022.03.001"},{"key":"13_CR11","doi-asserted-by":"publisher","unstructured":"Hathurusinghe, R.: Building a personally identifiable information recognizer in a privacy preserved manner using automated annotation and federated learning (2020). https:\/\/doi.org\/10.20381\/ruor-25235. URL: http:\/\/hdl.handle.net\/10393\/41011","DOI":"10.20381\/ruor-25235"},{"key":"13_CR12","doi-asserted-by":"publisher","unstructured":"Honnibal, M., et al.: SpaCy: industrial-strength natural language processing in python. In: (2020). https:\/\/doi.org\/10.5281\/zenodo.1212303. Accessed 08 Mar 2023","DOI":"10.5281\/zenodo.1212303"},{"key":"13_CR13","doi-asserted-by":"publisher","unstructured":"Kulkarni, P., Cauvery, N.K.: Personally identifiable information (PII) detection in the unstructured large text corpus using natural language processing and unsupervised learning technique. In: Int. J. Adv. Comput. Sci. App. 12(9) (2021). https:\/\/doi.org\/10.14569\/IJACSA.2021.0120957. URL: http:\/\/dx.doi.org\/10.14569\/IJACSA.2021.0120957","DOI":"10.14569\/IJACSA.2021.0120957"},{"key":"13_CR14","unstructured":"Lample, G., Conneau, A.: Cross-lingual language model pretraining (2019). arXiv: 1901.07291 [cs.CL]"},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"Lewis, M., et al.: BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension (2019). arXiv: 1910.13461 [cs.CL]","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"13_CR16","doi-asserted-by":"publisher","unstructured":"Liu, Y., et al.: RoBERTa: a robustly optimized BERT pretraining approach (2019). https:\/\/doi.org\/10.48550\/ARXIV.1907.11692. URL: https:\/\/arxiv.org\/abs\/1907.11692","DOI":"10.48550\/ARXIV.1907.11692"},{"key":"13_CR17","doi-asserted-by":"publisher","unstructured":"Liu. Y., et al.: Automated PII extraction from social media for raising privacy awareness: a deep transfer learning approach. In: 2021 IEEE International Conference on Intelligence and Security Informatics (ISI) (2021), pp. 1\u20136. https:\/\/doi.org\/10.1109\/ISI53945.2021.9624678","DOI":"10.1109\/ISI53945.2021.9624678"},{"key":"13_CR18","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization (2019). arXiv: 1711.05101 [cs.LG]"},{"key":"13_CR19","doi-asserted-by":"publisher","unstructured":"McCallister, E., Grance, T., Scarfone, K.: Guide to protecting the confidentiality of personally identifiable information (PII), pp. 2\u20131 (2010). https:\/\/doi.org\/10.6028\/NIST.SP.800-122","DOI":"10.6028\/NIST.SP.800-122"},{"key":"13_CR20","unstructured":"McCann, B., et al.: Learned in translation: contextualized word vectors. In: Proceedings of the 31st International Conference on Neural Information Processing Systems. NIPS\u201917. Long Beach, California, USA: Curran Associates Inc., pp. 6297\u20136308 (2017). isbn: 9781510860964"},{"key":"13_CR21","doi-asserted-by":"crossref","unstructured":"Mohan, V.: Text mining: open source tokenization tools: an analysis. In: 3, pp. 37\u201347 (2016)","DOI":"10.5121\/acii.2016.3104"},{"key":"13_CR22","unstructured":"MongoDB.: Unstructured Data. Dec (2023). url: https:\/\/www.mongodb.com\/unstructured-data"},{"key":"13_CR23","doi-asserted-by":"publisher","unstructured":"Ratinov, L., Roth, D.: Design challenges and misconceptions in named entity recognition. In: Proceedings of the Thirteenth Conference on Computational Natural Language Learning. CoNLL \u201909. Boulder, Colorado: Association for Computational Linguistics, pp. 147\u2013155. (2009) isbn: 9781932432299. https:\/\/doi.org\/10.3115\/1596374.1596399","DOI":"10.3115\/1596374.1596399"},{"key":"13_CR24","doi-asserted-by":"publisher","unstructured":"Sarikaya, R., Hinton, G.E., Deoras, A.: Application of deep belief networks for natural language understanding. In: IEEE\/ACM Trans. Audio, Speech and Lang. Proc. 22(4), pp. 778\u2013784 (2014) issn: 2329-9290. https:\/\/doi.org\/10.1109\/TASLP.2014.2303296. URL: https:\/\/doi.org\/10.1109\/TASLP.2014.2303296","DOI":"10.1109\/TASLP.2014.2303296"},{"key":"13_CR25","unstructured":"da Silva, C.J.A.P.: Detecting and protecting personally identifiable information through machine learning techniques (2020). URL: https:\/\/hdl.handle.net\/10216\/129033"},{"key":"13_CR26","unstructured":"Sang, E.F.T.K.: Introduction to the CoNLL-2002 shared task: language-independent named entity recognition. In: COLING-02: The 6th Conference on Natural Language Learning 2002 (CoNLL-2002) (2002). URL: https:\/\/aclanthology.org\/W02-2024"},{"key":"13_CR27","unstructured":"Sang, E.F.T.K., De Meulder, F.: Introduction to the CoNLL-2003 shared task: language-independent named entity recognition. In: Proceedings of the Seventh Conference on Natural Language Learning at HLT-NAACL 2003, pp. 142\u2013147 (2003) URL: https:\/\/aclanthology.org\/W03-0419"},{"key":"13_CR28","unstructured":"Tziortziotis, N., Dimitrakakis, C., Blekas, K.: Cover tree bayesian reinforcement learning. In: J. Mach. Learn. Res. 15(1), pp. 2313\u20132335 (2014) issn: 1532-4435"},{"key":"13_CR29","doi-asserted-by":"publisher","unstructured":"Vajjala, S., Balasubramaniam, R.: What do we really know about state of the art NER? (2022). https:\/\/doi.org\/10.48550\/ARXIV.2205.00034. URL: https:\/\/arxiv.org\/abs\/2205.00034","DOI":"10.48550\/ARXIV.2205.00034"},{"key":"13_CR30","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Adv. Neural Inf. process. syst. 30 (2017)"},{"key":"13_CR31","unstructured":"Wang, X., Jiang, Y.: Automated concatenation of embeddings for structured prediction (2022). URL: https:\/\/github.com\/Alibaba-NLP\/ACE"},{"key":"13_CR32","unstructured":"Xinyu, W., et al.: Automated concatenation of embeddings for structured prediction. In: ArXiv abs\/2010.05006 (2020). URL: https:\/\/api.semanticscholar.org\/CorpusID:222290783"},{"key":"13_CR33","doi-asserted-by":"publisher","unstructured":"Xinyu, W., et al.: Automated concatenation of embeddings for structured prediction. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Vol 1: Long Papers). Online: Association for Computational Linguistics, Sept., pp. 2643\u20132660 (2021) https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.206. URL: https:\/\/aclanthology.org\/2021.acl-long.206","DOI":"10.18653\/v1\/2021.acl-long.206"},{"key":"13_CR34","doi-asserted-by":"publisher","unstructured":"Wei, Y-C., Liao, T-Y., Wu, W-C.: Using machine learning to detect PII from attributes and supporting activities of information assets. In: J. Supercomputing 78 , pp. 9392\u20139413 (2022). https:\/\/doi.org\/10.1007\/s11227-021-04239-9","DOI":"10.1007\/s11227-021-04239-9"},{"key":"13_CR35","doi-asserted-by":"publisher","unstructured":"Woo, S., et al.: I\u2019ve got your packages: harvesting customers\u2019 delivery order information using package tracking number enumeration attacks. In: Proceedings of The Web Conference 2020. WWW \u201920. Taipei, Taiwan: Association for Computing Machinery, 2020, pp. 2948-2954. isbn: 9781450370233. https:\/\/doi.org\/10.1145\/3366423.3380062. URL: https:\/\/doi.org\/10.1145\/3366423.3380062","DOI":"10.1145\/3366423.3380062"},{"key":"13_CR36","doi-asserted-by":"publisher","unstructured":"Yang, Z., et al.: XLNet: generalized autoregressive pretraining for language understanding (2019). https:\/\/doi.org\/10.48550\/ARXIV.1906.08237. URL: https:\/\/arxiv.org\/abs\/1906.08237","DOI":"10.48550\/ARXIV.1906.08237"},{"key":"13_CR37","doi-asserted-by":"publisher","unstructured":"Zini El, J., Awad, M.: On the explainability of natural language processing deep models. In: ACM Comput. Surv. 55.5 (Dec. 2022). issn: 0360-0300. https:\/\/doi.org\/10.1145\/3529755. URL: https:\/\/doi.org\/10.1145\/3529755","DOI":"10.1145\/3529755"}],"container-title":["Lecture Notes on Data Engineering and Communications Technologies","Advanced Information Networking and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-57916-5_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,8]],"date-time":"2024-04-08T03:21:41Z","timestamp":1712546501000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-57916-5_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031579158","9783031579165"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-57916-5_13","relation":{},"ISSN":["2367-4512","2367-4520"],"issn-type":[{"type":"print","value":"2367-4512"},{"type":"electronic","value":"2367-4520"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"9 April 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AINA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Advanced Information Networking and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kitakyushu","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 April 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 April 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"38","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aina2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/voyager.ce.fit.ac.jp\/conf\/aina\/2024\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}