{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T02:25:59Z","timestamp":1774578359235,"version":"3.50.1"},"reference-count":40,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,11,9]],"date-time":"2022-11-09T00:00:00Z","timestamp":1667952000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,11,9]],"date-time":"2022-11-09T00:00:00Z","timestamp":1667952000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,11,9]]},"DOI":"10.23919\/fruct56874.2022.9953810","type":"proceedings-article","created":{"date-parts":[[2022,11,28]],"date-time":"2022-11-28T20:25:13Z","timestamp":1669667113000},"page":"162-168","source":"Crossref","is-referenced-by-count":14,"title":["Automated Rule-Based Data Cleaning Using NLP"],"prefix":"10.23919","author":[{"given":"Konstantinos","family":"Mavrogiorgos","sequence":"first","affiliation":[{"name":"University of Piraeus,Piraeus,Greece"}]},{"given":"Argyro","family":"Mavrogiorgou","sequence":"additional","affiliation":[{"name":"University of Piraeus,Piraeus,Greece"}]},{"given":"Athanasios","family":"Kiourtis","sequence":"additional","affiliation":[{"name":"University of Piraeus,Piraeus,Greece"}]},{"given":"Nikolaos","family":"Zafeiropoulos","sequence":"additional","affiliation":[{"name":"University of Piraeus,Piraeus,Greece"}]},{"given":"Spyridon","family":"Kleftakis","sequence":"additional","affiliation":[{"name":"University of Piraeus,Piraeus,Greece"}]},{"given":"Dimosthenis","family":"Kyriazis","sequence":"additional","affiliation":[{"name":"University of 
Piraeus,Piraeus,Greece"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.5455\/aim.2019.27.369-373"},{"key":"ref38","first-page":"178","article-title":"Towards a secure semantic knowledge of healthcare data through structural ontological transformations","author":"kiourtis","year":"2018","journal-title":"Joint Conference on Knowledge-Based Software Engineering"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.2196\/23099"},{"key":"ref32","article-title":"Text Mining an Automatic Short Answer Grading (ASAG), Comparison of Three Methods of Cosine Similarity, Jaccard Similarity and Dice's Coefficient","volume":"2","author":"henderi","year":"2021","journal-title":"Journal of Applied Data Sciences"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.11591\/ijece.v12i6.pp6461-6471"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/s12530-019-09286-5"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.3233\/SHTI210332"},{"key":"ref36","year":"2022","journal-title":"Kaggle - Stroke Prediction Dataset"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1186\/s12911-020-1023-5"},{"key":"ref34","year":"2022"},{"key":"ref10","article-title":"Discovery and contextual data cleaning with ontology functional dependencies","author":"zheng","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref40","first-page":"283","article-title":"be HEALTHIER: A microservices platform for analyzing and exploiting healthcare data","author":"mavrogiorgou","year":"0","journal-title":"2021 IEEE 34th International Symposium on Computer-Based Medical Systems 
(CBMS)"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CAC53003.2021.9727565"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3512850.3512856"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2979670"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2020.3001216"},{"key":"ref15","first-page":"1","article-title":"Alberto: Italian BERT language understanding model for NLP challenging tasks based on tweets","volume":"2481","author":"polignano","year":"0","journal-title":"6th Italian Conference on Computational Linguistics CLiC-it"},{"key":"ref16","doi-asserted-by":"crossref","DOI":"10.51391\/trva.2020.10.07","article-title":"A Literature Review of NLP Approaches to Fake News Detection and Their Applicability to Romanian-Language News Analysis","author":"busioc","year":"2020","journal-title":"Revista Transilvania"},{"key":"ref17","first-page":"7517","article-title":"Fake news detection in social media using graph neural networks and NLP techniques: A COVID-19 use-case","author":"hamid","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref18","article-title":"NLP-based feature extraction for the detection of COVID-19 misinformation videos on YouTube","author":"serrano","year":"0","journal-title":"Proceedings of the 1st Workshop on NLP for COVID-19 at ACL"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-020-19266-y"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3481646.3481648"},{"key":"ref4","first-page":"104967","volume":"181","author":"mavrogiorgou","year":"2019","journal-title":"Analyzing data and data sources towards a unified approach for ensuring end-to-end data and data sources quality in healthcare 4.0 Computer methods and programs in biomedicine"},{"key":"ref27","year":"2022","journal-title":"MongoDB MongoDB"},{"key":"ref3","year":"2017","journal-title":"Statista - Revenues from the natural language processing (NLP) market 
worldwide"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICICS52457.2021.9464551"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.5455\/aim.2019.27.341-347"},{"key":"ref5","first-page":"2201","author":"chu","year":"0","journal-title":"Data cleaning Overview and emerging challenges In Proceedings of the 2016 international conference on management of data"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.14778\/3476311.3476339"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.apenergy.2021.116851"},{"key":"ref2","year":"2022","journal-title":"Statista - Big data - Statistics & Facts"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1080\/02626667.2022.2144323"},{"key":"ref1","year":"2022","journal-title":"Statista - Data usage in marketing and advertising - Statistics & Facts"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.36548\/jscp.2020.4.002"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457258"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASC48083.2019.8946281"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2912574"},{"key":"ref23","article-title":"DataCLUE: A Benchmark Suite for Data-centric NLP","author":"xu","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref26","year":"2022","journal-title":"Python The python programming language"},{"key":"ref25","first-page":"266","article-title":"NLP for Product Safety Risk Assessment","author":"hellwig","year":"2021","journal-title":"International Conference on Intelligent Systems Design and Applications"}],"event":{"name":"2022 32nd Conference of Open Innovations Association (FRUCT)","location":"Tampere, Finland","start":{"date-parts":[[2022,11,9]]},"end":{"date-parts":[[2022,11,11]]}},"container-title":["2022 32nd Conference of Open Innovations Association 
(FRUCT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9953737\/9953804\/09953810.pdf?arnumber=9953810","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,6]],"date-time":"2023-02-06T19:29:11Z","timestamp":1675711751000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9953810\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,9]]},"references-count":40,"URL":"https:\/\/doi.org\/10.23919\/fruct56874.2022.9953810","relation":{},"subject":[],"published":{"date-parts":[[2022,11,9]]}}}