{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T17:13:17Z","timestamp":1743095597051,"version":"3.40.3"},"publisher-location":"Cham","reference-count":13,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031429408"},{"type":"electronic","value":"9783031429415"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-42941-5_23","type":"book-chapter","created":{"date-parts":[[2023,8,30]],"date-time":"2023-08-30T08:02:46Z","timestamp":1693382566000},"page":"256-265","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Thesaurus-Based Transformation: A Classification Method for\u00a0Real Dirty Data"],"prefix":"10.1007","author":[{"given":"Maxime","family":"Perrot","sequence":"first","affiliation":[]},{"given":"Micka\u00ebl","family":"Baron","sequence":"additional","affiliation":[]},{"given":"Brice","family":"Chardin","sequence":"additional","affiliation":[]},{"given":"St\u00e9phane","family":"Jean","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,8,31]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Abdi, H., Williams, L.J.: Principal component analysis. Wiley Interdisciplinary Rev. Comput. 
Stat., 2(4), 433\u2013459 (2010)","key":"23_CR1","DOI":"10.1002\/wics.101"},{"doi-asserted-by":"crossref","unstructured":"Borisov, V., Leemann, T., Se\u00dfler, K., Haug, J., Pawelczyk, M., Kasneci, G.: Deep neural networks and tabular data: a survey. IEEE Trans. Neural Networks Learn. Syst., 1\u201321 (2022)","key":"23_CR2","DOI":"10.1109\/TNNLS.2022.3229161"},{"unstructured":"Broder, A.Z.: On the resemblance and containment of documents. In: Proceedings. Compression and Complexity of SEQUENCES 1997, pp. 21\u201329. IEEE (1997)","key":"23_CR3"},{"doi-asserted-by":"crossref","unstructured":"Canny, J.: Gap: a factor model for discrete data. In: Proceedings of the 27th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 122\u2013129 (2004)","key":"23_CR4","DOI":"10.1145\/1008992.1009016"},{"unstructured":"Cerda, P., Varoquaux, G.: Encoding high-cardinality string categorical variables. IEEE Trans. Knowl. Data Eng. (2020)","key":"23_CR5"},{"doi-asserted-by":"publisher","unstructured":"Cerda, P., Varoquaux, G., K\u00e9gl, B.: Similarity encoding for learning with dirty categorical variables. Mach. Learn., 1477\u20131494 (2018). https:\/\/doi.org\/10.1007\/s10994-018-5724-2","key":"23_CR6","DOI":"10.1007\/s10994-018-5724-2"},{"doi-asserted-by":"crossref","unstructured":"Chen, T., Guestrin, C.: Xgboost: a scalable tree boosting system. In: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 785\u2013794 (2016)","key":"23_CR7","DOI":"10.1145\/2939672.2939785"},{"doi-asserted-by":"crossref","unstructured":"Crow, J.F., Kimura, M.: Efficiency of truncation selection. Proc. Natl. Acad. Sci. 76(1), 396\u2013399 (1979)","key":"23_CR8","DOI":"10.1073\/pnas.76.1.396"},{"unstructured":"Dwarampudi, M., Reddy, N.V.: Effects of padding on lstms and cnns. 
arXiv preprint arXiv:1903.07288 (2019)","key":"23_CR9"},{"unstructured":"Prokhorenkova, L., Gusev, G., Vorobev, A., Dorogush, A.V., Gulin, A.: Catboost: unbiased boosting with categorical features. Advances in neural information processing systems, 31 (2018)","key":"23_CR10"},{"doi-asserted-by":"crossref","unstructured":"Tenney, I., Das, D., Pavlick, E.: BERT rediscovers the classical NLP pipeline. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 4593\u20134601, July 2019","key":"23_CR11","DOI":"10.18653\/v1\/P19-1452"},{"doi-asserted-by":"crossref","unstructured":"Turney, P.D.: A uniform approach to analogies, synonyms, antonyms, and associations. In: 22nd International Conference on Computational Linguistics (COLING-08) (2008)","key":"23_CR12","DOI":"10.3115\/1599081.1599195"},{"issue":"10","key":"23_CR13","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1145\/135239.135244","volume":"35","author":"W Sun","year":"1992","unstructured":"Sun, W., Manber, U.: Fast text searching: allowing errors. Commun. ACM 35(10), 83\u201391 (1992)","journal-title":"Commun. 
ACM"}],"container-title":["Communications in Computer and Information Science","New Trends in Database and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-42941-5_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T18:28:02Z","timestamp":1710268082000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-42941-5_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031429408","9783031429415"],"references-count":13,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-42941-5_23","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"31 August 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ADBIS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Advances in Databases and Information Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Barcelona","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 September 2023","order":7,"name":"conference_start_date","label":"Conference Start 
Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"adbis2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/adbis.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"77","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"14","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"25","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"18% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole 
number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"N\/A","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"N\/A","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}