{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T14:31:14Z","timestamp":1730298674185,"version":"3.28.0"},"reference-count":17,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,12,5]],"date-time":"2021-12-05T00:00:00Z","timestamp":1638662400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,12,5]],"date-time":"2021-12-05T00:00:00Z","timestamp":1638662400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,12,5]],"date-time":"2021-12-05T00:00:00Z","timestamp":1638662400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,12,5]]},"DOI":"10.1109\/ssci50451.2021.9659897","type":"proceedings-article","created":{"date-parts":[[2022,1,24]],"date-time":"2022-01-24T21:09:51Z","timestamp":1643058591000},"page":"1-7","source":"Crossref","is-referenced-by-count":3,"title":["Near duplicate column identification: a machine learning approach"],"prefix":"10.1109","author":[{"given":"Marc","family":"Chevallier","sequence":"first","affiliation":[]},{"given":"Faouzi","family":"Boufares","sequence":"additional","affiliation":[]},{"given":"Nistor","family":"Grozavu","sequence":"additional","affiliation":[]},{"given":"Nicoleta","family":"Rogovschi","sequence":"additional","affiliation":[]},{"given":"Charly","family":"Clairmont","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"2579","article-title":"Visualizing high-dimensional data using t-sne","volume":"9","author":"van der maaten","year":"2008","journal-title":"Journal of Machine Learning Research"},{"key":"ref11","article-title":"Stochastic neighbor embedding","volume":"15","author":"hinton","year":"2003","journal-title":"Advances in neural information processing systems"},{"key":"ref12","first-page":"iii-477","article-title":"Entropic affinities: Properties and efficient numerical computation","author":"vladymyrov","year":"0","journal-title":"Proceedings of the 30th International Conference on International Conference on Machine Learning - Volume 28 ser ICML'13 JMLR org"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1023\/A:1010933404324"},{"key":"ref14","article-title":"Lightgbm: A highly efficient gradient boosting decision tree","volume":"30","author":"ke","year":"2017","journal-title":"Advances in neural information processing systems"},{"journal-title":"Deep Learning","year":"2016","author":"goodfellow","key":"ref15"},{"key":"ref16","first-page":"1401","article-title":"A short introduction to boosting","author":"freund","year":"0","journal-title":"In Proceedings of the Sixteenth International Joint Conference on Artificial Intelligence"},{"journal-title":"CatBoost unbiased boosting with categorical features","year":"2019","author":"prokhorenkova","key":"ref17"},{"journal-title":"Single-column data profiling","year":"2020","author":"harmouch","key":"ref4"},{"key":"ref3","first-page":"311","article-title":"Sampling-based estimation of the number of distinct values of an attribute","author":"haas","year":"1995","journal-title":"Proc 21th Int Conf Very Large Data Bases VLDB 95"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1017\/9781108684163"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1111\/j.1469-8137.1912.tb05611.x"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.14778\/3407790.3407793"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330993"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3310205"},{"key":"ref1","first-page":"11","article-title":"Bad data costs the us $3 trillion per year","volume":"22","author":"redman","year":"2016","journal-title":"Harvard Business Review"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-73689-7_55"}],"event":{"name":"2021 IEEE Symposium Series on Computational Intelligence (SSCI)","start":{"date-parts":[[2021,12,5]]},"location":"Orlando, FL, USA","end":{"date-parts":[[2021,12,7]]}},"container-title":["2021 IEEE Symposium Series on Computational Intelligence (SSCI)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9659537\/9659538\/09659897.pdf?arnumber=9659897","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T16:56:30Z","timestamp":1652201790000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9659897\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12,5]]},"references-count":17,"URL":"https:\/\/doi.org\/10.1109\/ssci50451.2021.9659897","relation":{},"subject":[],"published":{"date-parts":[[2021,12,5]]}}}