{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T20:54:39Z","timestamp":1762030479033,"version":"3.28.0"},"reference-count":24,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,12]]},"DOI":"10.1109\/bigdata.2016.7840596","type":"proceedings-article","created":{"date-parts":[[2017,2,7]],"date-time":"2017-02-07T16:46:59Z","timestamp":1486486019000},"page":"122-127","source":"Crossref","is-referenced-by-count":1,"title":["PSH: A probabilistic signature hash method with hash neighborhood candidate generation for fast edit-distance string comparison on big data"],"prefix":"10.1109","author":[{"given":"Joseph","family":"Jupin","sequence":"first","affiliation":[]},{"given":"Justin Y.","family":"Shi","sequence":"additional","affiliation":[]},{"given":"Eduard C.","family":"Dragut","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1090\/S0002-9947-04-03613-X"},{"key":"ref11","article-title":"Record Linkage Software in the Public Domain: A Comparison of Link Plus, The Link King, and a Basic","volume":"14","author":"campbell","year":"2008","journal-title":"Deterministic Algorithm In Health Informatics Journal"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/BF01185432"},{"key":"ref13","article-title":"TrieJoin: Efficient Trie-based String Similarity Joins with Edit-Distance Constraints","volume":"3","author":"wang","year":"2010","journal-title":"PVLDB"},{"journal-title":"The Art of Computer Programming Volume 3 Sorting and Searching","year":"1997","author":"knuth","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-82456-2"},{"key":"ref16","first-page":"933","volume":"1","author":"xiao","year":"2008","journal-title":"Ed-join an efficient algorithm for similarity joins with edit distance constraints In PVLDB"},{"journal-title":"Binary codes capable of correcting deletions insertions and reversals Soviet Physics Doklady","year":"1966","author":"levenshtein","key":"ref17"},{"journal-title":"A technique for computer detection and correction of spelling errors In Communications of the ACM","year":"1964","author":"damerau","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511574931"},{"key":"ref4","first-page":"414","volume":"84","author":"jaro","year":"1989","journal-title":"Advances in Record Linkage Methodology as Applied to Matching the 1985 Census of Tampa Florida In Journal of the American Statistical Society"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.2307\/2286061"},{"journal-title":"Febrl Freely Extensible Biomedical Record Linkage Manual Release 0 2","year":"2003","author":"christen","key":"ref6"},{"key":"ref5","volume":"1","author":"hernandez","year":"1998","journal-title":"Real-world data is dirty data cleansing and the merge\/purge problem In Journal of Data Mining and Knowledge Discovery"},{"journal-title":"Learning to Match and Cluster Large High-Dimensional Data Sets for Data Integration In SIGKDD'02","year":"2002","author":"cohen","key":"ref8"},{"key":"ref7","first-page":"169","author":"mc callum","year":"2000","journal-title":"Efficient clustering of high-dimensional data sets In KDD"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.2105\/AJPH.36.12.1412"},{"key":"ref1","volume":"17","author":"fayyad","year":"1996","journal-title":"Data Mining to Knowledge Discovery in Databases In AI Magazine"},{"key":"ref9","first-page":"684","author":"papadakis","year":"2016","journal-title":"Comparative analysis of approximate blocking techniques for entity resolution In PVLDB 9"},{"key":"ref20","first-page":"159","author":"sakoe","year":"1990","journal-title":"Dynamic programming algorithm optimization for spoken word recognition"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/375360.375365"},{"key":"ref21","volume":"3","author":"wang","year":"2010","journal-title":"Trie-Join Efficient Trie-based String Similarity Joins with Edit-Distance Constraints In PVLDB"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/367236.367286"},{"key":"ref23","first-page":"491","article-title":"Approximate string joins in a database (almost) for free","author":"gravano","year":"2001","journal-title":"VLDB"}],"event":{"name":"2016 IEEE International Conference on Big Data (Big Data)","start":{"date-parts":[[2016,12,5]]},"location":"Washington DC,USA","end":{"date-parts":[[2016,12,8]]}},"container-title":["2016 IEEE International Conference on Big Data (Big Data)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7818133\/7840573\/07840596.pdf?arnumber=7840596","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,12,13]],"date-time":"2017-12-13T15:55:04Z","timestamp":1513180504000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7840596\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,12]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/bigdata.2016.7840596","relation":{},"subject":[],"published":{"date-parts":[[2016,12]]}}}