{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,9]],"date-time":"2025-04-09T08:31:05Z","timestamp":1744187465287,"version":"3.28.0"},"reference-count":35,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015,10]]},"DOI":"10.1109\/bigdata.2015.7363845","type":"proceedings-article","created":{"date-parts":[[2015,12,28]],"date-time":"2015-12-28T16:36:21Z","timestamp":1451320581000},"page":"958-963","source":"Crossref","is-referenced-by-count":13,"title":["Scalable k-NN based text clustering"],"prefix":"10.1109","author":[{"given":"Alessandro","family":"Lulli","sequence":"first","affiliation":[]},{"given":"Thibault","family":"Debatty","sequence":"additional","affiliation":[]},{"given":"Matteo","family":"Dell'Amico","sequence":"additional","affiliation":[]},{"given":"Pietro","family":"Michiardi","sequence":"additional","affiliation":[]},{"given":"Laura","family":"Ricci","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","article-title":"The state of record linkage and current research problems","author":"winkler","year":"1999","journal-title":"Statistical Research Division US Census Bureau"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/S14-2038"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/2030376.2030395"},{"key":"ref30","article-title":"A comparison of document clustering techniques","author":"steinbach","year":"2000","journal-title":"Proc KDD Workshop Text Mining"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1023\/B:MACH.0000027785.44527.d6"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-40991-2_42"},{"key":"ref10","article-title":"Scalable graph building from text data","author":"debatty","year":"2014","journal-title":"Proc ACM BigMine"},{"key":"ref11","article-title":"Clustering indices","author":"desgraupes","year":"2013","journal-title":"University of Paris Ouest"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/1963405.1963487"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.physrep.2009.11.002"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1002\/sim.4780140510"},{"key":"ref15","article-title":"Super-bit locality-sensitive hashing","author":"ji","year":"2012","journal-title":"Proc of NIPS"},{"key":"ref16","first-page":"21","article-title":"Global min-cuts in rnc, and other ramifications of a simple min-cut algorithm","volume":"93","author":"karger","year":"1993","journal-title":"SODA"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/354756.354772"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/2670979.2670997"},{"key":"ref19","first-page":"303","article-title":"A very fast method for clustering big text datasets","author":"lin","year":"2010","journal-title":"ECAI"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/0377-0427(87)90125-7"},{"year":"0","key":"ref4"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2013.6544813"},{"journal-title":"Clustering the News with Spark and MLLib","year":"0","key":"ref3"},{"key":"ref6","article-title":"Beyond trending topics: Real-world event identification on twitter","author":"becker","year":"2011","journal-title":"Proc of ICWSM"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2014.7004285"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-3223-4"},{"key":"ref8","article-title":"Dimension independent similarity computation","author":"bosagh-zadeh","year":"2012","journal-title":"Journal of Machine Learning Research"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/775047.775110"},{"journal-title":"Apache spark machine learning library","year":"0","key":"ref2"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2014.7004276"},{"journal-title":"Apache SPARK","year":"0","key":"ref1"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2010.35"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ISCC.2015.7405576"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1982.1056489"},{"key":"ref24","article-title":"Distributed representations of words and phrases and their compositionality","author":"mikolov","year":"2013","journal-title":"Proc of NIPS"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1142\/S0218213004001466"},{"key":"ref26","volume":"77","author":"rajaraman","year":"2012","journal-title":"Mining of Massive Datasets"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.74.036104"}],"event":{"name":"2015 IEEE International Conference on Big Data (Big Data)","start":{"date-parts":[[2015,10,29]]},"location":"Santa Clara, CA, USA","end":{"date-parts":[[2015,11,1]]}},"container-title":["2015 IEEE International Conference on Big Data (Big Data)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7347101\/7363706\/07363845.pdf?arnumber=7363845","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,3,24]],"date-time":"2017-03-24T20:45:20Z","timestamp":1490388320000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7363845\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,10]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/bigdata.2015.7363845","relation":{},"subject":[],"published":{"date-parts":[[2015,10]]}}}