{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T14:36:51Z","timestamp":1774535811566,"version":"3.50.1"},"reference-count":48,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,5]]},"DOI":"10.1109\/icde.2016.7498272","type":"proceedings-article","created":{"date-parts":[[2016,6,25]],"date-time":"2016-06-25T07:36:34Z","timestamp":1466840194000},"page":"577-588","source":"Crossref","is-referenced-by-count":36,"title":["Input selection for fast feature engineering"],"prefix":"10.1109","author":[{"given":"Michael R.","family":"Anderson","sequence":"first","affiliation":[]},{"given":"Michael","family":"Cafarella","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","first-page":"2825","article-title":"Scikit-learn: Machine learning in Python","volume":"12","author":"pedregosa","year":"2011","journal-title":"Journal of Machine Learning Research"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.3115\/1219840.1219885"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2008.239"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1561\/2200000024"},{"key":"ref30","article-title":"Intrusion detection with unlabeled data using clustering","author":"portnoy","year":"2001","journal-title":"in ACM Workshop on Data Mining Applied to Security"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/1656274.1656278"},{"key":"ref36","author":"rogers","year":"2011","journal-title":"A First Course in Machine Learning"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-21579-2_9"},{"key":"ref34","article-title":"Heterogenous uncertainty sampling for supervised learning","author":"lewis","year":"1994","journal-title":"ICML"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.14778\/2536274.2536315"},{"key":"ref40","article-title":"Google word2vec","year":"0"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/2428556.2428570"},{"key":"ref12","article-title":"Feature engineering for knowledge base construction","volume":"37","author":"r\u00e9","year":"2014","journal-title":"IEEE Data Eng Bulletin"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/2588555.2593678"},{"key":"ref14","article-title":"MLbase: A distributed machine-learning system","author":"kraska","year":"2013","journal-title":"CIDR"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/1142473.1142504"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.3115\/1690219.1690287"},{"key":"ref17","author":"witten","year":"2011","journal-title":"Data Mining Practical Machine Learning Tools and Techniques"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/253262.253291"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2007.12.001"},{"key":"ref28","article-title":"Impact of similarity measures on web-page clustering","author":"strehl","year":"2000","journal-title":"Proc Workshop AI Web Search"},{"key":"ref4","article-title":"How the Netflix Prize Was Won","author":"buskirk","year":"2009","journal-title":"Wired"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022699900025"},{"key":"ref3","article-title":"How Google's Algorithm Rules the Web","author":"levy","year":"2010","journal-title":"Wired"},{"key":"ref6","article-title":"MapReduce: Simplified data processing on large clusters","author":"dean","year":"2004","journal-title":"OSDI"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1186\/1471-2105-9-497"},{"key":"ref5","article-title":"An Overview of the DeepQA Project","author":"ferrucci","year":"2012","journal-title":"AI Magazine"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.938"},{"key":"ref7","article-title":"Spark: Cluster computing with working sets","author":"zaharia","year":"2010","journal-title":"HotCloud"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/2347736.2347755"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.14778\/2733004.2733054"},{"key":"ref1","article-title":"Brainwash: A data system for feature engineering","author":"anderson","year":"2013","journal-title":"CIDR"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.1080.0952"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/1753783.1753784"},{"key":"ref45","article-title":"Cloudera Impala","year":"0"},{"key":"ref48","article-title":"Building high-level features using large scale unsupervised learning","author":"le","year":"2012","journal-title":"ICML"},{"key":"ref22","article-title":"Approximate query processing: Taming the terabytes","author":"garofalakis","year":"2001","journal-title":"VLDB"},{"key":"ref47","article-title":"An analysis of single-layer networks in unsupervised feature learning","author":"coates","year":"2011","journal-title":"AISTATS"},{"key":"ref21","article-title":"Active learning literature survey","author":"settles","year":"2009"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/1807167.1807184"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/2465351.2465355"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.14778\/1920841.1920886"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/872757.872822"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.14778\/2350229.2350239"},{"key":"ref26","first-page":"1909","article-title":"Incremental support vector learning: Analysis, implementation and applications","volume":"7","author":"laskov","year":"2006","journal-title":"The Journal of Machine Learning Research"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.14778\/1454159.1454211"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/5326.983933"}],"event":{"name":"2016 IEEE 32nd International Conference on Data Engineering (ICDE)","location":"Helsinki, Finland","start":{"date-parts":[[2016,5,16]]},"end":{"date-parts":[[2016,5,20]]}},"container-title":["2016 IEEE 32nd International Conference on Data Engineering (ICDE)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7491900\/7498210\/07498272.pdf?arnumber=7498272","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2016,9,29]],"date-time":"2016-09-29T20:22:08Z","timestamp":1475180528000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7498272\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,5]]},"references-count":48,"URL":"https:\/\/doi.org\/10.1109\/icde.2016.7498272","relation":{},"subject":[],"published":{"date-parts":[[2016,5]]}}}