{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T17:07:14Z","timestamp":1725469634444},"reference-count":12,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,12]]},"DOI":"10.1109\/bigdata.2016.7840776","type":"proceedings-article","created":{"date-parts":[[2017,2,7]],"date-time":"2017-02-07T21:46:59Z","timestamp":1486504019000},"page":"1639-1646","source":"Crossref","is-referenced-by-count":1,"title":["QED: Groupon's ETL management and curated feature catalog system for machine learning"],"prefix":"10.1109","author":[{"given":"Derrick C.","family":"Spell","sequence":"first","affiliation":[]},{"given":"Ling-Yong","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Richard T.","family":"Shomer","sequence":"additional","affiliation":[]},{"given":"Bahador","family":"Nooraei","sequence":"additional","affiliation":[]},{"given":"Jarrell","family":"Waggoner","sequence":"additional","affiliation":[]},{"given":"Xiao-Han T.","family":"Zeng","sequence":"additional","affiliation":[]},{"given":"Jae Young","family":"Chung","sequence":"additional","affiliation":[]},{"given":"Kai-Chen","family":"Cheng","sequence":"additional","affiliation":[]},{"given":"Daniel","family":"Kirsche","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref4","article-title":"Data Wrangling: The Challenging Journey from the Wild to the Lake","author":"terrizzano","year":"2015","journal-title":"7th Biennial Conference on Innovative Data Systems ResearchCIDR'15"},{"key":"ref3","article-title":"Governing and Managing Big Data for Analytics and Decision Makers","author":"chessell","year":"2014","journal-title":"IBM Tech Rep"},{"key":"ref10","article-title":"Datahub: Collaborative data science & dataset version management at scale","author":"bhardwaj","year":"2014","journal-title":"arXiv preprint arXiv 1409 0798"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.4018\/jdwm.2009070101"},{"key":"ref11","first-page":"2","article-title":"Resilient distributed datasets: A fault-tolerant abstraction for in-memory cluster computing","author":"zaharia","year":"2012","journal-title":"NSDI'12 Proceedings of the 9th USENIX Conference on Networked Systems Design and Implementation"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2903730"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939693"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.14778\/2367502.2367572"},{"article-title":"Big data requires a big, new architecture","year":"2011","author":"woods","key":"ref7"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/2481244.2481247"},{"article-title":"Governing and managing big data for analytics and decision makers","year":"2014","author":"chessell","key":"ref9"},{"key":"ref1","article-title":"Challenges and Opportunities with Big Data","author":"agrawal","year":"2011","journal-title":"Tech Rep"}],"event":{"name":"2016 IEEE International Conference on Big Data (Big Data)","start":{"date-parts":[[2016,12,5]]},"location":"Washington, DC","end":{"date-parts":[[2016,12,8]]}},"container-title":["2016 IEEE International Conference on Big Data (Big Data)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7818133\/7840573\/07840776.pdf?arnumber=7840776","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T01:13:20Z","timestamp":1588295600000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/7840776\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,12]]},"references-count":12,"URL":"https:\/\/doi.org\/10.1109\/bigdata.2016.7840776","relation":{},"subject":[],"published":{"date-parts":[[2016,12]]}}}