{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T06:23:04Z","timestamp":1761978184188,"version":"build-2065373602"},"reference-count":24,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2013,11,1]],"date-time":"2013-11-01T00:00:00Z","timestamp":1383264000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J. Comput. Sci. Technol."],"published-print":{"date-parts":[[2013,11]]},"DOI":"10.1007\/s11390-013-1393-6","type":"journal-article","created":{"date-parts":[[2013,11,14]],"date-time":"2013-11-14T05:46:44Z","timestamp":1384408004000},"page":"989-1011","source":"Crossref","is-referenced-by-count":12,"title":["Partition-Based Online Aggregation with Shared Sampling in the Cloud"],"prefix":"10.1007","volume":"28","author":[{"given":"Yu-Xiang","family":"Wang","sequence":"first","affiliation":[]},{"given":"Jun-Zhou","family":"Luo","sequence":"additional","affiliation":[]},{"given":"Ai-Bo","family":"Song","sequence":"additional","affiliation":[]},{"given":"Fang","family":"Dong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2013,11,8]]},"reference":[{"key":"1393_CR1","unstructured":"Herodotou H, Lim H, Luo G et al. Starfish: A self-tuning system for big data analytics. In Proc. the 15th CIDR, Apr. 2011, pp.261-272."},{"key":"1393_CR2","doi-asserted-by":"crossref","unstructured":"Wu S, Ooi B C, Tan K L. Continuous sampling for online aggregation over multiple queries. In Proc. the 2010 International Conference on Management of Data (SIGMOD), June 2010, pp.651-662.","DOI":"10.1145\/1807167.1807238"},{"key":"1393_CR3","doi-asserted-by":"crossref","unstructured":"Chaudhuri S, Das G, Datar M et al. Overcoming limitations of sampling for aggregation queries. In Proc. the 17th Int. Conf. Data Engineering, Apr. 2001, pp.534-544.","DOI":"10.1109\/ICDE.2001.914867"},{"issue":"10","key":"1393_CR4","first-page":"1028","volume":"5","author":"N Laptev","year":"2012","unstructured":"Laptev N, Zeng K, Zaniolo C. Early accurate results for advanced analytics on MapReduce. PVLDB, 2012, 5(10): 1028-1039.","journal-title":"PVLDB"},{"issue":"2","key":"1393_CR5","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1145\/253262.253291","volume":"26","author":"JM Hellerstein","year":"1997","unstructured":"Hellerstein J M, Haas P J, Wang H J. Online aggregation. ACM SIGMOD Record., 1997, 26(2): 171\u2013182.","journal-title":"ACM SIGMOD Record"},{"issue":"1","key":"1393_CR6","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean J, Ghemawat S. MapReduce: Simplified data processing on large clusters. Communications of the ACM, 2008, 51(1): 107\u2013113.","journal-title":"Communications of the ACM"},{"key":"1393_CR7","doi-asserted-by":"crossref","unstructured":"Borkar V, Carey M, Grover R et al. Hyracks: A flexible and extensible foundation for data-intensive computing. In Proc. the 27th International Conference on Data Engineering, Apr. 2011, pp.1151-1162.","DOI":"10.1109\/ICDE.2011.5767921"},{"issue":"11","key":"1393_CR8","first-page":"1135","volume":"4","author":"N Pansare","year":"2011","unstructured":"Pansare N, Borkar V R, Jermaine C et al. Online aggregation for large MapReduce jobs. PVLDB, 2011, 4(11): 1135\u20131145.","journal-title":"PVLDB"},{"key":"1393_CR9","doi-asserted-by":"crossref","unstructured":"B\u00f6se J H, Andrzejak A, H\u00f6gqvist M. Beyond online aggregation: Parallel and incremental data mining with online map- reduce. In Proc. MDAC, Apr. 2010, Article No.3.","DOI":"10.1145\/1779599.1779602"},{"key":"1393_CR10","doi-asserted-by":"crossref","unstructured":"Condie T, Conway N, Alvaro P et al. Online aggregation and continuous query support in MapReduce. In Proc. the 2010 International Conference on Management of Data, June 2010, pp.1115-1118.","DOI":"10.1145\/1807167.1807295"},{"key":"1393_CR11","doi-asserted-by":"crossref","unstructured":"Shi Y, Meng X, Wang F et al. You can stop early with COLA: Online processing of aggregate queries in the cloud. In Proc. the 21st ACM International Conference on Information and Knowledgy Management, Oct. 29-Nov. 2, 2012, pp.1223-1232.","DOI":"10.1145\/2396761.2398423"},{"key":"1393_CR12","doi-asserted-by":"crossref","unstructured":"Grover R, Carey M J. Extending MapReduce for efficient predicate-based sampling. In Proc. the 28th International Conference on Data Engineering, Apr. 2012, pp.486-497.","DOI":"10.1109\/ICDE.2012.104"},{"key":"1393_CR13","doi-asserted-by":"crossref","unstructured":"Wang Y, Luo J, Song A, Jin J H, Dong F. Improving online aggregation performance for skewed data distribution. In Proc. Database Systems for Advanced Applications, Apr. 2012, pp.18-32.","DOI":"10.1007\/978-3-642-29038-1_4"},{"key":"1393_CR14","doi-asserted-by":"crossref","unstructured":"Chaudhuri S, Das G, Srivastava U. Effective use of block-level sampling in statistics estimation. In Proc. the 2004 International Conference on Management of Data, June 2004, pp.287-298.","DOI":"10.1145\/1007568.1007602"},{"issue":"8","key":"1393_CR15","doi-asserted-by":"crossref","first-page":"36","DOI":"10.1145\/1536616.1536632","volume":"52","author":"A Jacobs","year":"2009","unstructured":"Jacobs A. The pathologies of big data. Communications of the ACM, 2009, 52(8): 36\u201344.","journal-title":"Communications of the ACM"},{"key":"1393_CR16","doi-asserted-by":"crossref","unstructured":"Soroush E, Balazinska M, Wang D. Arraystore: A storage manager for complex parallel array processing. In Proc. the 2011 International Conference on Management of Data, June 2011, pp.253-264.","DOI":"10.1145\/1989323.1989351"},{"issue":"9","key":"1393_CR17","first-page":"575","volume":"4","author":"MY Eltabakh","year":"2011","unstructured":"Eltabakh M Y, Tian Y, Ozcan F et al. CoHadoop: Flexible data placement and its exploitation in Hadoop. PVLDB, 2011, 4(9): 575\u2013585.","journal-title":"PVLDB"},{"issue":"1\/2","key":"1393_CR18","first-page":"494","volume":"3","author":"T Nykiel","year":"2010","unstructured":"Nykiel T, Potamias M, Mishra C et al. MRShare: Sharing across multiple queries in MapReduce. PVLDB, 2010, 3(1\/2): 494\u2013505.","journal-title":"PVLDB"},{"issue":"1","key":"1393_CR19","first-page":"443","volume":"2","author":"S Wu","year":"2009","unstructured":"Wu S, Jiang S, Ooi B C et al. Distributed online aggregations. PVLDB, 2009, 2(1): 443\u2013454.","journal-title":"PVLDB"},{"key":"1393_CR20","doi-asserted-by":"crossref","unstructured":"Zaharia M, Borthakur D, Sen Sarma J et al. Delay scheduling: A simple technique for achieving locality and fairness in cluster scheduling. In Proc. the 5th European Conference on Computer System, Apr. 2010, pp.265-278.","DOI":"10.1145\/1755913.1755940"},{"key":"1393_CR21","unstructured":"Chaudhuri S, Narasyrra V. Program for tpc-d data generation with skew. Technical Report, ftp:\/\/ftp.research.micro-soft.com\/pub\/user.\/viveknar\/tpcdskew, Dec. 2012."},{"key":"1393_CR22","unstructured":"Haas P J. Large-sample and deterministic confidence intervals for online aggregation. In Proc. the 9th International Conference on Scientific and Statistical Database Management, Aug. 1997, pp.51-62."},{"issue":"2","key":"1393_CR23","doi-asserted-by":"crossref","first-page":"287","DOI":"10.1145\/304181.304208","volume":"28","author":"PJ Haas","year":"1999","unstructured":"Haas P J, Hellerstein J M. Ripple joins for online aggregation. ACM SIGMOD Record, 1999, 28(2): 287\u2013298.","journal-title":"ACM SIGMOD Record"},{"key":"1393_CR24","doi-asserted-by":"crossref","unstructured":"Luo G, Ellmann C J, Haas P J et al. A scalable hash ripple join algorithm. In Proc. the 2002 International Conference on Management of Data, June 2002, pp.252-262.","DOI":"10.1145\/564691.564721"}],"container-title":["Journal of Computer Science and Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-013-1393-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11390-013-1393-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-013-1393-6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,30]],"date-time":"2025-04-30T20:31:27Z","timestamp":1746045087000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11390-013-1393-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,11]]},"references-count":24,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2013,11]]}},"alternative-id":["1393"],"URL":"https:\/\/doi.org\/10.1007\/s11390-013-1393-6","relation":{},"ISSN":["1000-9000","1860-4749"],"issn-type":[{"type":"print","value":"1000-9000"},{"type":"electronic","value":"1860-4749"}],"subject":[],"published":{"date-parts":[[2013,11]]}}}