{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,4]],"date-time":"2025-05-04T00:02:40Z","timestamp":1746316960213},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2015,2,18]],"date-time":"2015-02-18T00:00:00Z","timestamp":1424217600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Algorithmica"],"published-print":{"date-parts":[[2016,2]]},"DOI":"10.1007\/s00453-015-9974-0","type":"journal-article","created":{"date-parts":[[2015,2,17]],"date-time":"2015-02-17T16:23:08Z","timestamp":1424190188000},"page":"787-811","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Space-Efficient Estimation of Statistics Over Sub-Sampled Streams"],"prefix":"10.1007","volume":"74","author":[{"given":"Andrew","family":"McGregor","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"A.","family":"Pavan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Srikanta","family":"Tirthapura","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David P.","family":"Woodruff","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,2,18]]},"reference":[{"issue":"1","key":"9974_CR1","doi-asserted-by":"crossref","first-page":"137","DOI":"10.1006\/jcss.1997.1545","volume":"58","author":"N Alon","year":"1999","unstructured":"Alon, N., Matias, Y., Szegedy, M.: The space complexity of approximating the frequency moments. J. Comput. Syst. Sci. 58(1), 137\u2013147 (1999)","journal-title":"J. Comput. Syst. Sci."},{"key":"9974_CR2","unstructured":"Babcock, B., Datar, M., Motwani, R.: Sampling from a moving window over streaming data. In: Proceedings of ACM-SIAM Symposium on Discrete Algorithms (SODA), pp. 633\u2013634 (2002)"},{"key":"9974_CR3","unstructured":"Bar-Yossef, Z.: The complexity of massive dataset computations. Ph.D. thesis, University of California at Berkeley (2002)"},{"key":"9974_CR4","doi-asserted-by":"crossref","unstructured":"Bar-Yossef, Z.: Sampling lower bounds via information theory. In: Proceedings of 35th Annual ACM Symposium on Theory of Computing (STOC), pp. 335\u2013344 (2003)","DOI":"10.1145\/780542.780593"},{"key":"9974_CR5","doi-asserted-by":"crossref","unstructured":"Barakat, C., Iannaccone, G., Diot, C.: Ranking flows from sampled traffic. In: Proceedings of ACM Conference on Emerging Network Experiment and Technology (CoNEXT), pp. 188\u2013199 (2005)","DOI":"10.1145\/1095921.1095947"},{"key":"9974_CR6","doi-asserted-by":"crossref","unstructured":"Bhattacharyya, S., Madeira, A., Muthukrishnan, S., Ye, T.: How to scalably and accurately skip past streams. In: Proceedings of 23rd International Conference on Data Engineering (ICDE) Workshops, pp. 654\u2013663 (2007)","DOI":"10.1109\/ICDEW.2007.4401052"},{"key":"9974_CR7","doi-asserted-by":"crossref","unstructured":"Charikar, M., Chaudhuri, S., Motwani, R., Narasayya, V.R.: Towards estimation error guarantees for distinct values. In: Proceedings of 19th ACM Symposium on Principles of Database Systems (PODS), pp. 268\u2013279 (2000)","DOI":"10.1145\/335168.335230"},{"issue":"1","key":"9974_CR8","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1016\/S0304-3975(03)00400-6","volume":"312","author":"M Charikar","year":"2004","unstructured":"Charikar, M., Chen, K., Farach-Colton, M.: Finding frequent items in data streams. Theor. Comput. Sci. 312(1), 3\u201315 (2004)","journal-title":"Theor. Comput. Sci."},{"key":"9974_CR9","unstructured":"Cisco Systems: Random Sampled NetFlow. http:\/\/www.cisco.com\/en\/US\/docs\/ios\/12_0s\/feature\/guide\/nfstatsa.html"},{"issue":"11","key":"9974_CR10","doi-asserted-by":"crossref","first-page":"819","DOI":"10.14778\/3402707.3402721","volume":"4","author":"E Cohen","year":"2011","unstructured":"Cohen, E., Cormode, G., Duffield, N.G.: Structure-aware sampling: flexible and accurate summarization. Proc. VLDB Endow. 4(11), 819\u2013830 (2011)","journal-title":"Proc. VLDB Endow."},{"issue":"5","key":"9974_CR11","doi-asserted-by":"crossref","first-page":"1402","DOI":"10.1137\/10079817X","volume":"40","author":"E Cohen","year":"2011","unstructured":"Cohen, E., Duffield, N.G., Kaplan, H., Lund, C., Thorup, M.: Efficient stream sampling for variance-optimal estimation of subset sums. SIAM J. Comput. 40(5), 1402\u20131431 (2011)","journal-title":"SIAM J. Comput."},{"issue":"7","key":"9974_CR12","doi-asserted-by":"crossref","first-page":"1214","DOI":"10.1016\/j.jcss.2014.04.009","volume":"80","author":"E Cohen","year":"2014","unstructured":"Cohen, E., Duffield, N.G., Kaplan, H., Lund, C., Thorup, M.: Algorithms and estimators for summarization of unaggregated data streams. J. Comput. Syst. Sci. 80(7), 1214\u20131244 (2014)","journal-title":"J. Comput. Syst. Sci."},{"issue":"14","key":"9974_CR13","doi-asserted-by":"crossref","first-page":"2605","DOI":"10.1016\/j.comnet.2008.04.021","volume":"52","author":"E Cohen","year":"2008","unstructured":"Cohen, E., Grossaug, N., Kaplan, H.: Processing top-k queries from samples. Comput. Netw. 52(14), 2605\u20132622 (2008)","journal-title":"Comput. Netw."},{"key":"9974_CR14","doi-asserted-by":"crossref","unstructured":"Cormode, G., Garofalakis, M.: Sketching probabilistic data streams. In: Proceedings of 26th ACM International Conference on Management of Data (SIGMOD), pp. 281\u2013292 (2007)","DOI":"10.1145\/1247480.1247513"},{"issue":"1","key":"9974_CR15","doi-asserted-by":"crossref","first-page":"58","DOI":"10.1016\/j.jalgor.2003.12.001","volume":"55","author":"G Cormode","year":"2005","unstructured":"Cormode, G., Muthukrishnan, S.: An improved data stream summary: the count-min sketch and its applications. J. Algorithms 55(1), 58\u201375 (2005)","journal-title":"J. Algorithms"},{"key":"9974_CR16","doi-asserted-by":"crossref","unstructured":"Cormode, G., Muthukrishnan, S., Yi, K., Zhang, Q.: Optimal sampling from distributed streams. In: Proceedings of ACM Symposium on Principles of Database Systems (PODS), pp. 77\u201386 (2010)","DOI":"10.1145\/1807085.1807099"},{"key":"9974_CR17","doi-asserted-by":"crossref","unstructured":"Duffield, N.G., Lund, C., Thorup, M.: Properties and prediction of flow statistics from sampled packet streams. In: Proceedings of Internet Measurement Workshop, pp. 159\u2013171 (2002)","DOI":"10.1145\/637201.637225"},{"issue":"5","key":"9974_CR18","doi-asserted-by":"crossref","first-page":"933","DOI":"10.1109\/TNET.2005.852874","volume":"13","author":"NG Duffield","year":"2005","unstructured":"Duffield, N.G., Lund, C., Thorup, M.: Estimating flow distributions from sampled flow statistics. IEEE\/ACM Trans. Netw. 13(5), 933\u2013946 (2005)","journal-title":"IEEE\/ACM Trans. Netw."},{"key":"9974_CR19","doi-asserted-by":"crossref","unstructured":"Duffield, N.G., Lund, C., Thorup, M.: Priority sampling for estimation of arbitrary subset sums. J. ACM 54(6) (2007)","DOI":"10.1145\/1314690.1314696"},{"issue":"5","key":"9974_CR20","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1016\/j.ipl.2005.11.003","volume":"97","author":"P Efraimidis","year":"2006","unstructured":"Efraimidis, P., Spirakis, P.G.: Weighted random sampling with a reservoir. Inf. Process. Lett. 97(5), 181\u2013185 (2006)","journal-title":"Inf. Process. Lett."},{"key":"9974_CR21","doi-asserted-by":"crossref","unstructured":"Estan, C., Keys, K., Moore, D., Varghese, G.: Building a better netflow. In: Proceedings of ACM Conference on Applications, Technologies, Architectures, and Protocols for Computer Communication (SIGCOMM), pp. 245\u2013256 (2004)","DOI":"10.1145\/1015467.1015495"},{"key":"9974_CR22","doi-asserted-by":"crossref","unstructured":"Estan, C., Varghese, G.: New directions in traffic measurement and accounting. In: Proceedings of ACM Conference on Applications, Technologies, Architectures, and Protocols for Computer Communication (SIGCOMM), pp. 323\u2013336 (2002)","DOI":"10.1145\/633025.633056"},{"key":"9974_CR23","doi-asserted-by":"crossref","unstructured":"Gibbons, P.B., Matias, Y.: New sampling-based summary statistics for improving approximate query answers. In: Proceedings of ACM SIGMOD International Conference on Management of Data, pp. 331\u2013342 (1998)","DOI":"10.1145\/276304.276334"},{"key":"9974_CR24","doi-asserted-by":"crossref","unstructured":"Guha, S., Huang, Z.: Revisiting the direct sum theorem and space lower bounds in random order streams. In: Automata, Languages and Programming, 36th International Colloquium, ICALP (1), pp. 513\u2013524 (2009)","DOI":"10.1007\/978-3-642-02927-1_43"},{"key":"9974_CR25","doi-asserted-by":"crossref","unstructured":"Harvey, N.J.A., Nelson, J., Onak, K.: Sketching and streaming entropy via approximation theory. In: Proceedings of 49th IEEE Conference on Foundations of Computer Science (FOCS), pp. 489\u2013498 (2008)","DOI":"10.1109\/FOCS.2008.76"},{"issue":"1","key":"9974_CR26","doi-asserted-by":"crossref","first-page":"68","DOI":"10.1109\/TNET.2005.863456","volume":"14","author":"N Hohn","year":"2006","unstructured":"Hohn, N., Veitch, D.: Inverting sampled traffic. IEEE\/ACM Trans. Netw. 14(1), 68\u201380 (2006)","journal-title":"IEEE\/ACM Trans. Netw."},{"key":"9974_CR27","doi-asserted-by":"crossref","unstructured":"Indyk, P., Woodruff, D.P.: Optimal approximations of the frequency moments of data streams. In: Proceedings of 37th Annual ACM Symposium on Theory of Computing (STOC), pp. 202\u2013208 (2005)","DOI":"10.1145\/1060590.1060621"},{"key":"9974_CR28","doi-asserted-by":"crossref","first-page":"26:1","DOI":"10.1145\/1412331.1412338","volume":"33","author":"TS Jayram","year":"2008","unstructured":"Jayram, T.S., McGregor, A., Muthukrishnan, S., Vee, E.: Estimating statistical aggregates on probabilistic data streams. ACM Trans. Database Syst. 33, 26:1\u201326:30 (2008)","journal-title":"ACM Trans. Database Syst."},{"key":"9974_CR29","doi-asserted-by":"crossref","unstructured":"Kane, D.M., Nelson, J., Woodruff, D.P.: On the exact space complexity of sketching and streaming small norms. In: Proceedings of 21st ACM-SIAM Symposium on Discrete Algorithms (SODA), pp. 1161\u20131178 (2010)","DOI":"10.1137\/1.9781611973075.93"},{"key":"9974_CR30","doi-asserted-by":"crossref","first-page":"2838","DOI":"10.1007\/978-0-387-39940-9_372","volume-title":"Encyclopedia of Database Systems","author":"B Lahiri","year":"2009","unstructured":"Lahiri, B., Tirthapura, S.: Stream sampling. In: Liu, L., \u00d6zsu, M.T. (eds.) Encyclopedia of Database Systems, pp. 2838\u20132842. Springer, US (2009)"},{"key":"9974_CR31","unstructured":"McGregor, A. (ed.): Open Problems in Data Streams and Related Topics (2007). http:\/\/www.cse.iitk.ac.in\/users\/sganguly\/data-stream-probs"},{"key":"9974_CR32","doi-asserted-by":"crossref","unstructured":"McGregor, A., Pavan, A., Tirthapura, S., Woodruff, D.: Space-efficient estimation of statistics over sub-sampled streams. In: Proceedings of 31st ACM Symposium on Principles of Database Systems (PODS), pp. 273\u2013282 (2012)","DOI":"10.1145\/2213556.2213594"},{"issue":"2","key":"9974_CR33","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1016\/0167-6423(82)90012-0","volume":"2","author":"J Misra","year":"1982","unstructured":"Misra, J., Gries, D.: Finding repeated elements. Sci. Comput. Program. 2(2), 143\u2013152 (1982)","journal-title":"Sci. Comput. Program."},{"key":"9974_CR34","doi-asserted-by":"crossref","unstructured":"Rusu, F., Dobra, A.: Sketching sampled data streams. In: Proceedings of 25th IEEE International Conference on Data Engineering (ICDE), pp. 381\u2013392 (2009)","DOI":"10.1109\/ICDE.2009.31"},{"key":"9974_CR35","doi-asserted-by":"crossref","unstructured":"Szegedy, M.: The dlt priority sampling is essentially optimal. In: Proceedings of Annual ACM Symposium on Theory of Computing (STOC), pp. 150\u2013158 (2006)","DOI":"10.1145\/1132516.1132539"},{"key":"9974_CR36","doi-asserted-by":"crossref","unstructured":"Tirthapura, S., Woodruff, D.P.: Optimal random sampling from distributed streams revisited. In: Proceedings of International Symposium on Distributed Computing (DISC), pp. 283\u2013297 (2011)","DOI":"10.1007\/978-3-642-24100-0_27"},{"issue":"1","key":"9974_CR37","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1145\/3147.3165","volume":"11","author":"JS Vitter","year":"1985","unstructured":"Vitter, J.S.: Random sampling with a reservoir. ACM Trans. Math. Softw. 11(1), 37\u201357 (1985)","journal-title":"ACM Trans. Math. Softw."}],"container-title":["Algorithmica"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00453-015-9974-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00453-015-9974-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00453-015-9974-0","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,30]],"date-time":"2020-08-30T02:38:37Z","timestamp":1598755117000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00453-015-9974-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,2,18]]},"references-count":37,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2016,2]]}},"alternative-id":["9974"],"URL":"https:\/\/doi.org\/10.1007\/s00453-015-9974-0","relation":{},"ISSN":["0178-4617","1432-0541"],"issn-type":[{"value":"0178-4617","type":"print"},{"value":"1432-0541","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,2,18]]}}}