{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T10:05:39Z","timestamp":1760609139557},"reference-count":27,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2019,5,8]],"date-time":"2019-05-08T00:00:00Z","timestamp":1557273600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"Erasmus Mundus IT4BI-DC"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Distrib Parallel Databases"],"published-print":{"date-parts":[[2020,6]]},"DOI":"10.1007\/s10619-019-07271-0","type":"journal-article","created":{"date-parts":[[2019,5,8]],"date-time":"2019-05-08T19:38:03Z","timestamp":1557344283000},"page":"335-364","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["A cost-based storage format selector for materialized results in big data frameworks"],"prefix":"10.1007","volume":"38","author":[{"given":"Rana Faisal","family":"Munir","sequence":"first","affiliation":[]},{"given":"Alberto","family":"Abell\u00f3","sequence":"additional","affiliation":[]},{"given":"Oscar","family":"Romero","sequence":"additional","affiliation":[]},{"given":"Maik","family":"Thiele","sequence":"additional","affiliation":[]},{"given":"Wolfgang","family":"Lehner","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,5,8]]},"reference":[{"key":"7271_CR1","unstructured":"Alagiannis, I., Idreos, S., Ailamaki, A.: H2O: a hands-free adaptive store. In: International Conference on Management of Data, SIGMOD 2014, Snowbird, UT, USA, June 22\u201327, 2014, pp. 1103\u20131114 (2014)"},{"key":"7271_CR2","unstructured":"Atscale: Big data maturity survey. Cloudera (2016)"},{"issue":"3","key":"7271_CR3","first-page":"324","volume":"11","author":"T Azim","year":"2017","unstructured":"Azim, T., Karpathiotakis, M., Ailamaki, A.: Recache: reactive caching for fast analytics over heterogeneous data. PVLDB 11(3), 324\u2013337 (2017)","journal-title":"PVLDB"},{"key":"7271_CR4","unstructured":"Bian, H., Yan, Y., Tao, W., Chen, L.J., Chen, Y., Du, X., Moscibroda, T.: Wide table layout optimization based on column ordering and duplication. In: Proceedings of the 2017 ACM International Conference on Management of Data, SIGMOD Conference 2017, Chicago, IL, USA, May 14\u201319, 2017, pp. 299\u2013314 (2017)"},{"issue":"5","key":"7271_CR5","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1145\/360762.360766","volume":"18","author":"AF Cardenas","year":"1975","unstructured":"Cardenas, A.F.: Analysis and performance of inverted data base structures. Commun. ACM 18(5), 253\u2013263 (1975)","journal-title":"Commun. ACM"},{"issue":"12","key":"7271_CR6","first-page":"1802","volume":"5","author":"Y Chen","year":"2012","unstructured":"Chen, Y., Alspaugh, S., Katz, R.H.: Interactive analytical processing in big data systems: a cross-industry study of mapreduce workloads. PVLDB 5(12), 1802\u20131813 (2012)","journal-title":"PVLDB"},{"key":"7271_CR7","unstructured":"DeWitt, D.J., Halverson, A., Nehme, R.V., Shankar, S., Aguilar-Saborit, J., Avanes, A., Flasza, M., Gramling, J.: Split query processing in polybase. In: Proceedings of the ACM SIGMOD International Conference on Management of Data, SIGMOD 2013, New York, NY, USA, June 22\u201327, 2013, pp. 1255\u20131266 (2013)"},{"issue":"6","key":"7271_CR8","first-page":"586","volume":"5","author":"I Elghandour","year":"2012","unstructured":"Elghandour, I., Aboulnaga, A.: Restore: reusing results of mapreduce jobs. PVLDB 5(6), 586\u2013597 (2012)","journal-title":"PVLDB"},{"issue":"12","key":"7271_CR9","first-page":"1908","volume":"8","author":"AJ Elmore","year":"2015","unstructured":"Elmore, A.J., Duggan, J., Stonebraker, M., Balazinska, M., \u00c7etintemel, U., Gadepally, V., Heer, J., Howe, B., Kepner, J., Kraska, T., Madden, S., Maier, D., Mattson, T.G., Papadopoulos, S., Parkhurst, J., Tatbul, N., Vartak, M., Zdonik, S.: A demonstration of the bigdawg polystore system. PVLDB 8(12), 1908\u20131911 (2015)","journal-title":"PVLDB"},{"issue":"4","key":"7271_CR10","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1145\/2094114.2094126","volume":"40","author":"F F\u00e4rber","year":"2011","unstructured":"F\u00e4rber, F., Cha, S.K., Primsch, J., Bornh\u00f6vd, C., Sigg, S., Lehner, W.: SAP HANA database: data management for modern business applications. SIGMOD Rec. 40(4), 45\u201351 (2011)","journal-title":"SIGMOD Rec."},{"issue":"7","key":"7271_CR11","first-page":"419","volume":"4","author":"A Floratou","year":"2011","unstructured":"Floratou, A., Patel, J.M., Shekita, E.J., Tata, S.: Column-oriented storage techniques for mapreduce. PVLDB 4(7), 419\u2013429 (2011)","journal-title":"PVLDB"},{"issue":"11","key":"7271_CR12","first-page":"1111","volume":"4","author":"H Herodotou","year":"2011","unstructured":"Herodotou, H., Babu, S.: Profiling, what-if analysis, and cost-based optimization of mapreduce programs. PVLDB 4(11), 1111\u20131122 (2011)","journal-title":"PVLDB"},{"key":"7271_CR13","unstructured":"Idreos, S., Alagiannis, I., Johnson, R., Ailamaki, A.: Here are my data files. here are my queries. where are my results? In: CIDR 2011, Fifth Biennial Conference on Innovative Data Systems Research, Asilomar, CA, USA, January 9\u201312, 2011, Online Proceedings, pp. 57\u201368 (2011)"},{"issue":"7","key":"7271_CR14","first-page":"800","volume":"11","author":"A Jindal","year":"2018","unstructured":"Jindal, A., Karanasos, K., Rao, S., Patel, H.: Selecting subexpressions to materialize at datacenter scale. PVLDB 11(7), 800\u2013812 (2018)","journal-title":"PVLDB"},{"key":"7271_CR15","doi-asserted-by":"crossref","unstructured":"Jindal, A., Qiao, S., Patel, H., Yin, Z., Di, J., Bag, M., Friedman, M., Lin, Y., Karanasos, K., Rao, S.: Computation reuse in analytics job service at microsoft. In: SIGMOD Conference, pp. 191\u2013203 (2018)","DOI":"10.1145\/3183713.3190656"},{"key":"7271_CR16","unstructured":"Jindal, A., Quian\u00e9-Ruiz, J., Dittrich, J.: Trojan data layouts: right shoes for a running elephant. In: ACM Symposium on Cloud Computing in conjunction with SOSP 2011, SOCC \u201911, Cascais, Portugal, October 26\u201328, 2011, p. 21 (2011)"},{"key":"7271_CR17","unstructured":"Jindal, A., Quian\u00e9-Ruiz, J., Dittrich, J.: Wwhow! freeing data storage from cages. In: CIDR 2013, Sixth Biennial Conference on Innovative Data Systems Research, Asilomar, CA, USA, January 6\u20139, 2013, Online Proceedings (2013)"},{"issue":"5","key":"7271_CR18","doi-asserted-by":"publisher","first-page":"1203","DOI":"10.1109\/TKDE.2016.2515609","volume":"28","author":"P Jovanovic","year":"2016","unstructured":"Jovanovic, P., Romero, O., Simitsis, A., Abell\u00f3, A.: Incremental consolidation of data-intensive multi-flows. IEEE Trans. Knowl. Data Eng. 28(5), 1203\u20131216 (2016)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"7271_CR19","unstructured":"Kalavri, V., Shang, H., Vlassov, V.: m2r2: a framework for results materialization and reuse in high-level dataflow systems for big data. In: 16th IEEE International Conference on Computational Science and Engineering, CSE 2013, December 3\u20135, 2013, Sydney, Australia, pp. 894\u2013901 (2013)"},{"key":"7271_CR20","doi-asserted-by":"crossref","unstructured":"Laga, A., Boukhobza, J., Koskas, M., Singhoff, F.: Lynx: a learning linux prefetching mechanism for SSD performance model. In: NVMSA, pp. 1\u20136 (2016)","DOI":"10.1109\/NVMSA.2016.7547186"},{"issue":"2","key":"7271_CR21","doi-asserted-by":"publisher","first-page":"111","DOI":"10.3233\/FI-2018-1734","volume":"163","author":"RF Munir","year":"2018","unstructured":"Munir, R.F., Nadal, S., Romero, O., Abell\u00f3, A., Jovanovic, P., Thiele, M., Lehner, W.: Intermediate results materialization selection and format for data-intensive flows. Fundam. Inf. 163(2), 111\u2013138 (2018)","journal-title":"Fundam. Inf."},{"key":"7271_CR22","unstructured":"Munir, R.F., Romero, O., Abell\u00f3, A., Bilalli, B., Thiele, M., Lehner, W.: Resilientstore: A heuristic-based data format selector for intermediate results. In: Model and Data Engineering - 6th International Conference, MEDI 2016, Almer\u00eda, Spain, September 21\u201323, 2016, Proceedings, pp. 42\u201356 (2016)"},{"issue":"1\u20132","key":"7271_CR23","doi-asserted-by":"publisher","first-page":"494","DOI":"10.14778\/1920841.1920906","volume":"3","author":"T Nykiel","year":"2010","unstructured":"Nykiel, T., Potamias, M., Mishra, C., Kollios, G., Koudas, N.: Mrshare: sharing across multiple queries in mapreduce. Proc. VLDB Endow. 3(1\u20132), 494\u2013505 (2010)","journal-title":"Proc. VLDB Endow."},{"issue":"11","key":"7271_CR24","first-page":"1080","volume":"6","author":"V Raman","year":"2013","unstructured":"Raman, V., Attaluri, G.K., Barber, R., Chainani, N., Kalmuk, D., KulandaiSamy, V., Leenstra, J., Lightstone, S., Liu, S., Lohman, G.M., Malkemus, T., M\u00fcller, R., Pandis, I., Schiefer, B., Sharpe, D., Sidle, R., Storm, A.J., Zhang, L.: DB2 with BLU acceleration: so much more than just a column store. PVLDB 6(11), 1080\u20131091 (2013)","journal-title":"PVLDB"},{"key":"7271_CR25","unstructured":"Schaarschmidt, M., Gessert, F., Ritter, N.: Towards automated polyglot persistence. In: Datenbanksysteme f\u00fcr Business, Technologie und Web (BTW), 16. Fachtagung des GI-Fachbereichs \u201cDatenbanken und Informationssysteme\u201d (DBIS), 4.-6.3.2015 in Hamburg, Germany. Proceedings, pp. 73\u201382 (2015)"},{"issue":"2","key":"7271_CR26","first-page":"6","volume":"35","author":"KV Shvachko","year":"2010","unstructured":"Shvachko, K.V.: Hdfs scalability: the limits to growth. Login 35(2), 6\u201316 (2010)","journal-title":"Login"},{"key":"7271_CR27","doi-asserted-by":"crossref","unstructured":"Silva, Y.N., Larson, P.A., Zhou, J.: Exploiting common subexpressions for cloud query processing. In: Proceedings of the 2012 IEEE 28th International Conference on Data Engineering, pp. 1337\u20131348 (2012)","DOI":"10.1109\/ICDE.2012.106"}],"container-title":["Distributed and Parallel Databases"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10619-019-07271-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10619-019-07271-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10619-019-07271-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,6]],"date-time":"2020-05-06T23:12:45Z","timestamp":1588806765000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10619-019-07271-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,5,8]]},"references-count":27,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2020,6]]}},"alternative-id":["7271"],"URL":"https:\/\/doi.org\/10.1007\/s10619-019-07271-0","relation":{},"ISSN":["0926-8782","1573-7578"],"issn-type":[{"value":"0926-8782","type":"print"},{"value":"1573-7578","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,5,8]]},"assertion":[{"value":"8 May 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}