{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T14:44:35Z","timestamp":1777128275299,"version":"3.51.4"},"reference-count":293,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2018,9,18]],"date-time":"2018-09-18T00:00:00Z","timestamp":1537228800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Knowl Inf Syst"],"published-print":{"date-parts":[[2019,9]]},"DOI":"10.1007\/s10115-018-1248-0","type":"journal-article","created":{"date-parts":[[2018,9,18]],"date-time":"2018-09-18T16:29:13Z","timestamp":1537288153000},"page":"1165-1245","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":117,"title":["The big data system, components, tools, and technologies: a survey"],"prefix":"10.1007","volume":"60","author":[{"given":"T. Ramalingeswara","family":"Rao","sequence":"first","affiliation":[]},{"given":"Pabitra","family":"Mitra","sequence":"additional","affiliation":[]},{"given":"Ravindara","family":"Bhatt","sequence":"additional","affiliation":[]},{"given":"A.","family":"Goswami","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,9,18]]},"reference":[{"key":"1248_CR1","unstructured":"The size of the world wide web (the internet). http:\/\/worldwidewebsize.com\/"},{"issue":"7433","key":"1248_CR2","doi-asserted-by":"publisher","first-page":"473","DOI":"10.1038\/493473a","volume":"493","author":"CA Mattmann","year":"2013","unstructured":"Mattmann CA (2013) Computing: a vision for data science. Nature 493(7433):473\u2013475","journal-title":"Nature"},{"key":"1248_CR3","unstructured":"National Aeronautics and Space Administration. https:\/\/www.nasa.gov\/"},{"key":"1248_CR4","unstructured":"Clavin W (2013) Managing the deluge of \u2018big data\u2019 from space. NASA Jet Propulsion Labratory"},{"issue":"15","key":"1248_CR5","doi-asserted-by":"publisher","first-page":"2787","DOI":"10.1016\/j.comnet.2010.05.010","volume":"54","author":"L Atzori","year":"2010","unstructured":"Atzori L, Iera A, Morabito G (2010) The internet of things: a survey. Comput Netw 54(15):2787\u20132805","journal-title":"Comput Netw"},{"key":"1248_CR6","unstructured":"SCB Intelligence (2008) Six technologies with potential impacts on us interests out to 2025. National Intelligent Concil, Tech. Rep"},{"issue":"1","key":"1248_CR7","doi-asserted-by":"publisher","first-page":"531","DOI":"10.1109\/COMST.2016.2610963","volume":"19","author":"S Yu","year":"2017","unstructured":"Yu S, Liu M, Dou W, Liu X, Zhou S (2017) Networking for big data: a survey. IEEE Commun Surv Tutor 19(1):531\u2013549","journal-title":"IEEE Commun Surv Tutor"},{"issue":"1","key":"1248_CR8","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1145\/3150226","volume":"51","author":"S Pouyanfar","year":"2018","unstructured":"Pouyanfar S, Yang Y, Chen S-C, Shyu M-L, Iyengar SS (2018) Multimedia big data analytics: a survey. ACM Comput Surv 51(1):10","journal-title":"ACM Comput Surv"},{"key":"1248_CR9","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1016\/j.jnca.2017.04.002","volume":"88","author":"FA Alaba","year":"2017","unstructured":"Alaba FA, Othman M, Hashem IAT, Alotaibi F (2017) Internet of things security: a survey. J Netw Comput Appl 88:10\u201328","journal-title":"J Netw Comput Appl"},{"key":"1248_CR10","unstructured":"Zikopoulos P, Eaton C, et\u00a0al (2011) Understanding big data: analytics for enterprise class hadoop and streaming data. ISBN: 0071790535"},{"issue":"2","key":"1248_CR11","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1007\/s11036-013-0489-0","volume":"19","author":"M Chen","year":"2014","unstructured":"Chen M, Mao S, Liu Y (2014) Big data: a survey. Mob Netw Appl 19(2):171\u2013209","journal-title":"Mob Netw Appl"},{"key":"1248_CR12","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1016\/j.is.2014.07.006","volume":"47","author":"IAT Hashem","year":"2015","unstructured":"Hashem IAT, Yaqoob I, Anuar NB, Mokhtar S, Gani A, Khan SU (2015) The rise of big data on cloud computing: review and open research issues. Inf Syst 47:98\u2013115","journal-title":"Inf Syst"},{"issue":"12","key":"1248_CR13","doi-asserted-by":"publisher","first-page":"798","DOI":"10.1016\/j.tplants.2014.08.004","volume":"19","author":"C Ma","year":"2014","unstructured":"Ma C, Zhang HH, Wang X (2014) Machine learning for big data analytics in plants. Trends Plant Sci 19(12):798\u2013808","journal-title":"Trends Plant Sci"},{"key":"1248_CR14","unstructured":"Laney D (2013) 3d data management: controlling data volume, velocity and variety. META Group Research Note 6(70), 1"},{"issue":"2","key":"1248_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2481244.2481246","volume":"14","author":"W Fan","year":"2013","unstructured":"Fan W, Bifet A (2013) Mining big data: current status, and forecast to the future. ACM sIGKDD Explor Newsl 14(2):1\u20135","journal-title":"ACM sIGKDD Explor Newsl"},{"key":"1248_CR16","doi-asserted-by":"crossref","unstructured":"Demchenko Y, De\u00a0Laat C, Membrey P (2014) Defining architecture components of the big data ecosystem. In: Collaboration technologies and systems (CTS), 2014 international conference on, pp 104\u2013112","DOI":"10.1109\/CTS.2014.6867550"},{"issue":"5","key":"1248_CR17","first-page":"380","volume":"4","author":"A Fern\u00e1ndez","year":"2014","unstructured":"Fern\u00e1ndez A, del R\u00edo S, L\u00f3pez V, Bawakid A, del Jesus MJ, Ben\u00edtez JM, Herrera F (2014) Big data with cloud computing: an insight on the computing environment, mapreduce, and programming frameworks. Wiley Interdiscip Rev: Data Min Knowl Discov 4(5):380\u2013409","journal-title":"Wiley Interdiscip Rev: Data Min Knowl Discov"},{"key":"1248_CR18","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.jpdc.2014.08.003","volume":"79","author":"MD Assun\u00e7\u00e3o","year":"2015","unstructured":"Assun\u00e7\u00e3o MD, Calheiros RN, Bianchi S, Netto MAS, Buyya R (2015) Big data computing and clouds: trends and future directions. J Parallel Distrib Comput 79:3\u201315","journal-title":"J Parallel Distrib Comput"},{"key":"1248_CR19","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1016\/j.cosrev.2015.05.002","volume":"17","author":"CK Emani","year":"2015","unstructured":"Emani CK, Cullot N, Nicolle C (2015) Understandable big data: a survey. Comput Sci Rev 17:70\u201381","journal-title":"Comput Sci Rev"},{"key":"1248_CR20","doi-asserted-by":"publisher","first-page":"937","DOI":"10.1016\/j.rser.2015.07.128","volume":"52","author":"B-A Schuelke-Leech","year":"2015","unstructured":"Schuelke-Leech B-A, Barry B, Muratori M, Yurkovich BJ (2015) Big data issues and opportunities for electric utilities. Renew Sustain Energy Rev 52:937\u2013947","journal-title":"Renew Sustain Energy Rev"},{"issue":"6","key":"1248_CR21","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1109\/MIS.2015.110","volume":"30","author":"DE O\u2019Leary","year":"2015","unstructured":"O\u2019Leary DE (2015) Big data and privacy: emerging issues. IEEE Intell Syst 30(6):92\u201396","journal-title":"IEEE Intell Syst"},{"issue":"1","key":"1248_CR22","first-page":"79","volume":"46","author":"R Kune","year":"2016","unstructured":"Kune R, Konugurthi PK, Agarwal A, Chillarige RR, Buyya R (2016) The anatomy of big data computing. Softw: Pract Exp 46(1):79\u2013105","journal-title":"Softw: Pract Exp"},{"key":"1248_CR23","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1016\/j.inffus.2015.08.005","volume":"28","author":"G Bello-Orgaz","year":"2016","unstructured":"Bello-Orgaz G, Jung JJ, Camacho D (2016) Social big data: recent achievements and new challenges. Inf Fusion 28:45\u201359","journal-title":"Inf Fusion"},{"issue":"3","key":"1248_CR24","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1007\/s10723-016-9371-1","volume":"14","author":"F Bajaber","year":"2016","unstructured":"Bajaber F, Elshawi R, Batarfi O, Altalhi A, Barnawi A, Sakr S (2016) Big data 2.0 processing systems: taxonomy and open challenges. J Grid Comput 14(3):379\u2013405","journal-title":"J Grid Comput"},{"key":"1248_CR25","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1016\/j.infsof.2017.06.001","volume":"90","author":"S Nadal","year":"2017","unstructured":"Nadal S, Herrero V, Romero O, Abell A, Franch X, Vansummeren S, Valerio D (2017) A software reference architecture for semantic-aware big data systems. Inf Softw Technol 90:75\u201392","journal-title":"Inf Softw Technol"},{"key":"1248_CR26","unstructured":"Big data and veracity challenges. https:\/\/www.isical.ac.in\/~acmsc\/TMW2014\/LVS.pdf"},{"issue":"2","key":"1248_CR27","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1016\/j.ijinfomgt.2014.10.007","volume":"35","author":"A Gandomi","year":"2015","unstructured":"Gandomi A, Haider M (2015) Beyond the hype: big data concepts, methods, and analytics. Int J Inf Manag 35(2):137\u2013144","journal-title":"Int J Inf Manag"},{"issue":"3","key":"1248_CR28","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1016\/j.bushor.2017.01.004","volume":"60","author":"I Lee","year":"2017","unstructured":"Lee I (2017) Big data: dimensions, evolution, impacts, and challenges. Bus Horiz 60(3):293\u2013303","journal-title":"Bus Horiz"},{"key":"1248_CR29","unstructured":"Kung S-Y (2015) Visualization of big data. In: Cognitive informatics and cognitive computing (ICCI* CC), 2015 IEEE 14th international conference on, pp 447\u2013448"},{"key":"1248_CR30","doi-asserted-by":"crossref","unstructured":"Strohbach M, Ziekow H, Gazis V, Akiva N (2015) Towards a big data analytics framework for IoT and smart city applications. In: Modeling and processing for next-generation big-data technologies. pp 257\u2013282. ISBN: 14-9783319385006","DOI":"10.1007\/978-3-319-09177-8_11"},{"issue":"1","key":"1248_CR31","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1109\/TKDE.2013.109","volume":"26","author":"X Wu","year":"2014","unstructured":"Wu X, Zhu X, Wu G-Q, Ding W (2014) Data mining with big data. IEEE Trans Knowl Data Eng 26(1):97\u2013107","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"5","key":"1248_CR32","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1109\/MIS.2015.56","volume":"30","author":"X Wu","year":"2015","unstructured":"Wu X, Chen H, Wu G, Liu J, Zheng Q, He X, Zhou A, Zhao Z-Q, Wei B, Ming G (2015) Knowledge engineering with big data. IEEE Intell Syst 30(5):46\u201355","journal-title":"IEEE Intell Syst"},{"key":"1248_CR33","doi-asserted-by":"publisher","first-page":"12696","DOI":"10.1109\/ACCESS.2017.2710298","volume":"5","author":"X Wu","year":"2017","unstructured":"Wu X, Chen H, Liu J, Gongqing W, Ruqian L, Zheng N (2017) Knowledge engineering with big data (bigke): a 54-month, 45-million rmb, 15-institution national grand project. IEEE Access 5:12696\u201312701","journal-title":"IEEE Access"},{"key":"1248_CR34","unstructured":"Venner J, Wadkar S, Siddalingaiah M (2014) Pro apache hadoop. ISBN-13: 9781430248637"},{"key":"1248_CR35","doi-asserted-by":"crossref","unstructured":"Pavlo A, Paulson E, Rasin A, Abadi DJ, DeWitt DJ, Madden S, Stonebraker M (2009) A comparison of approaches to large-scale data analysis. In: Proceedings of the 2009 ACM SIGMOD international conference on management of data, pp 165\u2013178","DOI":"10.1145\/1559845.1559865"},{"key":"1248_CR36","unstructured":"Teradata. http:\/\/www.teradata.com\/Press-Releases\/2016\/Teradata-Announces-the-World%E2%80%99s-Most-Powerful"},{"key":"1248_CR37","doi-asserted-by":"crossref","unstructured":"Chang L, Wang Z, Ma T, Jian L, Ma L, Goldshuv A, Lonergan L, Cohen J, Welton C, Sherry G et al (2014) HAWQ: a massively parallel processing SQL engine in hadoop. In: Proceedings of the 2014 ACM SIGMOD international conference on management of data, pp 1223\u20131234","DOI":"10.1145\/2588555.2595636"},{"key":"1248_CR38","unstructured":"Greenplum architecture. http:\/\/greenplum.org\/gpdb-sandbox-tutorials\/ introduction-greenplum-database-architecture\/"},{"key":"1248_CR39","unstructured":"Ibm netezza. https:\/\/www-01.ibm.com\/software\/data\/netezza\/"},{"issue":"1","key":"1248_CR40","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean J, Ghemawat S (2008) Mapreduce: simplified data processing on large clusters. Commun ACM 51(1):107\u2013113","journal-title":"Commun ACM"},{"issue":"8","key":"1248_CR41","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1145\/79173.79181","volume":"33","author":"LG Valiant","year":"1990","unstructured":"Valiant LG (1990) A bridging model for parallel computation. Commun ACM 33(8):103\u2013111","journal-title":"Commun ACM"},{"issue":"5","key":"1248_CR42","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1145\/2901919","volume":"59","author":"A Lenharth","year":"2016","unstructured":"Lenharth A, Nguyen D, Pingali K (2016) Parallel graph analytics. Commun ACM 59(5):78\u201387","journal-title":"Commun ACM"},{"key":"1248_CR43","unstructured":"Apache hama project. https:\/\/hama.apache.org\/"},{"key":"1248_CR44","doi-asserted-by":"crossref","unstructured":"Malewicz G, Austern MH, Bik AJC, Dehnert JC, Horn I, Leiser N, Czajkowski G (2010) Pregel: a system for large-scale graph processing. In: Proceedings of the 2010 ACM SIGMOD international conference on management of data, pp 135\u2013146","DOI":"10.1145\/1807167.1807184"},{"key":"1248_CR45","unstructured":"Apache giraph project. http:\/\/giraph.apache.org\/"},{"issue":"7","key":"1248_CR46","doi-asserted-by":"publisher","first-page":"1920","DOI":"10.1109\/TKDE.2015.2427795","volume":"27","author":"H Zhang","year":"2015","unstructured":"Zhang H, Chen G, Ooi BC, Tan K-L, Zhang M (2015) In-memory big data management and processing: a survey. IEEE Trans Knowl Data Eng 27(7):1920\u20131948","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"1248_CR47","doi-asserted-by":"crossref","unstructured":"Cai Q, Zhang H, Guo W, Chen G, Ooi BC, Tan K-L, Wong WF (2018) Memepic: towards a unified in-memory big data management system. IEEE Trans Big Data","DOI":"10.1109\/TBDATA.2017.2789286"},{"key":"1248_CR48","unstructured":"Lim H, Han D, Andersen DG, Kaminsky M (2014) Mica: a holistic approach to fast in-memory key-value storage. USENIX, pp 429\u2013444"},{"issue":"6","key":"1248_CR49","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1134\/S0361768814060152","volume":"40","author":"SD Kuznetsov","year":"2014","unstructured":"Kuznetsov SD, Poskonin AV (2014) Nosql data management systems. Program Comput Softw 40(6):323\u2013332","journal-title":"Program Comput Softw"},{"key":"1248_CR50","unstructured":"In-memory storage engine. https:\/\/docs.mongodb.com\/manual\/core\/inmemory\/"},{"key":"1248_CR51","doi-asserted-by":"publisher","first-page":"314","DOI":"10.1016\/j.ins.2014.01.015","volume":"275","author":"CLP Chen","year":"2014","unstructured":"Chen CLP, Zhang C-Y (2014) Data-intensive applications, challenges, techniques and technologies: a survey on big data. Inf Sci 275:314\u2013347","journal-title":"Inf Sci"},{"issue":"12","key":"1248_CR52","doi-asserted-by":"publisher","first-page":"1452","DOI":"10.1016\/j.datak.2009.07.010","volume":"68","author":"J-N Maz\u00f3n","year":"2009","unstructured":"Maz\u00f3n J-N, Lechtenb\u00f6rger J, Trujillo J (2009) A survey on summarizability issues in multidimensional modeling. Data Knowl Eng 68(12):1452\u20131469","journal-title":"Data Knowl Eng"},{"key":"1248_CR53","doi-asserted-by":"publisher","first-page":"652","DOI":"10.1109\/ACCESS.2014.2332453","volume":"2","author":"H Hu","year":"2014","unstructured":"Hu H, Wen Y, Chua T-S, Li X (2014) Toward scalable systems for big data analytics: a technology tutorial. IEEE Access 2:652\u2013687","journal-title":"IEEE Access"},{"key":"1248_CR54","first-page":"1","volume":"1142","author":"J Gantz","year":"2011","unstructured":"Gantz J, Reinsel D (2011) Extracting value from chaos. IDC iview 1142:1\u201312","journal-title":"IDC iview"},{"issue":"1","key":"1248_CR55","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1109\/MC.2009.26","volume":"42","author":"RT Kouzes","year":"2009","unstructured":"Kouzes RT, Anderson GA, Elbert ST, Gorton I, Gracio DK (2009) The changing paradigm of data-intensive computing. IEEE Comput 42(1):26\u201334","journal-title":"IEEE Comput"},{"issue":"12","key":"1248_CR56","doi-asserted-by":"publisher","first-page":"2032","DOI":"10.14778\/2367502.2367572","volume":"5","author":"A Labrinidis","year":"2012","unstructured":"Labrinidis A, Jagadish HV (2012) Challenges and opportunities with big data. Proc VLDB Endow 5(12):2032\u20132033","journal-title":"Proc VLDB Endow"},{"key":"1248_CR57","unstructured":"UN Global Pulse (2012) Big data for development: challenges and opportunities. UN Global Pulse, New York"},{"issue":"7","key":"1248_CR58","doi-asserted-by":"publisher","first-page":"2561","DOI":"10.1016\/j.jpdc.2014.01.003","volume":"74","author":"K Kambatla","year":"2014","unstructured":"Kambatla K, Kollias G, Kumar V, Grama A (2014) Trends in big data analytics. J Parallel Distrib Comput 74(7):2561\u20132573","journal-title":"J Parallel Distrib Comput"},{"key":"1248_CR59","doi-asserted-by":"crossref","unstructured":"Chen Y, Qin X, Bian H, Chen J, Dong Z, Du X, Gao Y, Liu D, Lu J, Zhang H (2014) A study of SQL-on-hadoop systems. In: Workshop on big data benchmarks, performance optimization, and emerging hardware, pp 154\u2013166","DOI":"10.1007\/978-3-319-13021-7_12"},{"issue":"1","key":"1248_CR60","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1756-0381-7-22","volume":"7","author":"EA Mohammed","year":"2014","unstructured":"Mohammed EA, Far BH, Naugler C (2014) Applications of the mapreduce programming framework to clinical big data analysis: current landscape and future trends. BioData Min 7(1):1","journal-title":"BioData Min"},{"issue":"1","key":"1248_CR61","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1080\/17538947.2016.1239771","volume":"10","author":"C Yang","year":"2017","unstructured":"Yang C, Huang Q, Li Z, Liu K, Hu F (2017) Big data and cloud computing: innovation opportunities and challenges. Int J Digit Earth 10(1):13\u201353","journal-title":"Int J Digit Earth"},{"key":"1248_CR62","doi-asserted-by":"crossref","unstructured":"Oussous A, Benjelloun F-Z, Lahcen AA, Belfkih S (2017) Big data technologies: a survey. J King Saud Univ-Comput Inf Sci","DOI":"10.1016\/j.jksuci.2017.06.001"},{"key":"1248_CR63","doi-asserted-by":"crossref","unstructured":"Salloum S, Dautov R, Chen X, Peng PX, Huang JZ (2016) Big data analytics on apache spark. Int J Data Sci Anal, pp 1\u201320","DOI":"10.1007\/s41060-016-0027-9"},{"key":"1248_CR64","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.jnca.2017.12.001","volume":"103","author":"MD de Assuncao","year":"2018","unstructured":"de Assuncao MD, da Silva Veith A, Buyya R (2018) Distributed data stream processing and edge computing: a survey on resource elasticity and future directions. J Netw Comput Appl 103:1\u201317","journal-title":"J Netw Comput Appl"},{"issue":"7","key":"1248_CR65","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1109\/MPRV.2008.85","volume":"4","author":"J Krumm","year":"2008","unstructured":"Krumm J, Davies N, Narayanaswami C (2008) User-generated content. IEEE Pervasive Comput 4(7):10\u201311","journal-title":"IEEE Pervasive Comput"},{"key":"1248_CR66","unstructured":"White paper: How machine data supports gdpr compliance. https:\/\/www.splunk.com\/pdfs\/white-papers\/splunk-how-machine-data-dupports-gdpr-compliance.pdf"},{"key":"1248_CR67","unstructured":"Shameer K, Badgeley MA, Miotto R, Glicksberg BS, Morgan JW, Dudley JT (2016) Translational bioinformatics in the era of real-time biomedical, health care and wellness data streams. Briefings in Bioinformatics, bbv118"},{"issue":"7453","key":"1248_CR68","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1038\/498255a","volume":"498","author":"V Marx","year":"2013","unstructured":"Marx V (2013) Biology: the big challenges of big data. Nature 498(7453):255\u2013260","journal-title":"Nature"},{"issue":"D1","key":"1248_CR69","doi-asserted-by":"publisher","first-page":"D21","DOI":"10.1093\/nar\/gkx1154","volume":"46","author":"CE Cook","year":"2017","unstructured":"Cook CE, Bergman MT, Cochrane G, Apweiler R, Birney E (2017) The european bioinformatics institute in 2017: data coordination and integration. Nucleic Acids Res 46(D1):D21\u2013D29","journal-title":"Nucleic Acids Res"},{"issue":"2","key":"1248_CR70","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1007\/s12525-016-0219-0","volume":"26","author":"S Akter","year":"2016","unstructured":"Akter S, Wamba SF (2016) Big data analytics in e-commerce: a systematic review and agenda for future research. Electron Mark 26(2):173\u2013194","journal-title":"Electron Mark"},{"key":"1248_CR71","unstructured":"Aws: streaming data. https:\/\/aws.amazon.com\/streaming-data\/"},{"key":"1248_CR72","unstructured":"Groenfeldt T, At nyse, the data deluge overwhelms traditional databases. https:\/\/www.forbes.com\/sites\/tomgroenfeldt\/2013\/02\/14\/at-nyse-the-data-deluge-overwhelms-traditional-databases\/#25cda10f5aab"},{"key":"1248_CR73","doi-asserted-by":"crossref","unstructured":"Sun J, Reddy CK (2013) Big data analytics for healthcare. In: Proceedings of the 19th ACM SIGKDD international conference on Knowledge discovery and data mining, pp 1525\u20131525","DOI":"10.1145\/2487575.2506178"},{"issue":"1\u20132","key":"1248_CR74","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s00607-015-0471-8","volume":"98","author":"R Ranjan","year":"2016","unstructured":"Ranjan R, Georgakopoulos D, Wang L (2016) A note on software tools and technologies for delivering smart media-optimized big data applications in the cloud. Computing 98(1\u20132):1\u20135","journal-title":"Computing"},{"key":"1248_CR75","unstructured":"Lloyd MD, Minor B. Harnessing the power of data in health. https:\/\/med.stanford.edu\/content\/dam\/sm\/sm-news\/documents\/StanfordMedicineHealthTrendsWhitePaper2017.pdf"},{"key":"1248_CR76","unstructured":"Twitter statistics and facts. https:\/\/www.statista.com\/topics\/737\/twitter\/"},{"key":"1248_CR77","unstructured":"Twitter by the numbers: stats, demographics and fun facts. https:\/\/www.omnicoreagency.com\/twitter-statistics\/"},{"key":"1248_CR78","unstructured":"Number of monthly active facebook users worldwide as of 4th quarter 2017. https:\/\/www.statista.com\/statistics\/264810\/number-of-monthly-active-facebook-users-worldwide\/"},{"key":"1248_CR79","doi-asserted-by":"crossref","unstructured":"Rob Kitchin (2017) Big data. The International Encyclopedia of Geography","DOI":"10.1002\/9781118786352.wbieg0145"},{"issue":"3","key":"1248_CR80","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1109\/MC.2015.62","volume":"48","author":"VN Gudivada","year":"2017","unstructured":"Gudivada VN, Baeza-Yates RA, Raghavan VV (2017) Big data: promises and problems. IEEE Comput 48(3):20\u201323","journal-title":"IEEE Comput"},{"issue":"4","key":"1248_CR81","doi-asserted-by":"publisher","first-page":"2347","DOI":"10.1109\/COMST.2015.2444095","volume":"17","author":"A Al-Fuqaha","year":"2015","unstructured":"Al-Fuqaha A, Guizani M, Mohammadi M, Aledhari M, Ayyash M (2015) Internet of things: a survey on enabling technologies, protocols, and applications. IEEE Commun Surv Tutor 17(4):2347\u20132376","journal-title":"IEEE Commun Surv Tutor"},{"key":"1248_CR82","doi-asserted-by":"publisher","first-page":"202","DOI":"10.1016\/j.tourman.2016.06.006","volume":"57","author":"J Raun","year":"2016","unstructured":"Raun J, Ahas R, Tiru M (2016) Measuring tourism destinations using mobile tracking data. Tour Manag 57:202\u2013212","journal-title":"Tour Manag"},{"key":"1248_CR83","doi-asserted-by":"crossref","unstructured":"Kitchin R (2014) The data revolution: Big data, open data, data infrastructures and their consequences. Sage, ISBN: 13-9781446287484","DOI":"10.4135\/9781473909472"},{"key":"1248_CR84","doi-asserted-by":"crossref","unstructured":"Abiteboul S, Manolescu I, Rigaux P, Rousset M-C, Senellart P (2011) Web data management. Cambridge University Press, ISBN-13: 9781107012431","DOI":"10.1017\/CBO9780511998225"},{"key":"1248_CR85","doi-asserted-by":"crossref","unstructured":"Ghemawat S, Gobioff H, Leung S-T (2003) The google file system. In: ACM SIGOPS operating systems review, vol 37, pp 29\u201343","DOI":"10.1145\/945445.945450"},{"issue":"7209","key":"1248_CR86","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1038\/455016a","volume":"455","author":"C Doctorow","year":"2008","unstructured":"Doctorow C (2008) Big data: welcome to the petacenre. Nat News 455(7209):16\u201321","journal-title":"Nat News"},{"issue":"11","key":"1248_CR87","doi-asserted-by":"publisher","first-page":"1092","DOI":"10.14778\/2536222.2536234","volume":"6","author":"M Ovsiannikov","year":"2013","unstructured":"Ovsiannikov M, Rus S, Reeves D, Sutter P, Rao S, Kelly J (2013) The quantcast file system. Proc VLDB Endow 6(11):1092\u20131101","journal-title":"Proc VLDB Endow"},{"key":"1248_CR88","doi-asserted-by":"crossref","unstructured":"Guerraoui R, Schiper A (1996) Fault-tolerance by replication in distributed systems. In: International conference on reliable software technologies, pp 38\u201357","DOI":"10.1007\/BFb0013477"},{"key":"1248_CR89","doi-asserted-by":"crossref","unstructured":"Wiesmann M, Pedone F, Schiper A, Kemme B, Alonso G (2000) Understanding replication in databases and distributed systems. In: Distributed computing systems, 2000. Proceedings of 20th international conference on, pp 464\u2013474","DOI":"10.1109\/ICDCS.2000.840959"},{"key":"1248_CR90","doi-asserted-by":"crossref","unstructured":"Shvachko K, Kuang H, Radia S, Chansler R (2010) The hadoop distributed file system. In: 2010 IEEE 26th symposium on mass storage systems and technologies (MSST), pp 1\u201310","DOI":"10.1109\/MSST.2010.5496972"},{"key":"1248_CR91","unstructured":"Hdfs architecture. https:\/\/hadoop.apache.org\/docs\/stable\/hadoop-project-dist\/hadoop-hdfs\/HdfsDesign.html"},{"key":"1248_CR92","unstructured":"Schmuck FB, Haskin RL (2002) Gpfs: a shared-disk file system for large computing clusters. In: FAST, vol 2, pp 231\u2013244"},{"key":"1248_CR93","doi-asserted-by":"crossref","unstructured":"Jones T, Koniges AE, Yates RK (2000) Performance of the IBM general parallel file system. In: IPDPS, pp 673\u2013681","DOI":"10.1109\/IPDPS.2000.846052"},{"key":"1248_CR94","unstructured":"Limitations: The IBM SONAS system. https:\/\/www.ibm.com\/support\/knowledgecenter\/en\/STAV45\/com.ibm.sonas.doc\/adm_limitations.h"},{"key":"1248_CR95","unstructured":"Thanh TD, Mohan S, Choi E, Kim SB, Kim P (2008) A taxonomy and survey on distributed file systems. In: Networked computing and advanced information management, 2008. NCM\u201908. Fourth international conference on 1, pp 144\u2013149"},{"key":"1248_CR96","first-page":"1","volume":"10","author":"D Beaver","year":"2010","unstructured":"Beaver D, Kumar S, Li HC, Sobel J, Vajgel P (2010) Finding a needle in haystack: facebook\u2019s photo storage. OSDI 10:1\u20138","journal-title":"OSDI"},{"key":"1248_CR97","unstructured":"Fetterly D, Haridasan M, Isard M, Sundararaman S (2011) Tidyfs: a simple and small distributed file system. In: USENIX annual technical conference, pp 34\u201334"},{"key":"1248_CR98","unstructured":"Quantcast file system. https:\/\/www.quantcast.com\/wp-content\/uploads\/2012\/09\/QC-QFS-One-Pager2.pdf"},{"key":"1248_CR99","unstructured":"Mapr file system. https:\/\/maprdocs.mapr.com\/52\/MapROverview\/c_maprfs.html"},{"key":"1248_CR100","doi-asserted-by":"crossref","unstructured":"Brewer E (2010) A certain freedom: thoughts on the cap theorem. In: Proceedings of the 29th ACM SIGACT-SIGOPS symposium on principles of distributed computing, pp 335\u2013335","DOI":"10.1145\/1835698.1835701"},{"issue":"1","key":"1248_CR101","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-015-0025-0","volume":"2","author":"JR Louren\u00e7o","year":"2015","unstructured":"Louren\u00e7o JR, Cabral B, Carreiro P, Vieira M, Bernardino J (2015) Choosing the right nosql database for the job: a quality attribute evaluation. J Big Data 2(1):1\u201326","journal-title":"J Big Data"},{"key":"1248_CR102","unstructured":"Buyya R, Calheiros RN, Dastjerdi AV (2016) Big data: principles and paradigms. Morgan Kaufmann, ISBN-13: 9780128053942"},{"issue":"3","key":"1248_CR103","first-page":"197","volume":"5","author":"D Abadi","year":"2013","unstructured":"Abadi D, Boncz P, Harizopoulos S, Idreos S, Madden S et al (2013) The design and implementation of modern column-oriented database systems. Now 5(3):197\u2013280","journal-title":"Now"},{"key":"1248_CR104","unstructured":"Matei G, Bank RC (2010) Column-oriented databases, an alternative for analytical environment. Database Syst J 1(2):3\u201316"},{"issue":"7","key":"1248_CR105","doi-asserted-by":"publisher","first-page":"419","DOI":"10.14778\/1988776.1988778","volume":"4","author":"A Floratou","year":"2011","unstructured":"Floratou A, Patel JM, Shekita EJ, Tata S (2011) Column-oriented storage techniques for mapreduce. Proc VLDB Endow 4(7):419\u2013429","journal-title":"Proc VLDB Endow"},{"issue":"2","key":"1248_CR106","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1365815.1365816","volume":"26","author":"F Chang","year":"2008","unstructured":"Chang F, Dean J, Ghemawat S, Hsieh WC, Wallach DA, Burrows M, Chandra T, Fikes A, Gruber RE (2008) Bigtable: a distributed storage system for structured data. ACM Trans Comput Syst 26(2):1\u201326","journal-title":"ACM Trans Comput Syst"},{"issue":"2","key":"1248_CR107","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1145\/1773912.1773922","volume":"44","author":"A Lakshman","year":"2010","unstructured":"Lakshman A, Malik P (2010) Cassandra: a decentralized structured storage system. ACM SIGOPS Oper Syst Rev 44(2):35\u201340","journal-title":"ACM SIGOPS Oper Syst Rev"},{"key":"1248_CR108","unstructured":"Stonebraker M, Abadi DJ, Batkin A, Chen X, Cherniack M, Ferreira M, Lau E, Lin A, Madden S, O\u2019Neil E et al. (2005) C-store: a column-oriented DBMS. In: Proceedings of the 31st international conference on very large data bases, pp 553\u2013564"},{"key":"1248_CR109","first-page":"225","volume":"5","author":"PA Boncz","year":"2005","unstructured":"Boncz PA, Zukowski M, Nes N (2005) Monetdb\/x100: hyper-pipelining query execution. CIDR 5:225\u2013237","journal-title":"CIDR"},{"issue":"1","key":"1248_CR110","first-page":"40","volume":"35","author":"S Idreos","year":"2012","unstructured":"Idreos S, Groffen F, Nes N, Manegold S, Mullender S, Kersten M (2012) Monetdb: two decades of research in column-oriented database architectures. Bull IEEE Comput Soc Tech Comm Data Eng 35(1):40\u201345","journal-title":"Bull IEEE Comput Soc Tech Comm Data Eng"},{"issue":"1","key":"1248_CR111","doi-asserted-by":"publisher","first-page":"561","DOI":"10.1145\/1227504.1227498","volume":"39","author":"E Sciore","year":"2007","unstructured":"Sciore E (2007) Simpledb: a simple java-based multiuser syst for teaching database internals. ACM SIGCSE Bull 39(1):561\u2013565","journal-title":"ACM SIGCSE Bull"},{"issue":"1","key":"1248_CR112","first-page":"21","volume":"35","author":"M Zukowski","year":"2012","unstructured":"Zukowski M, Boncz P (2012) Vectorwise: beyond column stores. IEEE Data Eng Bull 35(1):21\u201327","journal-title":"IEEE Data Eng Bull"},{"key":"1248_CR113","unstructured":"Edward SG, Sabharwal N (2015) Mongodb limitations. In: Practical MongoDB, pp 227\u2013232"},{"key":"1248_CR114","unstructured":"Ravendb project. https:\/\/ravendb.net\/docs\/article-page\/3.0\/csharp"},{"key":"1248_CR115","unstructured":"Cross datacenter replication. http:\/\/docs.couchbase.com\/admin\/admin\/XDCR\/xdcr-intro.html"},{"issue":"6","key":"1248_CR116","doi-asserted-by":"publisher","first-page":"205","DOI":"10.1145\/1323293.1294281","volume":"41","author":"G DeCandia","year":"2007","unstructured":"DeCandia G, Hastorun D, Jampani M, Kakulapati G, Lakshman A, Pilchin A, Sivasubramanian S, Vosshall P, Vogels W (2007) Dynamo: amazon\u2019s highly available key-value store. ACM SIGOPS Oper Syst Rev 41(6):205\u2013220","journal-title":"ACM SIGOPS Oper Syst Rev"},{"key":"1248_CR117","unstructured":"Basho products-riak products. http:\/\/basho.com\/products\/"},{"key":"1248_CR118","unstructured":"Sumbaly R, Kreps J, Gao L, Feinberg A, Soman C, Shah S (2012) Serving large-scale batch computed data with project voldemort. In: Proceedings of the 10th USENIX conference on file and storage technologies, pp 18\u201318"},{"key":"1248_CR119","doi-asserted-by":"crossref","unstructured":"Gudivada VN, Rao D, Raghavan VV (2014) NoSQL systems for big data management. In: 2014 IEEE World congress on services, pp 190\u2013197","DOI":"10.1109\/SERVICES.2014.42"},{"key":"1248_CR120","unstructured":"Allegrograph. https:\/\/franz.com\/agraph\/allegrograph\/"},{"key":"1248_CR121","unstructured":"Hypergraphdb. http:\/\/www.hypergraphdb.org\/"},{"key":"1248_CR122","unstructured":"Infinitegraph. http:\/\/www.objectivity.com\/products\/infinitegraph\/"},{"key":"1248_CR123","unstructured":"Moniruzzaman ABM, Hossain SA (2013) Nosql database: new era of databases for big data analytics-classification, characteristics and comparison. arXiv preprint arXiv:1307.0191"},{"key":"1248_CR124","unstructured":"Apache hbase reference guide. https:\/\/hbase.apache.org\/apache_hbase_reference_guide.pdf"},{"key":"1248_CR125","unstructured":"Transparent data encryption. http:\/\/docs.datastax.com\/en\/archived\/datastax_enterprise\/4.0\/datastax_enterprise\/sec\/secTDE.html"},{"key":"1248_CR126","unstructured":"Khetrapal A, Ganesh V (2006) Hbase and hypertable for large scale distributed storage systems. Dept. of Computer Science, Purdue University, pp 22\u201328"},{"key":"1248_CR127","unstructured":"Apache accumulo project. https:\/\/accumulo.apache.org\/"},{"key":"1248_CR128","unstructured":"Ghaffari Amir, Chechina Natalia, Trinder Phil, Meredith Jon (2013) Scalable persistent storage for Erlang: theory and practice. In: Proceedings of the twelfth ACM SIGPLAN workshop on Erlang, pp 73\u201374"},{"issue":"1","key":"1248_CR129","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1145\/1435417.1435432","volume":"52","author":"W Vogels","year":"2009","unstructured":"Vogels W (2009) Eventually consistent. Commun ACM 52(1):40\u201344","journal-title":"Commun ACM"},{"key":"1248_CR130","unstructured":"Apache hbase project. https:\/\/blogs.apache.org\/hbase\/entry\/hbase_cell_security"},{"key":"1248_CR131","unstructured":"Mongodb mannual. https:\/\/docs.mongodb.org\/manual\/core\/security-encryption-at-rest"},{"key":"1248_CR132","unstructured":"Redis project. https:\/\/redis.io\/"},{"key":"1248_CR133","unstructured":"Random notes on improving the Redis LRU algorithm. http:\/\/antirez.com\/news\/109"},{"key":"1248_CR134","unstructured":"Redis4.0. https:\/\/redislabs.com\/blog\/redis-4-0-0-released\/"},{"key":"1248_CR135","unstructured":"Redis cluster specification. https:\/\/redis.io\/topics\/cluster-spec"},{"key":"1248_CR136","unstructured":"In-memory storage engine. http:\/\/learnmongodbthehardway.com\/schema\/wiredtiger\/"},{"key":"1248_CR137","unstructured":"The apache mahout project. https:\/\/mahout.apache.org\/"},{"key":"1248_CR138","unstructured":"Spark 2.3-mllib guide. https:\/\/spark.apache.org\/releases\/spark-release-2-3-0.html#mllib"},{"key":"1248_CR139","unstructured":"Flinkml: Machine learning for flink. https:\/\/ci.apache.org\/projects\/flink\/flink-docs-release-1.4\/dev\/libs\/ml\/"},{"key":"1248_CR140","unstructured":"Mllib guide. https:\/\/spark.apache.org\/docs\/1.6.2\/mllib-guide.html"},{"issue":"34","key":"1248_CR141","first-page":"1","volume":"17","author":"X Meng","year":"2016","unstructured":"Meng X, Bradley J, Yuvaz B, Sparks E, Venkataraman S, Liu D, Freeman J, Tsai D, Amde M, Owen S et al (2016) Mllib: Machine learning in apache spark. JMLR 17(34):1\u20137","journal-title":"JMLR"},{"issue":"11","key":"1248_CR142","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1145\/2934664","volume":"59","author":"M Zaharia","year":"2016","unstructured":"Zaharia M, Xin RS, Wendell P, Das T, Armbrust M, Dave A, Meng X, Rosen J, Venkataraman S, Franklin MJ (2016) Apache spark: a unified engine for big data processing. Commun ACM 59(11):56\u201365","journal-title":"Commun ACM"},{"key":"1248_CR143","unstructured":"Machine learning library (mllib) guide. https:\/\/spark.apache.org\/docs\/latest\/ml-guide.html"},{"key":"1248_CR144","unstructured":"Different default regparam values in als. https:\/\/issues.apache.org\/jira\/browse\/SPARK-19787"},{"key":"1248_CR145","unstructured":"Spark 2.3, mllib guide. https:\/\/spark.apache.org\/docs\/2.3.0\/ml-guide.html"},{"key":"1248_CR146","first-page":"28","volume":"38","author":"P Carbone","year":"2015","unstructured":"Carbone P, Ewen S, Haridi S, Katsifodimos A, Markl V, Tzoumas K (2015) Apache flink: stream and batch processing in a single engine. Data Eng 38:28\u201338","journal-title":"Data Eng"},{"key":"1248_CR147","unstructured":"Introducing Neo4j Bloom: Graph Data Visualization for Everyone. https:\/\/neo4j.com\/blog\/introducing-neo4j-bloom-graph-data-visualization-for-everyone\/"},{"key":"1248_CR148","unstructured":"Orange documentation https:\/\/orange.biolab.si\/docs\/"},{"issue":"3","key":"1248_CR149","doi-asserted-by":"publisher","first-page":"036106","DOI":"10.1103\/PhysRevE.76.036106","volume":"76","author":"UN Raghavan","year":"2007","unstructured":"Raghavan UN, R\u00e9ka A, Kumara S (2007) Near linear time algorithm to detect community structures in large-scale networks. Phys Rev E 76(3):036106","journal-title":"Phys Rev E"},{"key":"1248_CR150","unstructured":"Chappell D (2015) Introducing azure machine learning. A guide for technical professionals, sponsored by microsoft corporation"},{"key":"1248_CR151","unstructured":"Overview diagram of azure machine learning studio capabilities. https:\/\/docs.microsoft.com\/en-in\/azure\/machine-learning\/studio\/studio-overview-diagram"},{"key":"1248_CR152","unstructured":"Azure capabilities, limitations and support. https:\/\/docs.microsoft.com\/en-us\/azure\/machine-learning\/studio\/faq"},{"key":"1248_CR153","unstructured":"Ibm cloud\/machine learning. https:\/\/console.bluemix.net\/docs\/services\/PredictiveModeling\/index.html#WMLgettingstarted"},{"key":"1248_CR154","unstructured":"Amazon machine learning. https:\/\/aws.amazon.com\/aml\/"},{"key":"1248_CR155","unstructured":"Amazon sagemaker features. https:\/\/aws.amazon.com\/sagemaker\/features\/"},{"key":"1248_CR156","unstructured":"Netflix\u2019s recommendation ml pipeline using apache spark. https:\/\/www.dbtsai.com\/assets\/pdf\/2017-netflixs-recommendation-ml-pipeline-using-apache-spark.pdf"},{"key":"1248_CR157","unstructured":"Role of spark in transforming ebay\u2019s enterprise data platform. https:\/\/databricks.com\/session\/role-of-spark-in-transforming-ebays-enterprise-data-platform"},{"key":"1248_CR158","unstructured":"Number of full-time employees at alibaba from 2012 to 2017. https:\/\/www.statista.com\/statistics\/226794\/number-of-employees-at-alibabacom\/"},{"key":"1248_CR159","unstructured":"Number of active consumers across alibaba\u2019s online shopping. https:\/\/www.statista.com\/statistics\/226927\/alibaba-cumulative-active-online-buyers-taobao-tmall\/"},{"key":"1248_CR160","doi-asserted-by":"crossref","unstructured":"Huang L, Hu G, Lu X (2009) E-business ecosystem and its evolutionary path: the case of the alibaba group in china. Pacific Asia J Assoc Inf Syst 1(4)","DOI":"10.17705\/1pais.01402"},{"key":"1248_CR161","unstructured":"A year of blink at alibaba: apache flink in large scale production. http:\/\/www.dataversity.net\/year-blink-alibaba\/"},{"issue":"6","key":"1248_CR162","first-page":"194","volume":"6","author":"P Gupta","year":"2016","unstructured":"Gupta P, Sharma A, Jindal R (2016) Scalable machine-learning algorithms for big data analytics: a comprehensive review. Wiley Interdiscip Rev: Data Min Knowl Discov 6(6):194\u2013214","journal-title":"Wiley Interdiscip Rev: Data Min Knowl Discov"},{"key":"1248_CR163","unstructured":"Alibaba Blink: Real-time computing for big-time gains. https:\/\/medium.com\/@alitech_2017\/alibaba-blink-real-time-computing-for-big-time-gains-707fdd583c26"},{"issue":"2","key":"1248_CR164","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1177\/0165551515625029","volume":"43","author":"X Ji","year":"2017","unstructured":"Ji X, Chun SA, Cappellari P, Geller J (2017) Linking and using social media data for enhancing public health analytics. J Inf Sci 43(2):221\u2013245","journal-title":"J Inf Sci"},{"key":"1248_CR165","doi-asserted-by":"crossref","unstructured":"Kanaujia PKM, Pandey M, Rautaray SS (2017) Real time financial analysis using big data technologies. In: I-SMAC (IoT in social, mobile, analytics and cloud)(I-SMAC), 2017 international conference on, pp 131\u2013136","DOI":"10.1109\/I-SMAC.2017.8058323"},{"issue":"5","key":"1248_CR166","doi-asserted-by":"publisher","first-page":"697","DOI":"10.1111\/jpim.12405","volume":"34","author":"WW Moe","year":"2017","unstructured":"Moe WW, Schweidel DA (2017) Opportunities for innovation in social media analytics. J Prod Innov Manag 34(5):697\u2013702","journal-title":"J Prod Innov Manag"},{"key":"1248_CR167","doi-asserted-by":"crossref","unstructured":"Psyllidis A, Bozzon A, Bocconi S, Bolivar CT (2015) A platform for urban analytics and semantic data integration in city planning. In: International conference on computer-aided architectural design futures, pp 21\u201336","DOI":"10.1007\/978-3-662-47386-3_2"},{"key":"1248_CR168","unstructured":"Gust G, Flath C, Brandt T, Str\u00f6hle P, Neumann D (2016) Bringing analytics into practice: evidence from the power sector"},{"key":"1248_CR169","doi-asserted-by":"crossref","unstructured":"Nguyen D, Lenharth A, Pingali K (2013) A lightweight infrastructure for graph analytics. In: Proceedings of the twenty-fourth ACM symposium on operating systems principles, pp 456\u2013471","DOI":"10.1145\/2517349.2522739"},{"key":"1248_CR170","doi-asserted-by":"crossref","unstructured":"Baesens B, Van Vlasselaer V, Verbeke W (2015) Fraud analytics: a broader perspective. Fraud analytics using descriptive, predictive, and social network techniques: a guide to data science for fraud detection, pp 313\u2013346","DOI":"10.1002\/9781119146841.ch7"},{"issue":"3","key":"1248_CR171","doi-asserted-by":"publisher","first-page":"1283","DOI":"10.1007\/s10586-016-0581-x","volume":"19","author":"Z Xu","year":"2016","unstructured":"Xu Z, Mei L, Chuanping H, Liu Y (2016) The big data analytics and applications of the surveillance system using video structured description technology. Cluster Comput 19(3):1283\u20131292","journal-title":"Cluster Comput"},{"issue":"1","key":"1248_CR172","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1146\/annurev-financial-110311-101754","volume":"4","author":"D Bisias","year":"2012","unstructured":"Bisias D, Flood M, Lo AW, Valavanis S (2012) A survey of systemic risk analytics. Annu Rev Financ Econ 4(1):255\u2013296","journal-title":"Annu Rev Financ Econ"},{"key":"1248_CR173","doi-asserted-by":"crossref","unstructured":"Sagiroglu S, Sinanc D (2013) Big data: a review. In: Collaboration technologies and systems (CTS), 2013 international conference on, pp 42\u201347","DOI":"10.1109\/CTS.2013.6567202"},{"key":"1248_CR174","unstructured":"Rabkin A, Arye M, Sen S, Pai VS, Freedman MJ (2014) Aggregation and degradation in JetStream: streaming analytics in the wide area. In: NSDI vol 14, 275\u2013288"},{"key":"1248_CR175","doi-asserted-by":"crossref","unstructured":"Zhang L, Stoffel A, Behrisch M, Mittelstadt S, Schreck T, Pompl R, Weber S, Last H, Keim D (2012) Visual analytics for the big data era comparative review of state-of-the-art commercial systems. In: Visual analytics science and technology (VAST), 2012 IEEE conference on, pp 173\u2013182","DOI":"10.1109\/VAST.2012.6400554"},{"issue":"2","key":"1248_CR176","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1111\/jbl.12010","volume":"34","author":"MA Waller","year":"2013","unstructured":"Waller MA, Fawcett SE (2013) Data science, predictive analytics, and big data: a revolution that will transform supply chain design and management. J Bus Logist 34(2):77\u201384","journal-title":"J Bus Logist"},{"issue":"4","key":"1248_CR177","doi-asserted-by":"publisher","first-page":"1165","DOI":"10.2307\/41703503","volume":"36","author":"H Chen","year":"2012","unstructured":"Chen H, Chiang RHL, Storey VC (2012) Business intelligence and analytics: from big data to big impact. MIS Q 36(4):1165\u20131188","journal-title":"MIS Q"},{"issue":"3","key":"1248_CR178","first-page":"1","volume":"4","author":"W Raghupathi","year":"2013","unstructured":"Raghupathi W, Raghupathi V (2013) An overview of health analytics. J Health Med Inform 4(3):1\u201311","journal-title":"J Health Med Inform"},{"key":"1248_CR179","doi-asserted-by":"publisher","DOI":"10.1002\/0470073047","volume-title":"Mining graph data","author":"DJ Cook","year":"2006","unstructured":"Cook DJ, Holder LB (2006) Mining graph data. Wiley, London"},{"issue":"3","key":"1248_CR180","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1016\/j.physrep.2009.11.002","volume":"486","author":"S Fortunato","year":"2010","unstructured":"Fortunato S (2010) Community detection in graphs. Phys Rep 486(3):75\u2013174","journal-title":"Phys Rep"},{"key":"1248_CR181","doi-asserted-by":"crossref","unstructured":"Xin RS, Gonzalez JE, Franklin MJ, Stoica I (2013) Graphx: a resilient distributed graph system on spark. In: First international workshop on graph data management experiences and systems 2(1\u20132):6","DOI":"10.1145\/2484425.2484427"},{"key":"1248_CR182","unstructured":"Low Y, Gonzalez J, Kyrola A, Bickson D, Guestrin C (2011) Graphlab: A distributed framework for machine learning in the cloud. arXiv preprint arXiv:1107.0922"},{"key":"1248_CR183","unstructured":"Introducing gelly: Graph processing with apache flink. https:\/\/flink.apache.org\/news\/2015\/08\/24\/introducing-flink-gelly.html"},{"key":"1248_CR184","unstructured":"Liu B (2007) Web data mining: exploring hyperlinks, contents, and usage data. Springer, Berlin. ISBN-13: 9783642194597"},{"key":"1248_CR185","doi-asserted-by":"crossref","unstructured":"Wesley R, Eldridge M, Terlecki PT (2011) An analytic data engine for visualization in tableau. In: Proceedings of the 2011 ACM SIGMOD international conference on management of data, pp 1185\u20131194","DOI":"10.1145\/1989323.1989449"},{"key":"1248_CR186","unstructured":"Garc\u00eda M, Harmsen B (2012) Qlikview 11 for developers. Packt Publishing Ltd"},{"key":"1248_CR187","unstructured":"JMP https:\/\/www.jmp.com\/en_us\/home.html"},{"key":"1248_CR188","unstructured":"Microstrategy enterprise analytics and mobility. http:\/\/www.microstrategy.com\/us\/capabilities\/visualizations"},{"key":"1248_CR189","unstructured":"Tibco spotfire. http:\/\/spotfire.tibco.com\/"},{"key":"1248_CR190","doi-asserted-by":"crossref","unstructured":"Abousalh-Neto NA, Kazgan S (2012) Big data exploration through visual analytics. In: Visual analytics science and technology (VAST), 2012 IEEE conference on, pp 285\u2013286","DOI":"10.1109\/VAST.2012.6400514"},{"key":"1248_CR191","unstructured":"Sas. http:\/\/www.sas.com\/en_in\/home.html"},{"key":"1248_CR192","unstructured":"Advizor. http:\/\/www.advizorsolutions.com\/"},{"issue":"3","key":"1248_CR193","doi-asserted-by":"publisher","first-page":"431","DOI":"10.1093\/bioinformatics\/btq675","volume":"27","author":"ME Smoot","year":"2011","unstructured":"Smoot ME, Ono K, Ruscheinski J, Wang P-L, Ideker T (2011) Cytoscape 2.8: new features for data integration and network visualization. Bioinformatics 27(3):431\u2013432","journal-title":"Bioinformatics"},{"issue":"2","key":"1248_CR194","first-page":"47","volume":"21","author":"V Batagelj","year":"1998","unstructured":"Batagelj V, Mrvar A (1998) Pajek-program for large network analysis. Connections 21(2):47\u201357","journal-title":"Connections"},{"key":"1248_CR195","doi-asserted-by":"crossref","unstructured":"Smith MA, Shneiderman B, Milic-Frayling N, Mendes Rodrigues E, Barash V, Dunne C, Capone T, Perer A, Gleave E (2009) Analyzing (social media) networks with NodeXL. In: Proceedings of the fourth international conference on communities and technologies, pp 255\u2013264","DOI":"10.1145\/1556460.1556497"},{"key":"1248_CR196","doi-asserted-by":"crossref","first-page":"361","DOI":"10.1609\/icwsm.v3i1.13937","volume":"8","author":"M Bastian","year":"2009","unstructured":"Bastian M, Heymann S, Jacomy M et al (2009) Gephi: an open source software for exploring and manipulating networks. ICWSM 8:361\u2013362","journal-title":"ICWSM"},{"issue":"5","key":"1248_CR197","first-page":"1","volume":"1695","author":"G Csardi","year":"2006","unstructured":"Csardi G, Nepusz T (2006) The igraph software package for complex network research. Int J Complex Syst 1695(5):1\u20139","journal-title":"Int J Complex Syst"},{"key":"1248_CR198","unstructured":"Apache hadoop project. http:\/\/hadoop.apache.org"},{"issue":"1","key":"1248_CR199","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1145\/2522968.2522979","volume":"46","author":"S Sakr","year":"2013","unstructured":"Sakr S, Liu A, Fayoumi AG (2013) The family of mapreduce and large-scale data processing systems. ACM Comput Surv 46(1):11","journal-title":"ACM Comput Surv"},{"issue":"4","key":"1248_CR200","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1145\/2094114.2094118","volume":"40","author":"K-H Lee","year":"2012","unstructured":"Lee K-H, Lee Y-J, Choi H, Chung YD, Moon B (2012) Parallel data processing with mapreduce: a survey. AcM sIGMoD Rec 40(4):11\u201320","journal-title":"AcM sIGMoD Rec"},{"key":"1248_CR201","doi-asserted-by":"crossref","unstructured":"Chen Y, Kreulen J, Campbell M, Abrams C (2011) Analytics ecosystem transformation: a force for business model innovation. In: 2011 Annual SRII global conference, pp 11\u201320","DOI":"10.1109\/SRII.2011.12"},{"key":"1248_CR202","unstructured":"Venner J, Wadkar S, Siddalingaiah M (2014) Pro apache Hadoop. ISBN: 9781430248637"},{"key":"1248_CR203","unstructured":"Apache hadoop project. http:\/\/hadoop.apache.org\/docs\/r2.5.2\/hadoop-project-dist\/hadoop-hdfs\/HDFSHighAvailabilityWithQJM.html"},{"key":"1248_CR204","unstructured":"Hdfs high availability using the quorum journal manager. https:\/\/hadoop.apache.org\/docs\/r2.7.1\/hadoop-project-dist\/hadoop-hdfs\/HDFSHighAvailabilityWithQJM.html"},{"key":"1248_CR205","doi-asserted-by":"crossref","unstructured":"Vavilapalli VK, Murthy AC, Douglas C, Agarwal S, Konar M, Evans R, Graves T, Lowe Jason, Shah Hitesh, Seth Siddharth et\u00a0al (2013) Apache hadoop yarn: Yet another resource negotiator. In: Proceedings of the 4th annual symposium on cloud computing, pp 5:1\u201316","DOI":"10.1145\/2523616.2523633"},{"key":"1248_CR206","unstructured":"HDFS Erasure Coding. http:\/\/hadoop.apache.org\/docs\/r3.0.1\/hadoop-project-dist\/hadoop-hdfs\/HDFSErasureCoding.html"},{"key":"1248_CR207","unstructured":"Apache Hadoop 3.0.1. http:\/\/hadoop.apache.org\/docs\/r3.0.1\/"},{"key":"1248_CR208","first-page":"10","volume":"10","author":"M Zaharia","year":"2010","unstructured":"Zaharia M, Chowdhury M, Franklin MJ, Shenker S, Stoica I (2010) Spark: cluster computing with working sets. HotCloud 10:10\u201310","journal-title":"HotCloud"},{"key":"1248_CR209","unstructured":"Marcu O-C, Costan A, Antoniu G, P\u00e9rez-Hern\u00e1ndez MS (2016) Spark versus flink: understanding performance in big data analytics frameworks. In: Cluster computing (CLUSTER), 2016 IEEE international conference on, pp 433\u2013442"},{"key":"1248_CR210","unstructured":"Kubernetes concepts. https:\/\/kubernetes.io\/docs\/concepts\/overview\/what-is-kubernetes\/"},{"key":"1248_CR211","unstructured":"Rensin DK (2015) Kubernetes-scheduling the future at cloud scale"},{"key":"1248_CR212","doi-asserted-by":"crossref","unstructured":"Thusoo A, Sarma JS, Jain N, Shao Z, Chakka P, Zhang N, Antony S, Liu H, Murthy R (2010) Hive-a petabyte scale data warehouse using hadoop. In: 2010 IEEE 26th international conference on data engineering (ICDE 2010), pp 996\u20131005","DOI":"10.1109\/ICDE.2010.5447738"},{"key":"1248_CR213","unstructured":"Impala project. http:\/\/docs.aws.amazon.com\/ElasticMapReduce\/latest\/DeveloperGuide\/emr-impala.html"},{"key":"1248_CR214","doi-asserted-by":"crossref","unstructured":"Armbrust M, Xin RS, Lian C, Huai Y, Liu D, Bradley JK, Meng X, Kaftan T, Franklin MJ, Ghodsi A, et\u00a0al (2015) Spark SQL: relational data processing in spark. In: Proceedings of the 2015 ACM SIGMOD international conference on management of data, pp 1383\u20131394","DOI":"10.1145\/2723372.2742797"},{"key":"1248_CR215","unstructured":"Traverso M (2013) Presto: interacting with petabytes of data at facebook. Retrieved February 4:2014"},{"issue":"2","key":"1248_CR216","doi-asserted-by":"publisher","first-page":"100","DOI":"10.1089\/big.2013.0011","volume":"1","author":"M Hausenblas","year":"2013","unstructured":"Hausenblas M, Nadeau J (2013) Apache drill: interactive ad-hoc analysis at scale. Big Data 1(2):100\u2013104","journal-title":"Big Data"},{"key":"1248_CR217","unstructured":"Apache kylin. http:\/\/kylin.apache.org\/docs"},{"key":"1248_CR218","unstructured":"Ho L-Y, Li T-H, Wu J-J, Liu P (2013) Kylin: an efficient and scalable graph data processing system. In: Big data, 2013 IEEE international conference on, pp 193\u2013198"},{"issue":"12","key":"1248_CR219","doi-asserted-by":"publisher","first-page":"1790","DOI":"10.14778\/2367502.2367518","volume":"5","author":"A Lamb","year":"2012","unstructured":"Lamb A, Fuller M, Varadarajan R, Tran N, Vandiver B, Doshi L, Bear C (2012) The vertica analytic database: C-store 7 years later. Proc VLDB Endow 5(12):1790\u20131801","journal-title":"Proc VLDB Endow"},{"key":"1248_CR220","doi-asserted-by":"crossref","unstructured":"Chattopadhyay B, Lin L, Liu W, Mittal S, Aragonda P, Lychagina V, Kwon Y, Wong M (2011) Tenzing a SQL implementation on the mapreduce framework","DOI":"10.14778\/3402755.3402765"},{"issue":"12","key":"1248_CR221","doi-asserted-by":"publisher","first-page":"1295","DOI":"10.14778\/2732977.2733002","volume":"7","author":"A Floratou","year":"2014","unstructured":"Floratou A, Minhas UF, \u00d6zcan F (2014) Sql-on-hadoop: full circle back to shared-nothing database architectures. Proc VLDB Endow 7(12):1295\u20131306","journal-title":"Proc VLDB Endow"},{"key":"1248_CR222","unstructured":"Nasir MAU (2016) Fault tolerance for stream processing engines. arXiv preprint arXiv:1605.00928"},{"key":"1248_CR223","unstructured":"Apache storm. http:\/\/storm.apache.org\/"},{"key":"1248_CR224","unstructured":"Apache storm. http:\/\/storm.apache.org\/releases\/current\/Concepts.html"},{"key":"1248_CR225","doi-asserted-by":"crossref","unstructured":"van\u00a0der Veen JS, van\u00a0der Waaij B, Lazovik E, Wijbrandi W, Meijer RJ (2015) Dynamically scaling apache storm for the analysis of streaming data. In: Big data computing service and applications (BigDataService), 2015 IEEE first international conference on, pp 154\u2013161","DOI":"10.1109\/BigDataService.2015.56"},{"key":"1248_CR226","doi-asserted-by":"crossref","unstructured":"Toshniwal A, Taneja S, Shukla A, Ramasamy K, Patel JM, Kulkarni S, Jackson J, Gade K, Fu M, Donham J et al (2014) Storm@ twitter. In: Proceedings of the 2014 ACM SIGMOD international conference on management of data, pp 147\u2013156","DOI":"10.1145\/2588555.2595641"},{"key":"1248_CR227","unstructured":"Apache strom 1.2.1. http:\/\/storm.apache.org\/releases\/current\/Fault-tolerance.html"},{"key":"1248_CR228","unstructured":"Storm 1.2.0. http:\/\/storm.apache.org\/2018\/02\/15\/storm120-released.html"},{"key":"1248_CR229","unstructured":"Samza documentation. https:\/\/samza.apache.org\/learn\/documentation\/0.14\/comparisons\/spark-streaming.html"},{"key":"1248_CR230","unstructured":"Bockermann C (2014) A survey of the stream processing landscape. Lehrstuhl fork unstliche Intelligenz Technische Universit. at Dortmund"},{"key":"1248_CR231","doi-asserted-by":"crossref","unstructured":"Neumeyer L, Robbins B, Nair A, Kesari A (2010) S4: distributed stream computing platform. In: Data mining workshops (ICDMW), 2010 IEEE international conference on, pp 170\u2013177","DOI":"10.1109\/ICDMW.2010.172"},{"key":"1248_CR232","first-page":"10","volume":"12","author":"M Zaharia","year":"2012","unstructured":"Zaharia M, Das T, Li H, Shenker S, Stoica I (2012) Discretized streams: an efficient and fault-tolerant model for stream processing on large clusters. HotCloud 12:10\u201310","journal-title":"HotCloud"},{"key":"1248_CR233","doi-asserted-by":"crossref","unstructured":"Zaharia M, Das T, Li H, Hunter T, Shenker S, Stoica I (2013) Discretized streams: fault-tolerant streaming computation at scale. In: Proceedings of the twenty-fourth ACM symposium on operating systems principles, pp 423\u2013438","DOI":"10.1145\/2517349.2522737"},{"key":"1248_CR234","unstructured":"Spark streaming programming guide. https:\/\/spark.apache.org\/docs\/2.2.0\/streaming-programming -guide.html#discretized-streams-dstreams"},{"key":"1248_CR235","unstructured":"Improved fault-tolerance and zero data loss in apache spark streaming. https:\/\/databricks.com\/blog\/2015\/01\/15\/improved-driver-fault-tolerance-and-zero-data-loss-in-spark-streaming.html"},{"key":"1248_CR236","unstructured":"Apache spark 2.3. https:\/\/spark.apache.org\/releases\/spark-release-2-3-0.html"},{"issue":"1","key":"1248_CR237","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1145\/214451.214456","volume":"3","author":"KM Chandy","year":"1985","unstructured":"Chandy KM, Lamport L (1985) Distributed snapshots: determining global states of distributed systems. ACM Trans Comput Syst 3(1):63\u201375","journal-title":"ACM Trans Comput Syst"},{"key":"1248_CR238","unstructured":"Apache spark 2.3. https:\/\/databricks.com\/blog\/2018\/02\/28\/introducing-apache-spark-2-3.html"},{"issue":"6","key":"1248_CR239","doi-asserted-by":"publisher","first-page":"939","DOI":"10.1007\/s00778-014-0357-y","volume":"23","author":"A Alexandrov","year":"2014","unstructured":"Alexandrov A, Bergmann R, Ewen S, Freytag J-C, Hueske F, Heise A, Kao O, Leich M, Leser U, Markl V (2014) The stratosphere platform for big data analytics. VLDB J 23(6):939\u2013964","journal-title":"VLDB J"},{"key":"1248_CR240","unstructured":"Apache flink 1.4. https:\/\/ci.apache.org\/projects\/flink\/flink-docs-release-1.4\/concepts\/runtime.html"},{"key":"1248_CR241","unstructured":"Flink checkpointing. https:\/\/ci.apache.org\/projects\/flink\/flink-docs-release-1.2\/dev\/stream\/checkpointing.html"},{"key":"1248_CR242","unstructured":"Exactly-once processing in samza. https:\/\/cwiki.apache.org\/confluence\/display\/SAMZA\/SEP-10+Exactly-once+Processing+in+Samza"},{"issue":"1","key":"1248_CR243","first-page":"149","volume":"16","author":"GF Morales De","year":"2015","unstructured":"De Morales GF, Bifet A (2015) Samoa: scalable advanced massive online analysis. J Mach Learn Res 16(1):149\u2013153","journal-title":"J Mach Learn Res"},{"key":"1248_CR244","unstructured":"Samoa project. https:\/\/samoa.incubator.apache.org\/documentation\/SAMOA-Topology.html"},{"key":"1248_CR245","unstructured":"Apache samoa documentation. https:\/\/samoa.incubator.apache.org\/documentation\/Home.html"},{"issue":"11","key":"1248_CR246","doi-asserted-by":"publisher","first-page":"1033","DOI":"10.14778\/2536222.2536229","volume":"6","author":"T Akidau","year":"2013","unstructured":"Akidau T, Balikov A, Bekiro\u011flu K, Chernyak S, Haberman J, Lax R, McVeety S, Mills D, Nordstrom P, Whittle S (2013) Millwheel: fault-tolerant stream processing at internet scale. Proc VLDB Endow 6(11):1033\u20131044","journal-title":"Proc VLDB Endow"},{"key":"1248_CR247","doi-asserted-by":"crossref","unstructured":"Kulkarni S, Bhagat N, Fu M, Kedigehalli V, Kellogg C, Mittal S, Patel JM, Ramasamy K, Taneja S (2015) Twitter heron: stream processing at scale. In: Proceedings of the 2015 ACM SIGMOD international conference on management of data, pp 239\u2013250","DOI":"10.1145\/2723372.2742788"},{"key":"1248_CR248","doi-asserted-by":"crossref","unstructured":"Abadi D, Carney D, Cetintemel U, Cherniack M, Convey C, Erwin C, Galvez E, Hatoun M, Maskey A, Rasin A et\u00a0al (2003) Aurora: a data stream management system. In: Proceedings of the 2003 ACM SIGMOD international conference on management of data, pp 666\u2013666","DOI":"10.1145\/872757.872855"},{"key":"1248_CR249","unstructured":"Heron project. https:\/\/twitter.github.io\/heron\/docs\/concepts\/architecture\/#metrics-manager"},{"key":"1248_CR250","unstructured":"Structured streaming programming guide. https:\/\/spark.apache.org\/docs\/latest\/structured-streaming-programming-guide.html"},{"key":"1248_CR251","unstructured":"Flink streaming. https:\/\/ci.apache.org\/projects\/flink\/flink-docs-master\/dev\/datastream_api.html"},{"key":"1248_CR252","doi-asserted-by":"crossref","unstructured":"Fu M, Agrawal A, Floratou A, Graham B, Jorgensen A, Li M, Lu N, Ramasamy K, Rao S, Wang C (2017) Twitter heron: towards extensible streaming engines. In: Data engineering (ICDE), 2017 IEEE 33rd international conference on, pp 1165\u20131172","DOI":"10.1109\/ICDE.2017.161"},{"key":"1248_CR253","unstructured":"Amazon kinesis data streams. https:\/\/docs.aws.amazon.com\/streams\/latest\/dev\/key-concepts.html"},{"key":"1248_CR254","unstructured":"Azure stream analytics. https:\/\/docs.microsoft.com\/en-us\/azure\/stream-analytics\/ stream-analytics-introduction#how-does-stream-analytics-work"},{"key":"1248_CR255","unstructured":"Ibm streaming analytics. https:\/\/www.ibm.com\/cloud\/streaming-analytics"},{"key":"1248_CR256","unstructured":"Samza-storm. https:\/\/samza.apache.org\/learn\/documentation\/0.7.0\/comparisons\/storm.html"},{"key":"1248_CR257","unstructured":"Apache storm 2.0. http:\/\/storm.apache.org\/releases\/2.0.0-SNAPSHOT\/index.html"},{"key":"1248_CR258","unstructured":"Shukla A, Chaturvedi S, Simmhan Y (2017) Riotbench: a real-time iot benchmark for distributed stream processing platforms. arXiv preprint arXiv:1701.08530"},{"key":"1248_CR259","unstructured":"Dreissig F, Pollner N (2017) A data center infrastructure monitoring platform based on storm and trident. Datenbanksysteme f\u00fcr Business, Technologie und Web (BTW 2017)-Workshopband"},{"key":"1248_CR260","doi-asserted-by":"crossref","unstructured":"Saha B, Shah H, Seth S, Vijayaraghavan G, Murthy A, Curino C (2015) Apache tez: a unifying framework for modeling and building data processing applications. In: Proceedings of the 2015 ACM SIGMOD international conference on management of data, pp 1357\u20131369","DOI":"10.1145\/2723372.2742790"},{"key":"1248_CR261","unstructured":"Tpc-h is a decision support benchmark. http:\/\/www.tpc.org\/"},{"key":"1248_CR262","unstructured":"Hortonworks data platform-apache hive performance tuning. https:\/\/docs.hortonworks.com\/HDPDocuments\/HDP2\/HDP-2.5.5\/bk_hive-performance-tuning\/bk_hive-performance-tuning.pdf"},{"key":"1248_CR263","unstructured":"Aws-containers. https:\/\/aws.amazon.com\/what-are-containers\/"},{"key":"1248_CR264","unstructured":"Apache mesos. http:\/\/mesos.apache.org\/documentation\/latest\/"},{"key":"1248_CR265","doi-asserted-by":"crossref","unstructured":"Sebastio S, Ghosh R, Mukherjee T (2018) An availability analysis approach for deployment configurations of containers. IEEE Trans Serv Comput","DOI":"10.1109\/TSC.2017.2788442"},{"key":"1248_CR266","doi-asserted-by":"crossref","unstructured":"Medel V, Rana O, Ba\u00f1ares J\u00c1, Arronategui Unai (2016) Modelling performance and resource management in kubernetes. In: Utility and cloud computing (UCC), 2016 IEEE\/ACM 9th international conference on, pp 257\u2013262","DOI":"10.1145\/2996890.3007869"},{"key":"1248_CR267","unstructured":"Hindman B, Konwinski A, Zaharia M, Ghodsi A, Joseph AD, Katz RH, Shenker S, Stoica I (2011) Mesos: a platform for fine-grained resource sharing in the data center. In: NSDI, vol 11, pp 295\u2013308"},{"key":"1248_CR268","unstructured":"Amazon web services. https:\/\/aws.amazon.com\/docker\/"},{"key":"1248_CR269","unstructured":"Kreps J, Narkhede N, Rao J et\u00a0al (2011) Kafka: a distributed messaging system for log processing. In: Proceedings of the NetDB, pp 1\u20137"},{"key":"1248_CR270","unstructured":"Rabbitmq. https:\/\/www.rabbitmq.com\/"},{"key":"1248_CR271","unstructured":"Activemq. http:\/\/activemq.apache.org\/"},{"key":"1248_CR272","unstructured":"AmazonmQ. https:\/\/aws.amazon.com\/amazon-mq\/"},{"issue":"2","key":"1248_CR273","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1007\/s11761-015-0174-1","volume":"10","author":"H Lampesberger","year":"2016","unstructured":"Lampesberger H (2016) Technologies for web and cloud service interaction: a survey. Serv Oriented Comput Appl 10(2):71\u2013110","journal-title":"Serv Oriented Comput Appl"},{"key":"1248_CR274","doi-asserted-by":"crossref","unstructured":"Dobbelaere P, Esmaili KS (2017) Kafka versus RabbitMQ. arXiv preprint arXiv:1709.00333","DOI":"10.1145\/3093742.3093908"},{"key":"1248_CR275","doi-asserted-by":"crossref","unstructured":"Sangat P, Indrawan-Santiago M, Taniar D (2018) Sensor data management in the cloud: data storage, data ingestion, and data retrieval. Concurr Comput: Pract Exp 30(1)","DOI":"10.1002\/cpe.4354"},{"key":"1248_CR276","unstructured":"Hoffman S (2013) Apache flume: distributed log collection for hadoop. Packt Publishing Ltd"},{"key":"1248_CR277","unstructured":"Ting K, Cecho JJ (2013) Apache Sqoop Cookbook. O\u2019Reilly Media, Inc"},{"key":"1248_CR278","first-page":"1","volume":"10","author":"A Rabkin","year":"2010","unstructured":"Rabkin A, Katz RH (2010) Chukwa: a system for reliable large-scale log collection. LISA 10:1\u201315","journal-title":"LISA"},{"key":"1248_CR279","unstructured":"Apach sqoop-overview. https:\/\/blogs.apache.org\/sqoop\/entry\/apache_sqoop_overview"},{"key":"1248_CR280","unstructured":"Low Y, Gonzalez J, Kyrola A, Bickson D, Guestrin C, Hellerstein JM (2010) Graphlab: a new framework for parallel machine learning. arxiv preprint. arXiv preprint arXiv:1006.4990"},{"key":"1248_CR281","unstructured":"Aver C (2011) Giraph: large-scale graph processing infrastructure on hadoop. In: Proceedings of the Hadoop summit. Santa Clara 11(3), 5\u20139"},{"issue":"1","key":"1248_CR282","first-page":"2","volume":"12","author":"JE Gonzalez","year":"2012","unstructured":"Gonzalez JE, Low Y, Haijie G, Bickson D, Guestrin C (2012) Powergraph: distributed graph-parallel computation on natural graphs. OSDI 12(1):2\u20132","journal-title":"OSDI"},{"key":"1248_CR283","doi-asserted-by":"crossref","unstructured":"Salihoglu S, Widom J (2013) Gps: a graph processing system. In: Proceedings of the 25th international conference on scientific and statistical database management 22, pp 1\u201312","DOI":"10.1145\/2484838.2484843"},{"key":"1248_CR284","first-page":"599","volume":"14","author":"JE Gonzalez","year":"2014","unstructured":"Gonzalez JE, Xin RS, Dave A, Crankshaw D, Franklin MJ, Stoica I (2014) Graphx: graph processing in a distributed dataflow framework. OSDI 14:599\u2013613","journal-title":"OSDI"},{"key":"1248_CR285","unstructured":"Xin RS, Crankshaw D, Dave A, Gonzalez JE, Franklin MJ, Stoica I (2014) Graphx: unifying data-parallel and graph-parallel analytics. arXiv preprint arXiv:1402.2394"},{"key":"1248_CR286","unstructured":"Graphx programming guide. https:\/\/spark.apache.org\/docs\/latest\/graphx-programming-guide.html"},{"key":"1248_CR287","unstructured":"Junghanns M, Petermann A, G\u00f3mez K, Rahm E (2015) Gradoop: scalable graph data management and analytics with hadoop. arXiv preprint arXiv:1506.00548"},{"key":"1248_CR288","unstructured":"Hunt P, Konar M, Junqueira FP, Reed B (2010) Zookeeper: Wait-free coordination for internet-scale systems. In: USENIX annual technical conference 8(9)"},{"key":"1248_CR289","unstructured":"Myriad home. https:\/\/cwiki.apache.org\/confluence\/display\/MYRIAD\/Myriad+Home"},{"key":"1248_CR290","unstructured":"Apache avro. https:\/\/avro.apache.org\/docs\/current\/"},{"issue":"3","key":"1248_CR291","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1016\/j.websem.2008.02.006","volume":"6","author":"W Hu","year":"2008","unstructured":"Hu W, Qu Y (2008) Falcon-AO: a practical ontology matching system. Web Semant: Sci Serv Agents World Wide Web 6(3):237\u2013239","journal-title":"Web Semant: Sci Serv Agents World Wide Web"},{"key":"1248_CR292","unstructured":"Apache nifi project. https:\/\/nifi.apache.org\/"},{"key":"1248_CR293","doi-asserted-by":"crossref","unstructured":"Islam M, Huang AK, Battisha M, Chiang M, Srinivasan S, Peters C, Neumann A, Abdelnur A (2012) Oozie: towards a scalable workflow management system for hadoop. In: Proceedings of the 1st ACM SIGMOD workshop on scalable workflow execution engines and technologies 4:1\u20134:10","DOI":"10.1145\/2443416.2443420"}],"container-title":["Knowledge and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-018-1248-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10115-018-1248-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-018-1248-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,5]],"date-time":"2023-09-05T06:18:52Z","timestamp":1693894732000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10115-018-1248-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,9,18]]},"references-count":293,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2019,9]]}},"alternative-id":["1248"],"URL":"https:\/\/doi.org\/10.1007\/s10115-018-1248-0","relation":{},"ISSN":["0219-1377","0219-3116"],"issn-type":[{"value":"0219-1377","type":"print"},{"value":"0219-3116","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,9,18]]},"assertion":[{"value":"7 December 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 June 2018","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 August 2018","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 September 2018","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}