{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:36:44Z","timestamp":1772120204352,"version":"3.50.1"},"reference-count":104,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["NSFC 61972277"],"award-info":[{"award-number":["NSFC 61972277"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006606","name":"Natural Science Foundation of Tianjin City","doi-asserted-by":"publisher","award":["18JCZDJC30800"],"award-info":[{"award-number":["18JCZDJC30800"]}],"id":[{"id":"10.13039\/501100006606","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002367","name":"Chinese Academy of Sciences","doi-asserted-by":"publisher","award":["U1731243, U1931130"],"award-info":[{"award-number":["U1731243, U1931130"]}],"id":[{"id":"10.13039\/501100002367","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Knowl. Data Eng."],"published-print":{"date-parts":[[2020]]},"DOI":"10.1109\/tkde.2020.2975652","type":"journal-article","created":{"date-parts":[[2020,2,25]],"date-time":"2020-02-25T04:20:30Z","timestamp":1582604430000},"page":"1-1","source":"Crossref","is-referenced-by-count":61,"title":["A Survey on Spark Ecosystem: Big Data Processing Infrastructure, Machine Learning, and Applications"],"prefix":"10.1109","author":[{"given":"Shanjiang","family":"Tang","sequence":"first","affiliation":[]},{"given":"Bingsheng","family":"He","sequence":"additional","affiliation":[]},{"given":"Ce","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Yusen","family":"Li","sequence":"additional","affiliation":[]},{"given":"Kun","family":"Li","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref63","article-title":"Optimizing shuffle performance in spark","author":"Davidson","year":"2013"},{"key":"ref64","article-title":"TensorFlow: Large-scale machine learning on heterogeneous distributed systems","author":"Abadi","year":"2016"},{"key":"ref65","first-page":"337","article-title":"Succinct: Enabling queries on compressed data","volume-title":"Proc. 12th USENIX Symp. Netw. Syst. Des. Implementation","author":"Agarwal"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1145\/2588555.2593667"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1145\/2465351.2465355"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1016\/j.aci.2019.04.002"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-30348-2_21"},{"key":"ref70","first-page":"211","article-title":"SSDAlloc: Hybrid SSD\/RAM memory management made easy","volume-title":"Proc. 8th USENIX Conf. Netw. Syst. Des. Implementation","author":"Badam"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2016.6"},{"key":"ref72","first-page":"17","article-title":"A spark-based workflow for probabilistic record linkage of healthcare data","volume-title":"Proc. The Workshop Algorithms Syst. MapReduce Beyond","author":"Barreto"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/ICDMW.2015.140"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1145\/1057977.1057978"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2012.2"},{"key":"ref76","article-title":"Lightweight asynchronous snapshots for distributed dataflows","author":"Carbone","year":"2015","journal-title":"arXiv:1506.08603"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2017.09.005"},{"key":"ref78","volume-title":"Redis in Action","author":"Carlson","year":"2013"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1145\/1365815.1365816"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM.2016.18"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2018.8310322"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/LDAV.2015.7348080"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1145\/2897937.2905012"},{"key":"ref84","first-page":"181","article-title":"The missing piece in complex analytics: Low latency, scalable model management and serving with velox","volume":"185","author":"Crankshaw","year":"2014","journal-title":"Eur. J. Obstetrics Gynecol. Reproductive Biol."},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1145\/2670979.2670995"},{"key":"ref86","first-page":"1232","article-title":"Large scale distributed deep networks","volume-title":"Proc. 25th Int. Conf. Neural Inf. Process. Syst.","author":"Dean"},{"key":"ref87","first-page":"10","article-title":"MapReduce: Simplified data processing on large clusters","volume-title":"Proc. 6th Conf. Symp. Operating Syst. Des. Implementation","author":"Dean"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1145\/1323293.1294281"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1145\/3278161.3278166"},{"key":"ref90","first-page":"1","article-title":"Big data analytics for real time systems","author":"Dutta","year":"2015","journal-title":"Big Data Analytics Seminar"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1145\/2213836.2213934"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1038\/nmeth.3041"},{"key":"ref93","first-page":"17","article-title":"PowerGraph: Distributed graph-parallel computation on natural graphs","volume-title":"Proc. 10th USENIX Conf. Operating Syst. Des. Implementation","author":"Gonzalez"},{"key":"ref94","first-page":"599","article-title":"GraphX: Graph processing in a distributed dataflow framework","volume-title":"Proc. 11th USENIX Conf. Operating Syst. Des. Implementation","author":"Gonzalez"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1145\/2884781.2884813"},{"key":"ref96","first-page":"75","article-title":"Nectar: Automatic management of data and computation in datacenters","volume-title":"Proc. 9th USENIX Conf. Operating Syst. Des. Implementation","author":"Gunda"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1016\/0375-6505(85)90011-2"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2017.2773504"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1089\/cmb.2018.0102"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-017-0474-5"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.14778\/2850583.2850595"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1145\/1272996.1273005"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1007\/s11207-018-1258-9"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"ref106","article-title":"DeepSpark: Spark-based deep learning supporting asynchronous updates and caffe compatibility","author":"Kim","year":"2016"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1145\/3127479.3134762"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1145\/2487575.2487650"},{"key":"ref109","first-page":"2","article-title":"MLbase: A distributed machine-learning system","volume-title":"Proc. 6th Biennial Conf. Innovative Data Syst. Res.","volume":"1","author":"Kraska"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1145\/2872427.2883026"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1145\/1773912.1773922"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.14778\/2367502.2367520"},{"key":"ref113","article-title":"Approximate stream analytics in apache flink and apache spark streaming","author":"Le Quoc","year":"2017","journal-title":"arXiv:1709.02946"},{"key":"ref114","article-title":"Approximate distributed joins in apache spark","author":"Le Quoc","year":"2018","journal-title":"arXiv:1805.05874"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1145\/2670979.2670985"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1145\/2670979.2670985"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btp352"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/NAS.2015.7255222"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1145\/3079079.3079089"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2017.131"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/HOTI.2014.15"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1145\/2872362.2872386"},{"key":"ref123","article-title":"Trash day: Coordinating garbage collection in distributed systems","volume-title":"Proc. 15th USENIX Conf. Hot Topics Operating Syst.","author":"Maas"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1145\/1807167.1807184"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/IC2E.2016.30"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2016.22"},{"key":"ref127","article-title":"Adam: Genomics formats and processing patterns for cloud scale computing","author":"Massie","year":"2013"},{"issue":"1","key":"ref128","first-page":"1235","article-title":"MLlib: Machine learning in apache spark","volume":"17","author":"Meng","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"ref129","first-page":"1383","article-title":"Spark SQL: Relational data processing in spark","volume-title":"Proc. ACM SIGMOD Int. Conf. Manage. Data","author":"Michael"},{"key":"ref130","first-page":"149","article-title":"SAMOA: Scalable advanced massive online analysis","volume":"16","author":"Morales","year":"2015","journal-title":"J. Mach. Learn. Res."},{"key":"ref131","article-title":"SparkNet: Training deep networks in spark","author":"Moritz","year":"2015"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.14778\/2002938.2002940"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2016.2627558"},{"key":"ref134","first-page":"385","article-title":"Scaling memcache at Facebook","volume-title":"Proc. 10th USENIX Conf. Netw. Syst. Des. Implementation","author":"Nishtala"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1145\/2723372.2742787"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1145\/1376616.1376726"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1145\/2517349.2522716"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1111\/j.1467-8659.2007.01012.x"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1145\/2785956.2787505"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1145\/1851182.1851227"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2899408"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-74690-6_66"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2017.109"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1145\/3226595.3226638"},{"key":"ref145","first-page":"35","article-title":"MLbase: A distributed machine learning wrapper","volume-title":"Proc. NIPS Big Learn. Workshop","author":"Talwalkar"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2011.218"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2010.5447738"},{"key":"ref148","first-page":"439","article-title":"Real-time credit card fraud detection using streaming analytics","volume-title":"Proc. 2nd Int. Conf. Appl. Theor. Comput. Commun. Technol.","author":"U"},{"key":"ref149","doi-asserted-by":"publisher","DOI":"10.1145\/2523616.2523633"},{"key":"ref150","first-page":"301","article-title":"The power of choice in data-aware cluster scheduling","volume-title":"Proc. 11th USENIX Conf. Operating Syst. Des. Implementation","author":"Venkataraman"},{"key":"ref151","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2903740"},{"key":"ref152","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-019-42966-5"},{"key":"ref153","first-page":"307","article-title":"Ceph: A scalable, high-performance distributed file system","volume-title":"Proc. 7th Symp. Operating Syst. Des. Implementation","author":"Weil"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1145\/3325135"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btu343"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1145\/2463676.2465288"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1145\/2987550.2987576"},{"key":"ref158","doi-asserted-by":"publisher","DOI":"10.1007\/s10707-018-0330-9"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1145\/2886107.2886110"},{"key":"ref160","first-page":"10","article-title":"Spark: Cluster computing with working sets","volume-title":"Proc. 2nd USENIX Conf. Hot Topics Cloud Comput.","author":"Zaharia"},{"key":"ref161","doi-asserted-by":"publisher","DOI":"10.1145\/2517349.2522737"},{"key":"ref162","doi-asserted-by":"publisher","DOI":"10.3847\/1538-3881\/ab2384"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.14778\/2732951.2732956"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190534"},{"key":"ref165","article-title":"Splash: User-friendly programming interface for parallelizing stochastic algorithms","author":"Zhang","year":"2015","journal-title":"arXiv:1506.07552"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2015.7363840"}],"container-title":["IEEE Transactions on Knowledge and Data Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/69\/4358933\/09007378.pdf?arnumber=9007378","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,9]],"date-time":"2024-01-09T22:19:15Z","timestamp":1704838755000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9007378\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":104,"URL":"https:\/\/doi.org\/10.1109\/tkde.2020.2975652","relation":{},"ISSN":["1041-4347","1558-2191","2326-3865"],"issn-type":[{"value":"1041-4347","type":"print"},{"value":"1558-2191","type":"electronic"},{"value":"2326-3865","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]}}}