{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T23:30:51Z","timestamp":1743031851664,"version":"3.40.3"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030602475"},{"type":"electronic","value":"9783030602482"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-60248-2_14","type":"book-chapter","created":{"date-parts":[[2020,9,29]],"date-time":"2020-09-29T09:03:14Z","timestamp":1601370194000},"page":"197-217","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Profiling-Based Big Data Workflow Optimization in a Cross-layer Coupled Design Framework"],"prefix":"10.1007","author":[{"given":"Qianwen","family":"Ye","sequence":"first","affiliation":[]},{"given":"Chase Q.","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Wuji","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Aiqin","family":"Hou","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Shen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,9,29]]},"reference":[{"issue":"11","key":"14_CR1","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1145\/2934664","volume":"59","author":"PWM Zaharia","year":"2016","unstructured":"Zaharia, P.W.M., Xin, R.S., et al.: Apache spark: a unified engine for big data processing. Commun. ACM 59(11), 56\u201365 (2016)","journal-title":"Commun. ACM"},{"key":"14_CR2","doi-asserted-by":"publisher","first-page":"3045","DOI":"10.1093\/bioinformatics\/bth361","volume":"20","author":"JFT Oinn","year":"2004","unstructured":"Oinn, J.F.T., Addis, M., et al.: Taverna: a tool for the composition and enactment of bioinformatics workflows. Bioinformatics 20, 3045\u20133054 (2004)","journal-title":"Bioinformatics"},{"key":"14_CR3","unstructured":"Ludascher, I.A.C.B.B., et al.: Scientific workflow management and the Kepler system. Spec. Issue Workflow Grid Syst. 18, 1039\u20131065 (2005)"},{"key":"14_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1007\/978-3-540-28642-4_2","volume-title":"Grid Computing","author":"E Deelman","year":"2004","unstructured":"Deelman, E., Blythe, J., et al.: Pegasus: mapping scientific workflows onto the grid. In: Dikaiakos, M.D. (ed.) AxGrids 2004. LNCS, vol. 3165, pp. 11\u201320. Springer, Heidelberg (2004). \nhttps:\/\/doi.org\/10.1007\/978-3-540-28642-4_2"},{"key":"14_CR5","doi-asserted-by":"crossref","unstructured":"Kumar, G.M.V.S., Sadayappan, P., et al.: An integrated framework for performance-based optimization of scientific workflows. In: Proceedings of the 18th ACM International Symposium on High Performance Distributed Computing, Garching, Germany, pp. 177\u2013186 (2009)","DOI":"10.1145\/1551609.1551638"},{"key":"14_CR6","doi-asserted-by":"crossref","unstructured":"Chiu, G.A.D., Deshpande, S., et al.: Cost and accuracy sensitive dynamic workflow composition over grid environments. In: Proceedings of the 2008 9th IEEE\/ACM International Conference on Grid Computing, Washington, DC, USA, pp. 9\u201316 (2008)","DOI":"10.1109\/GRID.2008.4662777"},{"key":"14_CR7","doi-asserted-by":"publisher","first-page":"352","DOI":"10.1016\/j.future.2013.09.005","volume":"36","author":"MPS Holl","year":"2014","unstructured":"Holl, M.P.S., Zimmermann, O., et al.: A new optimization phase for scientific workflow management systems. Future Gener. Comput. Sci. 36, 352\u2013362 (2014)","journal-title":"Future Gener. Comput. Sci."},{"key":"14_CR8","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1016\/j.bdr.2017.05.001","volume":"11","author":"A Counaris","year":"2018","unstructured":"Counaris, A., Torres, J.: A methodology for spark parameter tuning. Big Data Res. 11, 22\u201332 (2018)","journal-title":"Big Data Res."},{"key":"14_CR9","doi-asserted-by":"crossref","unstructured":"Wang, B.H.G., Xu, J., et al.: A novel method for tuning configuration parameters of spark based on machine learning. In: IEEE 18th International Conference on High Performance Computing and Communications, Sydney, NSW, Austrilia (2016)","DOI":"10.1109\/HPCC-SmartCity-DSS.2016.0088"},{"key":"14_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"406","DOI":"10.1007\/978-3-642-40047-6_42","volume-title":"Euro-Par 2013 Parallel Processing","author":"G Liao","year":"2013","unstructured":"Liao, G., Datta, K., Willke, T.L.: Gunther: search-based auto-tuning of mapreduce. In: Wolf, F., Mohr, B., an Mey, D. (eds.) Euro-Par 2013. LNCS, vol. 8097, pp. 406\u2013419. Springer, Heidelberg (2013). \nhttps:\/\/doi.org\/10.1007\/978-3-642-40047-6_42"},{"key":"14_CR11","doi-asserted-by":"crossref","unstructured":"Wu, A.G.D., et al.: A self-tuning system based on application profiling and performance analysis for optimizing hadoop mapreduce cluster configuration. In: 20th Annual International Conference on High Performance Computing, Bangalore, India (2014)","DOI":"10.1109\/HiPC.2013.6799133"},{"key":"14_CR12","doi-asserted-by":"crossref","unstructured":"Li, S.M., Zeng, L., et al.: MRONLINE: MapReduce online performance tuning. In: Proceedings of the 23rd International Symposium on High-Performance Parallel and Distributed Computing, New York, NY, USA, pp. 165\u2013176 (2014)","DOI":"10.1145\/2600212.2600229"},{"key":"14_CR13","doi-asserted-by":"crossref","unstructured":"Shu, T., Wu, C.: Performance optimization of $$\\mathit{H}$$adoop workflows in public clouds through adaptive task partitioning. In: Proceedings of the IEEE INFOCOM, Atlanta, GA, USA, 1\u20134 May 2017","DOI":"10.1109\/INFOCOM.2017.8057204"},{"issue":"2","key":"14_CR14","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1109\/TCC.2014.2358220","volume":"3","author":"C Wu","year":"2015","unstructured":"Wu, C., Lin, X., Yu, D., Xu, W., Li, L.: End-to-end delay minimization for scientific workflows in clouds under budget constraint. IEEE Trans. Cloud Comp. 3(2), 169\u2013181 (2015)","journal-title":"IEEE Trans. Cloud Comp."},{"key":"14_CR15","first-page":"51","volume":"84","author":"D Yun","year":"2015","unstructured":"Yun, D., Wu, C., Gu, Y.: An integrated approach to workflow mapping and task scheduling for delay minimization in distributed environments. JPDC 84, 51\u201364 (2015)","journal-title":"JPDC"},{"key":"14_CR16","doi-asserted-by":"crossref","unstructured":"Ye, Q., Wu, C.Q., Cao, H., et al.: Storage-aware task scheduling for performance optimization of big data workflows. In: The 8th IEEE International Conference on Big Data and Cloud Computing, Melbourne, Australia, 11\u201313 December 2018","DOI":"10.1109\/BDCloud.2018.00163"},{"key":"14_CR17","doi-asserted-by":"crossref","unstructured":"Wang, B.H.E.G. , Xu, J.: A novel method for tuning configuration parameters of spark based on machine learning. In: 2016 IEEE 18th International Conference on HPC and Communications, Sydney, NSW, Australia, 12\u201314 December 2016","DOI":"10.1109\/HPCC-SmartCity-DSS.2016.0088"},{"key":"14_CR18","series-title":"Advances in Intelligent Systems and Computing","doi-asserted-by":"publisher","first-page":"226","DOI":"10.1007\/978-3-319-47898-2_24","volume-title":"Advances in Big Data","author":"P Petridis","year":"2017","unstructured":"Petridis, P., Gounaris, A., Torres, J.: Spark parameter tuning via trial-and-error. In: Angelov, P., Manolopoulos, Y., Iliadis, L., Roy, A., Vellasco, M. (eds.) INNS 2016. AISC, vol. 529, pp. 226\u2013237. Springer, Cham (2017). \nhttps:\/\/doi.org\/10.1007\/978-3-319-47898-2_24"},{"key":"14_CR19","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1016\/j.bdr.2017.05.001","volume":"11","author":"A Gounaris","year":"2018","unstructured":"Gounaris, A., Torres, J.: A methodology for spark parameter tuning. Big Data Res. 11, 22\u201332 (2018)","journal-title":"Big Data Res."},{"key":"14_CR20","doi-asserted-by":"crossref","unstructured":"Jia, G.C.E.Z., Xue, C.: Auto-tuning spark big data workloads on POWER8: prediction-based dynamic SMT threading. In: 2016 International Conference on Parallel Architecture and Compilation Techniques (PACT), Haifa, Israel, 11\u201315 September 2016","DOI":"10.1145\/2967938.2967957"},{"key":"14_CR21","volume-title":"Hadoop in Practice","author":"A Holmes","year":"2012","unstructured":"Holmes, A.: Hadoop in Practice. Manning Publications Co., Greenwich (2012)"},{"key":"14_CR22","doi-asserted-by":"crossref","unstructured":"Li, S.M.E.M., Zeng, L.: MRONLINE: MapReduce online performance tuning. In: Proceedings of the 23rd International Symposium on High-Performance Parallel and Distributed Computing, Vancouver, BC, Canada, 23\u201327 June 2014","DOI":"10.1145\/2600212.2600229"},{"key":"14_CR23","unstructured":"Ding, D.Q.E.X., Liu, Y.: Jellyfish: online performance tuning with adaptive configuration and elastic container in hadoop yarn. In: 2015 IEEE 21st International Conference on Parallel and Distributed Systems (ICPADS), Melbourne, Australia, 14\u201317 December 2015"},{"key":"14_CR24","unstructured":"Flight Data. \nhttp:\/\/stat-computing.org\/dataexpo\/2009\/the-data.html"},{"key":"14_CR25","unstructured":"Library Checkout Data. \nhttps:\/\/data.seattle.gov\/Community\/Checkouts-by-Title\/tmmm-ytt6"},{"key":"14_CR26","unstructured":"Parking Violation Data. \nhttps:\/\/data.cityofnewyork.us\/City-Government\/Open-Parking-and-Camera-Violations\/nc67-uf89"},{"key":"14_CR27","unstructured":"Service Request Data. \nhttps:\/\/data.cityofnewyork.us\/Social-Services\/311-Service-Requests-from-2010-to-Present\/erm2-nwe9"},{"key":"14_CR28","doi-asserted-by":"publisher","first-page":"332","DOI":"10.1109\/9.119632","volume":"37","author":"JC Spall","year":"1992","unstructured":"Spall, J.C.: Multivariate stochastic approximation using a simultaneous perturbation gradient approximation. IEEE Trans. Autom. Control 37, 332\u2013341 (1992)","journal-title":"IEEE Trans. Autom. Control"},{"issue":"3","key":"14_CR29","doi-asserted-by":"publisher","first-page":"462","DOI":"10.1214\/aoms\/1177729392","volume":"23","author":"JWJ Kiefer","year":"1952","unstructured":"Kiefer, J.W.J.: Stochastic estimation of the maximum of a regression function. Ann. Math. Stat. 23(3), 462\u2013466 (1952)","journal-title":"Ann. Math. Stat."},{"key":"14_CR30","unstructured":"Spall, J.C.: Introduction to Stochastic Search and Optimization: Estimation, Simulation, and Control. Wiley, Hoboken (2005)"},{"issue":"2","key":"14_CR31","first-page":"1","volume":"9","author":"B Ross","year":"2014","unstructured":"Ross, B.: Mutual information between discrete and continuous data sets. PLOS ONE 9(2), 1\u20135 (2014)","journal-title":"PLOS ONE"},{"key":"14_CR32","volume-title":"Handbook of Mathematical Functions with Formulas, Graphs, and Mathematical Tables","author":"M Abramowitz","year":"1972","unstructured":"Abramowitz, M., Stegun, I.: Handbook of Mathematical Functions with Formulas, Graphs, and Mathematical Tables. Dover Publishing Inc., New York (1972)"},{"key":"14_CR33","doi-asserted-by":"publisher","first-page":"817","DOI":"10.1109\/7.705889","volume":"34","author":"JC Spall","year":"1998","unstructured":"Spall, J.C.: Implementation of the simultaneous perturbation algorithm for stochastic optimization. IEEE Trans. Aerosp. Electron. Syst. 34, 817\u2013823 (1998)","journal-title":"IEEE Trans. Aerosp. Electron. Syst."},{"key":"14_CR34","first-page":"97","volume":"4","author":"D Heger","year":"2013","unstructured":"Heger, D.: Hadoop performance tuning-a pragmatic & iterative approach. CMG J. 4, 97\u2013113 (2013)","journal-title":"CMG J."},{"key":"14_CR35","volume-title":"Hadoop: The Definitive Guide","author":"T White","year":"2012","unstructured":"White, T.: Hadoop: The Definitive Guide. O\u2019Reilly Media Inc., Sebastopol (2012)"},{"key":"14_CR36","doi-asserted-by":"crossref","unstructured":"Lawler, G., Limic, V.: Random Walk: A Modern Introduction. Cambridge University Press, Cambridge (2010)","DOI":"10.1017\/CBO9780511750854"},{"issue":"4","key":"14_CR37","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1287\/inte.20.4.1","volume":"20","author":"F Glover","year":"1990","unstructured":"Glover, F.: Tabu search: a tutorial. Informs J. Appl. Anal. 20(4), 1\u2013185 (1990)","journal-title":"Informs J. Appl. Anal."},{"key":"14_CR38","unstructured":"Montgomery, E.A.P.D.C., Vining, G.: Introduction To Linear Regression Analysis, vol. 821. Wiley, Hoboken (2012)"},{"key":"14_CR39","volume-title":"Numerical Optimization","author":"J Nocedal","year":"2006","unstructured":"Nocedal, J., Wright, S.: Numerical Optimization. Springer, Heidelberg (2006)"},{"key":"14_CR40","unstructured":"Apache, Hadoop (2016). \nhttp:\/\/hadoop.apache.org"},{"key":"14_CR41","unstructured":"Spark (2016). \nhttp:\/\/spark.apache.org"},{"key":"14_CR42","unstructured":"Oozie (2016). \nhttps:\/\/oozie.apache.org"}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-60248-2_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,9,29]],"date-time":"2020-09-29T10:31:10Z","timestamp":1601375470000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-60248-2_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030602475","9783030602482"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-60248-2_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"29 September 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICA3PP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Algorithms and Architectures for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"New York, NY","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 October 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ica3pp2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.cloud-conf.net\/ica3pp2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"495","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"142","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"29% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"305","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"10","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}