{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:50:48Z","timestamp":1767340248234,"version":"3.40.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031744297"},{"type":"electronic","value":"9783031744303"}],"license":[{"start":{"date-parts":[[2024,12,21]],"date-time":"2024-12-21T00:00:00Z","timestamp":1734739200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,21]],"date-time":"2024-12-21T00:00:00Z","timestamp":1734739200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-74430-3_5","type":"book-chapter","created":{"date-parts":[[2024,12,20]],"date-time":"2024-12-20T07:44:49Z","timestamp":1734680689000},"page":"84-102","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Radical-Cylon: A Heterogeneous Data Pipeline for\u00a0Scientific Computing"],"prefix":"10.1007","author":[{"given":"Arup Kumar","family":"Sarker","sequence":"first","affiliation":[]},{"given":"Aymen","family":"Alsaadi","sequence":"additional","affiliation":[]},{"given":"Niranda","family":"Perera","sequence":"additional","affiliation":[]},{"given":"Mills","family":"Staylor","sequence":"additional","affiliation":[]},{"given":"Gregor","family":"von Laszewski","sequence":"additional","affiliation":[]},{"given":"Matteo","family":"Turilli","sequence":"additional","affiliation":[]},{"given":"Ozgur Ozan","family":"Kilic","sequence":"additional","affiliation":[]},{"given":"Mikhail","family":"Titov","sequence":"additional","affiliation":[]},{"given":"Andre","family":"Merzky","sequence":"additional","affiliation":[]},{"given":"Shantenu","family":"Jha","sequence":"additional","affiliation":[]},{"given":"Geoffrey","family":"Fox","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,21]]},"reference":[{"key":"5_CR1","unstructured":"Abadi, M., et al.: TensorFlow: Large-scale machine learning on heterogeneous systems (2015). https:\/\/www.tensorflow.org\/, software available from tensorflow.org"},{"key":"5_CR2","doi-asserted-by":"crossref","unstructured":"Abeykoon, V., et al.: Data engineering for hpc with python. arXiv preprint arXiv:2010.06312 (2020)","DOI":"10.1109\/PyHPC51966.2020.00007"},{"key":"5_CR3","unstructured":"Anaconda: The state of data science 2020 moving from hype toward maturity. https:\/\/www.anaconda.com\/resources\/whitepapers\/state-of-data-science-2020\u201d (December 2020). Accessed 05 May 2023"},{"key":"5_CR4","unstructured":"Apache: Apache hadoop. https:\/\/hadoop.apache.org\/ (May 2022). Accessed 18 Apr 2023"},{"key":"5_CR5","doi-asserted-by":"crossref","unstructured":"Babuji, Y., et al.: Parsl: Pervasive parallel programming in python. In: Proceedings of the 28th International Symposium on High-Performance Parallel and Distributed Computing, pp. 25\u201336 (2019)","DOI":"10.1145\/3307681.3325400"},{"key":"5_CR6","first-page":"430","volume":"4","author":"P Barham","year":"2022","unstructured":"Barham, P., et al.: Pathways: Asynchronous distributed dataflow for ml. Proc. Mach. Learn. Syst. 4, 430\u2013449 (2022)","journal-title":"Proc. Mach. Learn. Syst."},{"key":"5_CR7","unstructured":"Carbone, P., Katsifodimos, A., Ewen, S., Markl, V., Haridi, S., Tzoumas, K.: Apache flink: Stream and batch processing in a single engine. Bull. Tech. Committee Data Eng. 38(4) (2015)"},{"issue":"9","key":"5_CR8","doi-asserted-by":"publisher","first-page":"1108","DOI":"10.1016\/j.jpdc.2005.03.010","volume":"65","author":"L Dalcin","year":"2005","unstructured":"Dalcin, L., Paz, R., Storti, M.: Mpi for python. J. Parall. Distrib. Comput. 65(9), 1108\u20131115 (2005). https:\/\/doi.org\/10.1016\/j.jpdc.2005.03.010","journal-title":"J. Parall. Distrib. Comput."},{"key":"5_CR9","unstructured":"Dean, J.: Introducing pathways: A next-generation ai architecture. https:\/\/blog.google\/technology\/ai\/introducing-pathways-next-generation-ai-architecture\/ (October 2021). Accessed 17 Apr 2023"},{"issue":"1","key":"5_CR10","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean, J., Ghemawat, S.: Mapreduce: simplified data processing on large clusters. Commun. ACM 51(1), 107\u2013113 (2008)","journal-title":"Commun. ACM"},{"key":"5_CR11","doi-asserted-by":"publisher","unstructured":"DeCandia, G., et al.: Dynamo: Amazon\u2019s highly available key-value store. In: Proceedings of Twenty-First ACM SIGOPS Symposium on Operating Systems Principles, pp. 205\u2013220. SOSP \u201907, Association for Computing Machinery, New York, NY, USA (2007). https:\/\/doi.org\/10.1145\/1294261.1294281","DOI":"10.1145\/1294261.1294281"},{"key":"5_CR12","unstructured":"Facebookincubator: Gloo: Collective communications library with various primitives for multi-machine training. https:\/\/github.com\/facebookincubator\/gloo\u201d (March 2023). Accessed 01 Apr 2023"},{"key":"5_CR13","unstructured":"Government, U.: Arkouda: Numpy-like arrays at massive scale backed by chapel. https:\/\/pypi.org\/project\/arkouda\/##description \u201d (March 2019). Accessed 05 Apr 2023"},{"key":"5_CR14","doi-asserted-by":"publisher","unstructured":"McKenna, A.: The genome analysis toolkit: a mapreduce framework for analyzing next-generation dna sequencing data. Genome Res. 20 9, 1297\u2013303 (2010). https:\/\/doi.org\/10.1101\/gr.107524.110","DOI":"10.1101\/gr.107524.110"},{"issue":"9","key":"5_CR15","first-page":"1","volume":"14","author":"W McKinney","year":"2011","unstructured":"McKinney, W., et al.: pandas: a foundational python library for data analysis and statistics. Python High Perform. Sci. Comput. 14(9), 1\u20139 (2011)","journal-title":"Python High Perform. Sci. Comput."},{"key":"5_CR16","doi-asserted-by":"publisher","unstructured":"Merzky, A., Turilli, M., Titov, M., Al-Saadi, A., Jha, S.: Design and performance characterization of radical-pilot on leadership-class platforms. IEEE Trans. Parall. Distrib. Syst. 33(04), 818\u2013829 (apr 2022). https:\/\/doi.org\/10.1109\/TPDS.2021.3105994","DOI":"10.1109\/TPDS.2021.3105994"},{"key":"5_CR17","unstructured":"Moritz, P., et al.: Ray: A distributed framework for emerging $$\\{$$AI$$\\}$$ applications. In: 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18), pp. 561\u2013577 (2018)"},{"key":"5_CR18","unstructured":"ORNL-Summit: Summit (2019). https:\/\/www.olcf.ornl.gov\/olcf-resources\/compute-systems\/summit\/"},{"key":"5_CR19","unstructured":"Paszke, A., et al.: Pytorch: An imperative style, high-performance deep learning library. In: Advances in Neural Information Processing Systems 32, pp. 8024\u20138035. Curran Associates, Inc. (2019). http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf"},{"key":"5_CR20","doi-asserted-by":"crossref","unstructured":"Perera, N., et al.: In-depth analysis on parallel processing patterns for high-performance dataframes. Future Generation Computer Systems (2023)","DOI":"10.1016\/j.future.2023.07.007"},{"key":"5_CR21","doi-asserted-by":"crossref","unstructured":"Perera, N., et al.: Supercharging distributed computing environments for high performance data engineering. arXiv preprint arXiv:2301.07896 (2023)","DOI":"10.3389\/fhpcp.2024.1384619"},{"key":"5_CR22","unstructured":"Rivanna: University of virginia\u2019s high-performance computing (hpc) system (2019). https:\/\/www.rc.virginia.edu\/userinfo\/rivanna\/overview\/"},{"key":"5_CR23","doi-asserted-by":"crossref","unstructured":"Shamis, P., et\u00a0al.: Ucx: an open source framework for hpc network apis and beyond. In: 2015 IEEE 23rd Annual Symposium on High-Performance Interconnects, pp. 40\u201343. IEEE (2015)","DOI":"10.1109\/HOTI.2015.13"},{"key":"5_CR24","doi-asserted-by":"crossref","unstructured":"Shan, K., et al.: Hybrid cloud and hpc approach to high-performance dataframes. In: 2022 IEEE International Conference on Big Data (Big Data), pp. 2728\u20132736. IEEE (2022)","DOI":"10.1109\/BigData55660.2022.10020958"},{"key":"5_CR25","doi-asserted-by":"crossref","unstructured":"Widanage, C., et al.: High performance data engineering everywhere. In: 2020 IEEE International Conference on Smart Data Services (SMDS), pp. 122\u2013132. IEEE (2020)","DOI":"10.1109\/SMDS49396.2020.00022"},{"key":"5_CR26","unstructured":"Yuan, J., et al..: Oneflow: Redesign the distributed deep learning framework from scratch. arXiv preprint arXiv:2110.15032 (2021)"},{"issue":"11","key":"5_CR27","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1145\/2934664","volume":"59","author":"M Zaharia","year":"2016","unstructured":"Zaharia, M., et al.: Apache spark: a unified engine for big data processing. Commun. ACM 59(11), 56\u201365 (2016)","journal-title":"Commun. ACM"},{"key":"5_CR28","unstructured":"ZMQ: High-level messaging patterns. https:\/\/zguide.zeromq.org\/docs\/chapter2\/##High-Level-Messaging-Patterns\u201d (October 2021). Accessed 05 Apr 2023"}],"container-title":["Lecture Notes in Computer Science","Job Scheduling Strategies for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-74430-3_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,20]],"date-time":"2024-12-20T08:03:31Z","timestamp":1734681811000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-74430-3_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,21]]},"ISBN":["9783031744297","9783031744303"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-74430-3_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,21]]},"assertion":[{"value":"21 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"JSSPP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Workshop on Job Scheduling Strategies for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"San Francisco","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 May 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 May 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"jsspp2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}