{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T19:22:26Z","timestamp":1774120946481,"version":"3.50.1"},"publisher-location":"Cham","reference-count":37,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030787127","type":"print"},{"value":"9783030787134","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-78713-4_4","type":"book-chapter","created":{"date-parts":[[2021,6,16]],"date-time":"2021-06-16T23:06:15Z","timestamp":1623884775000},"page":"57-76","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["A Hierarchical Task Scheduler for Heterogeneous Computing"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8259-8891","authenticated-orcid":false,"given":"Narasinga Rao","family":"Miniskar","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6615-0739","authenticated-orcid":false,"given":"Frank","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5448-4667","authenticated-orcid":false,"given":"Aaron R.","family":"Young","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3524-9071","authenticated-orcid":false,"given":"Dwaipayan","family":"Chakraborty","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2449-6720","authenticated-orcid":false,"given":"Jeffrey S.","family":"Vetter","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,6,17]]},"reference":[{"issue":"3","key":"4_CR1","doi-asserted-by":"publisher","first-page":"682","DOI":"10.1109\/TPDS.2013.57","volume":"25","author":"H Arabnejad","year":"2013","unstructured":"Arabnejad, H., Barbosa, J.G.: List scheduling algorithm for heterogeneous systems by an optimistic cost table. IEEE Trans. Parallel Distrib. Syst. 25(3), 682\u2013694 (2013)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"4_CR2","unstructured":"ARM Corp.: AMBA: the standard for on-chip communication. https:\/\/www.arm.com\/products\/silicon-ip-system\/embedded-system-design\/amba-specifications. Accessed 10 Dec 2020"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Arnold, O., Noethen, B., Fettweis, G.: Instruction set architecture extensions for a dynamic task scheduling unit. In: 2012 IEEE Computer Society Annual Symposium on VLSI, pp. 249\u2013254. IEEE (2012)","DOI":"10.1109\/ISVLSI.2012.51"},{"key":"4_CR4","unstructured":"Asanovic, K., et al.: The Rocket chip generator. EECS Department, University of California, Berkeley, Technical report UCB\/EECS-2016-17 (2016)"},{"issue":"2","key":"4_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2024716.2024718","volume":"39","author":"N Binkert","year":"2011","unstructured":"Binkert, N., et al.: The GEM5 simulator. ACM SIGARCH Comput. Archit. News 39(2), 1\u20137 (2011)","journal-title":"ACM SIGARCH Comput. Archit. News"},{"issue":"1","key":"4_CR6","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1006\/jpdc.1996.0107","volume":"37","author":"RD Blumofe","year":"1996","unstructured":"Blumofe, R.D., Joerg, C.F., Kuszmaul, B.C., Leiserson, C.E., Randall, K.H., Zhou, Y.: Cilk: an efficient multithreaded runtime system. J. Parallel Distrib. Comput. 37(1), 55\u201369 (1996)","journal-title":"J. Parallel Distrib. Comput."},{"key":"4_CR7","doi-asserted-by":"publisher","first-page":"721","DOI":"10.1109\/TPDS.2019.2942909","volume":"31","author":"LC Canon","year":"2019","unstructured":"Canon, L.C., Marchal, L., Simon, B., Vivien, F.: Online scheduling of task graphs on heterogeneous platforms. IEEE Trans. Parallel Distrib. Syst. 31, 721\u2013732 (2019)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"4_CR8","doi-asserted-by":"crossref","unstructured":"Dallou, T., Engelhardt, N., Elhossini, A., Juurlink, B.: Nexus#: a distributed hardware task manager for task-based programming models. In: 2015 IEEE International Parallel and Distributed Processing Symposium, pp. 1129\u20131138. IEEE (2015)","DOI":"10.1109\/IPDPS.2015.79"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Frigo, M., Leiserson, C.E., Randall, K.H.: The implementation of the Cilk-5 multithreaded language. In: Proceedings of the ACM SIGPLAN 1998 Conference on Programming Language Design and Implementation, pp. 212\u2013223 (1998)","DOI":"10.1145\/277652.277725"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"4_CR11","doi-asserted-by":"crossref","unstructured":"Huang, T.W., Lin, C.X., Guo, G., Wong, M.: Cpp-Taskflow: fast task-based parallel programming using modern C++. In: 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 974\u2013983. IEEE (2019)","DOI":"10.1109\/IPDPS.2019.00105"},{"key":"4_CR12","unstructured":"Intel Corp.: Coffee lake - microarchitecture - intel. https:\/\/en.wikichip.org\/wiki\/intel\/microarchitectures\/coffee_lake. Accessed 10 Dec 2020"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"Johnston, B., Milthorpe, J.: AIWC: OpenCL-based architecture-independent workload characterization. In: 2018 IEEE\/ACM 5th Workshop on the LLVM Compiler Infrastructure in HPC (LLVM-HPC), pp. 81\u201391. IEEE (2018)","DOI":"10.1109\/LLVM-HPC.2018.8639381"},{"key":"4_CR14","doi-asserted-by":"crossref","first-page":"175","DOI":"10.7551\/mitpress\/5241.003.0009","volume-title":"Parallel Programming Using C++","author":"LV Kale","year":"1996","unstructured":"Kale, L.V., Krishnan, S.: Charm++: parallel programming with message-driven objects. In: Wilson, G.V., Lu, P. (eds.) Parallel Programming Using C++, vol. 1, pp. 175\u2013213. MIT Press, Cambridge (1996)"},{"key":"4_CR15","doi-asserted-by":"crossref","unstructured":"Kaleem, R., Barik, R., Shpeisman, T., Hu, C., Lewis, B.T., Pingali, K.: Adaptive heterogeneous scheduling for integrated GPUs. In: 2014 23rd International Conference on Parallel Architecture and Compilation Techniques (PACT), pp. 151\u2013162. IEEE (2014)","DOI":"10.1145\/2628071.2628088"},{"key":"4_CR16","unstructured":"Khronos Group: OpenCL: the open standard for parallel programming of heterogeneous systems (2019)"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Kukanov, A., Voss, M.J.: The foundations for scalable multi-core software in Intel Threading Building Blocks. Intel Technol. J. 11(4) (2007)","DOI":"10.1535\/itj.1104.05"},{"key":"4_CR18","doi-asserted-by":"crossref","unstructured":"Liu, F., Miniskar, N.R., Chakraborty, D., Vetter, J.S.: DEFFE: a data-efficient framework for performance characterization in domain-specific computing. In: Proceedings of the 17th ACM International Conference on Computing Frontiers, pp. 182\u2013191 (2020)","DOI":"10.1145\/3387902.3392633"},{"key":"4_CR19","doi-asserted-by":"crossref","unstructured":"Ma, Z., Catthoor, F., Vounckx, J.: Hierarchical task scheduler for interleaving subtasks on heterogeneous multiprocessor platforms. In: Proceedings of the 2005 Asia and South Pacific Design Automation Conference, pp. 952\u2013955 (2005)","DOI":"10.1145\/1120725.1120765"},{"key":"4_CR20","doi-asserted-by":"crossref","unstructured":"Morais, L., et al.: Adding tightly-integrated task scheduling acceleration to a RISC-V multi-core processor. In: Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture, pp. 861\u2013872 (2019)","DOI":"10.1145\/3352460.3358271"},{"key":"4_CR21","doi-asserted-by":"crossref","unstructured":"Nickolls, J., Buck, I.: NVIDIA CUDA software and GPU parallel computing architecture. In: Microprocessor Forum (2007)","DOI":"10.1109\/HOTCHIPS.2007.7482491"},{"key":"4_CR22","unstructured":"OpenACC: OpenACC: directives for accelerators (2015)"},{"key":"4_CR23","unstructured":"OpenMP: OpenMP reference (1999)"},{"issue":"2","key":"4_CR24","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1109\/MCSE.2013.21","volume":"15","author":"AD Robison","year":"2013","unstructured":"Robison, A.D.: Composable parallel patterns with Intel Cilk Plus. Comput. Sci. Eng. 15(2), 66\u201371 (2013)","journal-title":"Comput. Sci. Eng."},{"key":"4_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2013 MICCAI 2015","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-Net: convolutional networks for biomedical image segmentation. In: Navab, N., Hornegger, J., Wells, W.M., Frangi, A.F. (eds.) MICCAI 2015. LNCS, vol. 9351, pp. 234\u2013241. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24574-4_28"},{"key":"4_CR26","doi-asserted-by":"crossref","unstructured":"Shao, Y.S., Xi, S.L., Srinivasan, V., Wei, G.Y., Brooks, D.: Co-designing accelerators and SoC interfaces using gem5-Aladdin. In: 2016 49th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO), pp. 1\u201312. IEEE (2016)","DOI":"10.1109\/MICRO.2016.7783751"},{"key":"4_CR27","unstructured":"Sijstermans, F.: The NVIDIA deep learning accelerator. In: Proceedings Hot Chips: A Symposium on High Performance Chips, August 2018"},{"key":"4_CR28","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"4_CR29","doi-asserted-by":"crossref","unstructured":"Sinnen, O.: Task Scheduling for Parallel Systems, vol.\u00a060. Wiley, Hoboken (2007)","DOI":"10.1002\/0470121173"},{"key":"4_CR30","doi-asserted-by":"crossref","unstructured":"Sj\u00e4lander, M., Terechko, A., Duranton, M.: A look-ahead task management unit for embedded multi-core architectures. In: 2008 11th EUROMICRO Conference on Digital System Design Architectures, Methods and Tools, pp. 149\u2013157. IEEE (2008)","DOI":"10.1109\/DSD.2008.45"},{"key":"4_CR31","doi-asserted-by":"crossref","unstructured":"Slaughter, E., et al.: Task bench: a parameterized benchmark for evaluating parallel runtime performance, pp. 1\u201330 (2020)","DOI":"10.1109\/SC41405.2020.00066"},{"key":"4_CR32","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Vanhoucke, V., Ioffe, S., Shlens, J., Wojna, Z.: Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2818\u20132826 (2016)","DOI":"10.1109\/CVPR.2016.308"},{"issue":"3","key":"4_CR33","doi-asserted-by":"publisher","first-page":"260","DOI":"10.1109\/71.993206","volume":"13","author":"H Topcuoglu","year":"2002","unstructured":"Topcuoglu, H., Hariri, S., Wu, M.Y.: Performance-effective and low-complexity task scheduling for heterogeneous computing. IEEE Trans. Parallel Distrib. Syst. 13(3), 260\u2013274 (2002)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"issue":"3","key":"4_CR34","doi-asserted-by":"publisher","first-page":"384","DOI":"10.1016\/S0022-0000(75)80008-0","volume":"10","author":"JD Ullman","year":"1975","unstructured":"Ullman, J.D.: NP-complete scheduling problems. J. Comput. Syst. Sci. 10(3), 384\u2013393 (1975)","journal-title":"J. Comput. Syst. Sci."},{"key":"4_CR35","doi-asserted-by":"publisher","unstructured":"Vetter, J.S., Brightwell, R., et al.: Extreme heterogeneity 2018: DOE ASCR basic research needs workshop on extreme heterogeneity (2018). https:\/\/doi.org\/10.2172\/1473756","DOI":"10.2172\/1473756"},{"key":"4_CR36","doi-asserted-by":"crossref","unstructured":"Waterman, A., Lee, Y., Avizienis, R., Cook, H., Patterson, D.A., Asanovic, K.: The RISC-V instruction set. In: Hot Chips Symposium, p. 1 (2013)","DOI":"10.1109\/HOTCHIPS.2013.7478332"},{"key":"4_CR37","unstructured":"Western Digital Corp.: RISC-V: accelerating next-generation compute requirements. https:\/\/www.westerndigital.com\/company\/innovations\/risc-v. Accessed 10 Dec 2020"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-78713-4_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,2]],"date-time":"2024-09-02T00:06:32Z","timestamp":1725235592000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-78713-4_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030787127","9783030787134"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-78713-4_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"17 June 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ISC High Performance","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on High Performance Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 June 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 July 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"36","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"supercomputing2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.isc-hpc.com\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Linklings","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"74","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"32% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.28","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.13","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"In the ISC High Performance Workshop, there were 49 submissions, out of which 35 were accepted.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}