{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,24]],"date-time":"2025-08-24T01:24:00Z","timestamp":1755998640618,"version":"3.37.3"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319748955"},{"type":"electronic","value":"9783319748962"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-74896-2_7","type":"book-chapter","created":{"date-parts":[[2018,1,30]],"date-time":"2018-01-30T10:22:32Z","timestamp":1517307752000},"page":"125-144","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Exploration of Supervised Machine Learning Techniques for Runtime Selection of CPU vs. GPU Execution in Java Programs"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1623-1818","authenticated-orcid":false,"given":"Gloria Y. K.","family":"Kim","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6861-6272","authenticated-orcid":false,"given":"Akihiro","family":"Hayashi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vivek","family":"Sarkar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,1,31]]},"reference":[{"key":"7_CR1","unstructured":"APARAPI: API for Data Parallel Java (2011). http:\/\/code.google.com\/p\/aparapi\/ . Accessed 20 June 2017"},{"key":"7_CR2","unstructured":"Dubach, C., Cheng, P., Rabbah, R., Bacon, D.F., Fink, S.J.: Compiling a high-level language for gpus: (via language support for architectures and compilers). In: Proceedings of the 33rd ACM SIGPLAN Conference on Programming Language Design and Implementation, PLDI 2012, pp. 1\u201312. ACM, New York (2012). http:\/\/doi.acm.org\/10.1145\/2254064.2254066"},{"key":"7_CR3","unstructured":"Fumero, J.J., Remmelg, T., Steuwer, M., Dubach, C.: Runtime code generation and data management for heterogeneous computing in Java. In: Proceedings of the Principles and Practices of Programming on the Java Platform, PPPJ 2015, pp. 16\u201326. ACM, New York (2015). http:\/\/doi.acm.org\/10.1145\/2807426.2807428"},{"key":"7_CR4","unstructured":"Fumero, J.J., Steuwer, M., Dubach, C.: A composable array function interface for heterogeneous computing in Java. In: Proceedings of ACM SIGPLAN International Workshop on Libraries, Languages, and Compilers for Array Programming, ARRAY 2014, pp. 44:44\u201344:49. ACM, New York (2014). http:\/\/doi.acm.org\/10.1145\/2627373.2627381"},{"key":"7_CR5","unstructured":"Grcevski, N., Kielstra, A., Stoodley, K., Stoodley, M., Sundaresan, V.: JavaTM just-in-time compiler and virtual machine improvements for server and middleware applications. In: Proceedings of the 3rd Conference on Virtual Machine Research And Technology Symposium, VM 2004, vol. 3. p. 12. USENIX Association, Berkeley (2004). http:\/\/dl.acm.org\/citation.cfm?id=1267242.1267254"},{"key":"7_CR6","doi-asserted-by":"publisher","unstructured":"Grossman, M., Breternitz, M., Sarkar, V.: HadoopCL: MapReduce on Distributed heterogeneous platforms through seamless integration of Hadoop and OpenCL. In: Proceedings of the 2013 IEEE 27th International Symposium on Parallel and Distributed Processing Workshops and PhD Forum, IPDPSW 2013, pp. 1918\u20131927. IEEE Computer Society, Washington, DC (2013). https:\/\/doi.org\/10.1109\/IPDPSW.2013.246","DOI":"10.1109\/IPDPSW.2013.246"},{"issue":"3","key":"7_CR7","doi-asserted-by":"crossref","first-page":"762","DOI":"10.1109\/TPDS.2015.2414943","volume":"27","author":"M Grossman","year":"2016","unstructured":"Grossman, M., Breternitz, M., Sarkar, V.: Hadoopcl2: motivating the design of a distributed, heterogeneous programming system with machine-learning applications. IEEE Trans. Parallel Distrib. Syst. 27(3), 762\u2013775 (2016)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"7_CR8","doi-asserted-by":"crossref","unstructured":"Hayashi, A., Grossman, M., Zhao, J., Shirako, J., Sarkar, V.: Accelerating Habanero-Java programs with OpenCL generation. In: Proceedings of the 2013 International Conference on Principles and Practices of Programming on the Java Platform: Virtual Machines, Languages, and Tools, PPPJ 2013, pp. 124\u2013134 (2013)","DOI":"10.1145\/2500828.2500840"},{"key":"7_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"342","DOI":"10.1007\/978-3-319-09967-5_20","volume-title":"Languages and Compilers for Parallel Computing","author":"A Hayashi","year":"2014","unstructured":"Hayashi, A., Grossman, M., Zhao, J., Shirako, J., Sarkar, V.: Speculative execution of parallel programs with precise exception semantics on GPUs. In: Ca\u015fcaval, C., Montesinos, P. (eds.) LCPC 2013. LNCS, vol. 8664, pp. 342\u2013356. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-09967-5_20"},{"key":"7_CR10","unstructured":"Hayashi, A., Ishizaki, K., Koblents, G., Sarkar, V.: Machine-learning-based performance heuristics for runtime CPU\/GPU selection. In: Proceedings of the Principles and Practices of Programming on the Java Platform, PPPJ 2015, pp. 27\u201336. ACM, New York (2015). http:\/\/doi.acm.org\/10.1145\/2807426.2807429"},{"key":"7_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"184","DOI":"10.1007\/978-3-642-19595-2_13","volume-title":"Languages and Compilers for Parallel Computing","author":"A Hayashi","year":"2011","unstructured":"Hayashi, A., et al.: Parallelizing compiler framework and API for power reduction and software productivity of real-time heterogeneous multicores. In: Cooper, K., Mellor-Crummey, J., Sarkar, V. (eds.) LCPC 2010. LNCS, vol. 6548, pp. 184\u2013198. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-19595-2_13"},{"key":"7_CR12","unstructured":"Hong, S., Kim, H.: An analytical model for a GPU architecture with memory-level and thread-level parallelism awareness. In: Proceedings of the 36th Annual International Symposium on Computer Architecture, ISCA 2009, pp. 152\u2013163. ACM, New York (2009). http:\/\/doi.acm.org\/10.1145\/1555754.1555775"},{"key":"7_CR13","unstructured":"IBM Corporation: IBM SDK, Java Technology Edition, Version 8 (2015). https:\/\/developer.ibm.com\/javasdk\/downloads\/ . Accessed 20 June 2017"},{"key":"7_CR14","doi-asserted-by":"crossref","unstructured":"Ishizaki, K., Hayashi, A., Koblents, G., Sarkar, V.: Compiling and optimizing java 8 programs for GPU execution. In: 2015 International Conference on Parallel Architecture and Compilation (PACT), pp. 419\u2013431, October 2015","DOI":"10.1109\/PACT.2015.46"},{"key":"7_CR15","unstructured":"JGF: The Java Grande Forum benchmark suite. https:\/\/www.epcc.ed.ac.uk\/research\/computing\/performance-characterisation-and-benchmarking\/java-grande-benchmark-suite"},{"key":"7_CR16","unstructured":"Kaleem, R., Barik, R., Shpeisman, T., Lewis, B.T., Hu, C., Pingali, K.: Adaptive heterogeneous scheduling for integrated GPUs. In: Proceedings of the 23rd International Conference on Parallel Architectures and Compilation, PACT 2014, pp. 151\u2013162. ACM, New York (2014). http:\/\/doi.acm.org\/10.1145\/2628071.2628088"},{"key":"7_CR17","doi-asserted-by":"crossref","unstructured":"Karami, A., Mirsoleimani, S.A., Khunjush, F.: A statistical performance prediction model for OpenCL kernels on NVIDIA GPUs. In: The 17th CSI International Symposium on Computer Architecture Digital Systems (CADS 2013), pp. 15\u201322, October 2013","DOI":"10.1109\/CADS.2013.6714232"},{"key":"7_CR18","doi-asserted-by":"crossref","unstructured":"Leung, A., Lhot\u00e1k, O., Lashari, G.: Automatic parallelization for graphics processing units. In: Proceedings of the 7th International Conference on Principles and Practice of Programming in Java, PPPJ 2009, pp. 91\u2013100 (2009)","DOI":"10.1145\/1596655.1596670"},{"key":"7_CR19","unstructured":"Luk, C.K., Hong, S., Kim, H.: Qilin: exploiting parallelism on heterogeneous multiprocessors with adaptive mapping. In: Proceedings of the 42nd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO 42, pp. 45\u201355. ACM, New York (2009). http:\/\/doi.acm.org\/10.1145\/1669112.1669121"},{"key":"7_CR20","doi-asserted-by":"crossref","unstructured":"Luo, C., Suda, R.: A performance and energy consumption analytical model for GPU. In: 2011 IEEE Ninth International Conference on Dependable, Autonomic and Secure Computing, pp. 658\u2013665, December 2011","DOI":"10.1109\/DASC.2011.117"},{"key":"7_CR21","unstructured":"NVIDIA: NVVM IR specification 1.3 (2017). http:\/\/docs.nvidia.com\/cuda\/pdf\/NVVM_IR_Specification.pdf . Accessed 20 June 2017"},{"key":"7_CR22","unstructured":"NVIDIA: Parallel Thread Execution ISA v5.0 (2017). http:\/\/docs.nvidia.com\/cuda\/pdf\/ptx_isa_5.0.pdf . Accessed 20 June 2017"},{"issue":"3","key":"7_CR23","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1145\/2189751.2047883","volume":"47","author":"N Nystrom","year":"2011","unstructured":"Nystrom, N., White, D., Das, K.: Firepile: run-time compilation for GPUs in scala. SIGPLAN Not. 47(3), 107\u2013116 (2011). http:\/\/doi.acm.org\/10.1145\/2189751.2047883","journal-title":"SIGPLAN Not."},{"key":"7_CR24","unstructured":"OpenMP: OpenMP Application Program Interface, version 4.5 (2015). http:\/\/www.openmp.org\/wp-content\/uploads\/openmp-4.5.pdf . Accessed 20 June 2017"},{"key":"7_CR25","unstructured":"Parboil: Parboil benchmarks. http:\/\/impact.crhc.illinois.edu\/parboil\/parboil.aspx"},{"key":"7_CR26","unstructured":"PolyBench: The polyhedral benchmark suite. http:\/\/www.cse.ohio-state.edu\/~pouchet\/software\/polybench"},{"key":"7_CR27","doi-asserted-by":"crossref","unstructured":"Pratt-Szeliga, P., Fawcett, J., Welch, R.: Rootbeer: seamlessly using GPUs from Java. In: 14th IEEE International Conference on High Performance Computing and Communication and 9th IEEE International Conference on Embedded Software and Systems, HPCC-ICESS 2012, Liverpool, United Kingdom, June 25\u201327, 2012, pp. 375\u2013380, June 2012","DOI":"10.1109\/HPCC.2012.57"},{"key":"7_CR28","unstructured":"Machine Learning Group at the University of Waikato: Weka3: data mining software in Java (2017). http:\/\/www.cs.waikato.ac.nz\/ml\/weka\/ . Accessed 20 June 2017"},{"key":"7_CR29","doi-asserted-by":"crossref","unstructured":"Wu, G., Greathouse, J.L., Lyashevsky, A., Jayasena, N., Chiou, D.: GPGPU performance and power estimation using machine learning. In: 2015 IEEE 21st International Symposium on High Performance Computer Architecture (HPCA), pp. 564\u2013576, February 2015","DOI":"10.1109\/HPCA.2015.7056063"},{"key":"7_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"887","DOI":"10.1007\/978-3-642-03869-3_82","volume-title":"Euro-Par 2009 Parallel Processing","author":"Y Yan","year":"2009","unstructured":"Yan, Y., Grossman, M., Sarkar, V.: JCUDA: a programmer-friendly interface for accelerating Java programs with CUDA. In: Sips, H., Epema, D., Lin, H.-X. (eds.) Euro-Par 2009. LNCS, vol. 5704, pp. 887\u2013899. Springer, Heidelberg (2009). https:\/\/doi.org\/10.1007\/978-3-642-03869-3_82"},{"key":"7_CR31","unstructured":"Zaremba, W., Lin, Y., Grover, V.: JaBEE: framework for object-oriented Java bytecode compilation and execution on Graphics Processor Units. In: Proceedings of the 5th Annual Workshop on General Purpose Processing with Graphics Processing Units, GPGPU-5, pp. 74\u201383. ACM, New York (2012). http:\/\/doi.acm.org\/10.1145\/2159430.2159439"}],"container-title":["Lecture Notes in Computer Science","Accelerator Programming Using Directives"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-74896-2_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,10,26]],"date-time":"2020-10-26T08:27:36Z","timestamp":1603700856000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-74896-2_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319748955","9783319748962"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-74896-2_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]}}}