{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T05:50:49Z","timestamp":1742968249731,"version":"3.40.3"},"publisher-location":"Cham","reference-count":21,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319470986"},{"type":"electronic","value":"9783319470993"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-47099-3_10","type":"book-chapter","created":{"date-parts":[[2016,9,29]],"date-time":"2016-09-29T11:16:42Z","timestamp":1475147802000},"page":"116-129","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Streaming Applications on Heterogeneous Platforms"],"prefix":"10.1007","author":[{"given":"Zhaokui","family":"Li","sequence":"first","affiliation":[]},{"given":"Jianbin","family":"Fang","sequence":"additional","affiliation":[]},{"given":"Tao","family":"Tang","sequence":"additional","affiliation":[]},{"given":"Xuhao","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Canqun","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,9,30]]},"reference":[{"key":"10_CR1","doi-asserted-by":"crossref","unstructured":"Adriaens, J.T., Compton, K., Kim, N.S., Schulte, M.J.: The case for GPGPU spatial multitasking. In: 2012 IEEE 18th International Symposium on High Performance Computer Architecture (HPCA), pp. 1\u201312. IEEE, February 2012","DOI":"10.1109\/HPCA.2012.6168946"},{"key":"10_CR2","doi-asserted-by":"crossref","unstructured":"Boyer, M., Meng, J., Kumaran, K.: Improving GPU performance prediction with data transfer modeling. In: 2013 IEEE 27th International Symposium on Parallel and Distributed Processing Symposium Workshops and PhD Forum (IPDPSW), pp. 1097\u20131106. IEEE, May 2013","DOI":"10.1109\/IPDPSW.2013.236"},{"key":"10_CR3","unstructured":"Che, S., Boyer, M., Meng, J., Tarjan, D., Sheaffer, J.W., Lee, S.-H., Skadron, K.: Rodinia: a benchmark suite for heterogeneous computing. In: IEEE International Symposium on Workload Characterization, 2009. IISWC 2009, pp. 44\u201354. IEEE, October 2009"},{"issue":"9","key":"10_CR4","doi-asserted-by":"publisher","first-page":"1117","DOI":"10.1016\/j.jpdc.2011.07.011","volume":"72","author":"J G\u00f3mez-Luna","year":"2012","unstructured":"G\u00f3mez-Luna, J., Gonz\u00e1lez-Linares, J.M., Benavides, J.I., Guil, N.: Performance models for asynchronous data transfers on consumer graphics processing units. J. Parallel Distrib. Comput. 72(9), 1117\u20131126 (2012)","journal-title":"J. Parallel Distrib. Comput."},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Gregg, C., Hazelwood, K.: Where is the data? Why you cannot debate CPU vs. GPU performance without the answer. In: 2011 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), pp. 134\u2013144. IEEE, April 2011","DOI":"10.1109\/ISPASS.2011.5762730"},{"key":"10_CR6","volume-title":"Computer Architecture: A Quantitative Approach","author":"JL Hennessy","year":"2006","unstructured":"Hennessy, J.L., Patterson, D.A.: Computer Architecture: A Quantitative Approach, 4th edn. Morgan Kaufmann, Burlington (2006)","edition":"4"},{"issue":"12","key":"10_CR7","doi-asserted-by":"publisher","first-page":"2604","DOI":"10.1587\/transinf.E96.D.2604","volume":"96\u2013D","author":"F Ino","year":"2013","unstructured":"Ino, F., Nakagawa, S., Hagihara, K.: GPU-chariot: a programming framework for stream applicationsrunning on multi-GPU systems. IEICE Trans. 96\u2013D(12), 2604\u20132616 (2013)","journal-title":"IEICE Trans."},{"key":"10_CR8","unstructured":"Intel Inc. hStreams Architecture document for Intel MPSS 3.5, April 2015"},{"issue":"2","key":"10_CR9","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1177\/1094342015585845","volume":"30","author":"B Liu","year":"2015","unstructured":"Liu, B., Qiu, W., Jiang, L., Gong, Z.: Software pipelining for graphic processing unit acceleration: partition, scheduling and granularity. Int. J. High Perform. Comput. Appl. 30(2), 169\u2013185 (2015)","journal-title":"Int. J. High Perform. Comput. Appl."},{"issue":"2","key":"10_CR10","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1177\/1094342012468180","volume":"27","author":"MR Meswani","year":"2013","unstructured":"Meswani, M.R., Carrington, L., Unat, D., Snavely, A., Baden, S., Poole, S.: Modeling and predicting performance of high performance computing applications on hardware accelerators. Int. J. High Perform. Comput. Appl. 27(2), 89\u2013108 (2013)","journal-title":"Int. J. High Perform. Comput. Appl."},{"issue":"4","key":"10_CR11","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1145\/2788396","volume":"47","author":"S Mittal","year":"2015","unstructured":"Mittal, S., Vetter, J.S.: A survey of CPU-GPU heterogeneous computing techniques. ACM Comput. Surv. 47(4), 36 (2015)","journal-title":"ACM Comput. Surv."},{"key":"10_CR12","unstructured":"NVIDIA Inc. CUDA C Best Practices Guide Version 7.0, March 2015"},{"issue":"5","key":"10_CR13","doi-asserted-by":"publisher","first-page":"879","DOI":"10.1109\/JPROC.2008.917757","volume":"96","author":"JD Owens","year":"2008","unstructured":"Owens, J.D., Houston, M., Luebke, D., Green, S., Stone, J.E., Phillips, J.C.: GPU computing. Proc. IEEE 96(5), 879\u2013899 (2008)","journal-title":"Proc. IEEE"},{"key":"10_CR14","doi-asserted-by":"crossref","unstructured":"Pienaar, J.A., Raghunathan, A., Chakradhar, S.: MDR: performance model driven runtime for heterogeneous parallel platforms. In: Proceedings of the International Conference on Supercomputing, ICS 2011, pp. 225\u2013234. ACM, New York (2011)","DOI":"10.1145\/1995896.1995933"},{"key":"10_CR15","doi-asserted-by":"crossref","unstructured":"Takizawa, H., Sato, K., Kobayashi, H.: SPRAT: runtime processor selection for energy-aware computing. In: 2008 IEEE International Conference on Cluster Computing, pp. 386\u2013393. IEEE (2008)","DOI":"10.1109\/CLUSTR.2008.4663799"},{"key":"10_CR16","unstructured":"The Khronos OpenCL Working Group. OpenCL - The open standard for parallel programming of heterogeneoussystems, January 2016. \n                      http:\/\/www.khronos.org\/opencl\/"},{"key":"10_CR17","doi-asserted-by":"crossref","unstructured":"Werkhoven, B.V., Maassen, J., Seinstra, F.J., Bal, H.E.: Performance models for CPU-GPU data transfers. In: 2014 14th IEEE\/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGrid), pp. 11\u201320. IEEE, May 2014","DOI":"10.1109\/CCGrid.2014.16"},{"key":"10_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1007\/978-3-319-09873-9_66","volume-title":"Euro-Par 2014 Parallel Processing","author":"F Wende","year":"2014","unstructured":"Wende, F., Steinke, T., Cordes, F.: Concurrent kernel execution on xeon phi within parallel heterogeneous workloads. In: Silva, F., Dutra, I., Santos Costa, V. (eds.) Euro-Par 2014. LNCS, vol. 8632, pp. 788\u2013799. Springer, Heidelberg (2014). doi:\n                      10.1007\/978-3-319-09873-9_66"},{"key":"10_CR19","unstructured":"Wende, F., Steinke, T., Cordes, F.: Multi-threaded kernel offloading to GPGPU using hyper-Q on kepler architecture. Technical report 14\u201319, ZIB, Takustr. 7, 14195 Berlin (2014)"},{"key":"10_CR20","doi-asserted-by":"crossref","unstructured":"Yang, C., Wang, F., Du, Y., Chen, J., Liu, J., Yi, H., Lu, K.: Adaptive optimization for petascale heterogeneous CPU\/GPU computing. In: 2010 IEEE International Conference on Cluster Computing (CLUSTER), pp. 19\u201328. IEEE (2010)","DOI":"10.1109\/CLUSTER.2010.12"},{"key":"10_CR21","doi-asserted-by":"crossref","unstructured":"Yang, C., Xue, W., Fu, H., Gan, L., Li, L., Xu, Y., Lu, Y., Sun, J., Yang, G., Zheng, W.: A peta-scalable CPU-GPU algorithm for global atmospheric simulations. In: Proceedings of the 18th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, PPoPP 2013, pp. 1\u201312. ACM, New York (2013)","DOI":"10.1145\/2442516.2442518"}],"container-title":["Lecture Notes in Computer Science","Network and Parallel Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-47099-3_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,10,19]],"date-time":"2020-10-19T00:10:37Z","timestamp":1603066237000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-47099-3_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319470986","9783319470993"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-47099-3_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2016]]},"assertion":[{"value":"30 September 2016","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NPC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"IFIP International Conference on Network and Parallel Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Xi'an","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2016","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 October 2016","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 October 2016","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"npc2016","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}