{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,6,24]],"date-time":"2024-06-24T23:07:44Z","timestamp":1719270464438},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2017,2,1]],"date-time":"2017-02-01T00:00:00Z","timestamp":1485907200000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2017,2]]},"DOI":"10.1007\/s11704-016-5468-8","type":"journal-article","created":{"date-parts":[[2016,9,15]],"date-time":"2016-09-15T02:39:43Z","timestamp":1473907183000},"page":"130-146","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Understanding co-run performance on CPU-GPU integrated processors: observations, insights, directions"],"prefix":"10.1007","volume":"11","author":[{"given":"Qi","family":"Zhu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xipeng","family":"Shen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kai","family":"Shen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Li","family":"Shen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiying","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,4,7]]},"reference":[{"issue":"4","key":"5468_CR1","doi-asserted-by":"crossref","first-page":"379","DOI":"10.1109\/71.273046","volume":"5","author":"E. P. Markatos","year":"1994","unstructured":"Markatos E P, LeBlanc T J. Using processor affinity in loop scheduling on shared-memory multiprocessors. IEEE Transactions on Parallel Distributed Systems, 1994, 5(4): 379\u2013400","journal-title":"IEEE Transactions on Parallel Distributed Systems"},{"issue":"2","key":"5468_CR2","doi-asserted-by":"crossref","first-page":"131","DOI":"10.1109\/71.207589","volume":"4","author":"M. S. Squillante","year":"1993","unstructured":"Squillante M S, Lazowska E D. Using processor-cache affinity information in shared-memory multiprocessor scheduling. IEEE Transactions on Parallel and Distributed Systems, 1993, 4(2): 131\u2013143","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"5468_CR3","doi-asserted-by":"crossref","first-page":"347","DOI":"10.1145\/1736020.1736059","volume-title":"Proceedings of the 15th Edition of ASPLOS on Architectural Support for Programming Languages and Operating Systems.","author":"I. Gelado","year":"2010","unstructured":"Gelado I, Stone J E, Cabezas J, Patel S, Navarro N, Hwu W M W. An asymmetric distributed shared memory model for heterogeneous parallel systems. In: Proceedings of the 15th Edition of ASPLOS on Architectural Support for Programming Languages and Operating Systems. 2010, 347\u2013358"},{"key":"5468_CR4","doi-asserted-by":"crossref","first-page":"220","DOI":"10.1145\/1454115.1454146","volume-title":"Proceedings of the International Conference on Parallel Architecture and Compilation Techniques.","author":"Y. Jiang","year":"2008","unstructured":"Jiang Y, Shen X P, Chen J, Tripathi R. Analysis and approximation of optimal co-scheduling on chip multiprocessors. In: Proceedings of the International Conference on Parallel Architecture and Compilation Techniques. 2008, 220\u2013229"},{"key":"5468_CR5","first-page":"41","volume-title":"Proceedings of the 6th ACM Computing Frontiers.","author":"K. Tian","year":"2009","unstructured":"Tian K, Jiang Y L, Shen X P. A study on optimally co-scheduling jobs of different lengths on chip multiprocessors. In: Proceedings of the 6th ACM Computing Frontiers. 2009, 41\u201350"},{"key":"5468_CR6","first-page":"25","volume-title":"Proceedings of the 16th International Conference on Parallel Architecture and Compilation Techniques.","author":"A. Fedorova","year":"2007","unstructured":"Fedorova A, Seltzer M, Smith M D. Improving performance isolation on chip multiprocessors via an operating system scheduler. In: Proceedings of the 16th International Conference on Parallel Architecture and Compilation Techniques. 2007, 25\u201338"},{"key":"5468_CR7","volume-title":"Proceedings of the 20th International Parallel and Distributed Processing Symposium.","author":"A. El-Moursy","year":"2006","unstructured":"El-Moursy A, Garg R, Albonesi D H, Dwarkadas S. Compatible phase co-scheduling on a CMP of multi-threaded processors. In: Proceedings of the 20th International Parallel and Distributed Processing Symposium. 2006"},{"key":"5468_CR8","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1145\/2541940.2541963","volume-title":"Proceedings of the 19th International Conference on Architectural Support for Programming Languages and Operating Systems.","author":"K. Menychtas","year":"2014","unstructured":"Menychtas K, Shen K, Scott M L. Disengaged scheduling for fair, protected access to computational accelerators. In: Proceedings of the 19th International Conference on Architectural Support for Programming Languages and Operating Systems. 2014, 301\u2013316"},{"key":"5468_CR9","first-page":"17","volume-title":"Proceedings of the 2011 USENIX Annual Technical Conference.","author":"S. Kato","year":"2011","unstructured":"Kato S, Lakshmanan K, Rajkumar R, Ishikawa Y. TimeGraph: GPU scheduling for real-time multi-tasking environments. In: Proceedings of the 2011 USENIX Annual Technical Conference. 2011, 17\u201330"},{"key":"5468_CR10","first-page":"225","volume-title":"Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques.","author":"V. Mekkat","year":"2013","unstructured":"Mekkat V, Holey A, Yew P C, Zhai A. Managing shared last-level cache in a heterogeneous multicore processor. In: Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques. 2013, 225\u2013234"},{"key":"5468_CR11","first-page":"26","volume-title":"Proceedings of the 12th International Conference on Parallel Architectures and Compilation Techniques.","author":"N. Tuck","year":"2003","unstructured":"Tuck N, Tullsen D M. Initial observations of the simultaneous multithreading Pentium 4 processor. In: Proceedings of the 12th International Conference on Parallel Architectures and Compilation Techniques. 2003, 26\u201335"},{"issue":"4","key":"5468_CR12","doi-asserted-by":"crossref","first-page":"98","DOI":"10.1145\/2082156.2082183","volume":"39","author":"J. Fousek","year":"2011","unstructured":"Fousek J, Filipovic J, Madzin M. Automatic fusions of CUDA-GPU kernels for parallel map. ACM SIGARCH Computer Architecture News, 2011, 39(4): 98\u201399","journal-title":"ACM SIGARCH Computer Architecture News"},{"key":"5468_CR13","first-page":"344","volume-title":"Proceedings of the 2010 IEEE\/ACM International Conference on Green Computing and Communications & International Conference on Cyber, Physical and Social Computing (CPSCom).","author":"G. B. Wang","year":"2010","unstructured":"Wang G B, Lin Y S, Yi W. Kernel fusion: an effective method for better power efficiency on multithreaded GPU. In: Proceedings of the 2010 IEEE\/ACM International Conference on Green Computing and Communications & International Conference on Cyber, Physical and Social Computing (CPSCom). 2010, 344\u2013350"},{"key":"5468_CR14","first-page":"2433","volume-title":"Proceedings of Parallel and Distributed Processing Symposium Workshops & PhD Forum (IPDPSW).","author":"H. C. Wu","year":"2012","unstructured":"Wu H C, Diamos G, Wang J, Cadambi S, Yalamanchili S, Chakradhar S. Optimizing data warehousing applications for GPUs using kernel fusion \/fission. In: Proceedings of Parallel and Distributed Processing Symposium Workshops & PhD Forum (IPDPSW). 2012, 2433\u20132442"},{"key":"5468_CR15","doi-asserted-by":"crossref","first-page":"145","DOI":"10.1145\/1572769.1572792","volume-title":"Proceedings of the Conference on High Performance Graphics.","author":"T. Aila","year":"2009","unstructured":"Aila T, Laine S. Understanding the efficiency of ray traversal on GPUs. In: Proceedings of the Conference on High Performance Graphics. 2009, 145\u2013149"},{"key":"5468_CR16","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/IPDPS.2010.5470413","volume-title":"Proceedings of 2010 IEEE International Symposium on Parallel and Distributed Processing (IPDPS).","author":"L. Chen","year":"2010","unstructured":"Chen L, Villa O, Krishnamoorthy S, Gao G R. Dynamic load balancing on single- and multi-GPU systems. In: Proceedings of 2010 IEEE International Symposium on Parallel and Distributed Processing (IPDPS). 2010, 1\u201312"},{"key":"5468_CR17","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/InPar.2012.6339596","volume-title":"Proceedings of Innovative Parallel Computing (InPar), 2012.","author":"K. Gupta","year":"2012","unstructured":"Gupta K, Stuart J A, Owens J D. A study of persistent threads style GPU programming for GPGPU workloads. In: Proceedings of Innovative Parallel Computing (InPar), 2012. 2012, 1\u201314"},{"key":"5468_CR18","first-page":"1","volume-title":"Proceedings of the 2010 IEEE International Symposium on Parallel & Distributed Proceedings.","author":"S. C. Xiao","year":"2010","unstructured":"Xiao S C, Feng W C. Inter-block GPU communication via fast barrier synchronization. In: Proceedings of the 2010 IEEE International Symposium on Parallel & Distributed Proceedings. 2010, 1\u201312"},{"key":"5468_CR19","first-page":"2","volume-title":"Proceedings of the 2007 Workshop on Experimental Computer Science.","author":"C. P. Li","year":"2007","unstructured":"Li C P, Ding C, Shen K. Quantifying the cost of context switch. In: Proceedings of the 2007 Workshop on Experimental Computer Science. 2007, 2"},{"key":"5468_CR20","doi-asserted-by":"crossref","first-page":"374","DOI":"10.1145\/2155620.2155664","volume-title":"Proceedings of the 44th Annual IEEE\/ACMInternational Symposium onMicroarchitecture.","author":"S. P. Muralidhara","year":"2011","unstructured":"Muralidhara S P, Subramanian L, Mutlu O, Kandemir M, Moscibroda T. Reducing memory interference in multicore systems via applicationaware memory channel partitioning. In: Proceedings of the 44th Annual IEEE\/ACMInternational Symposium onMicroarchitecture. 2011, 374\u2013385"},{"key":"5468_CR21","doi-asserted-by":"crossref","first-page":"169","DOI":"10.1109\/ISCA.2014.6853214","volume-title":"Proceedings of the 2014 ACM\/IEEE 41st International Symposium on Computer Architecture.","author":"L. Liu","year":"2014","unstructured":"Liu L, Li Y, Cui Z H, Bao Y G, Chen M Y, Wu C Y. Going vertical in memory management: handling multiplicity by multi-policy. In: Proceedings of the 2014 ACM\/IEEE 41st International Symposium on Computer Architecture. 2014, 169\u2013180"},{"key":"5468_CR22","first-page":"367","volume-title":"Proceedings of the 14th IEEE International Symposium on High Performance Computer Architecture.","author":"J. Lin","year":"2008","unstructured":"Lin J, Lu Q D, Ding X N, Zhang Z, Zhang X D, Sadayappan P. Gaining insights into multicore cache partitioning: bridging the gap between simulation and real systems. In: Proceedings of the 14th IEEE International Symposium on High Performance Computer Architecture. 2008, 367\u2013378"},{"key":"5468_CR23","doi-asserted-by":"crossref","first-page":"367","DOI":"10.1145\/2370816.2370869","volume-title":"Proceedings of the 21st International Conference on Parallel Architectures and Compilation Techniques.","author":"L. Liu","year":"2012","unstructured":"Liu L, Cui Z H, Xing M J, Bao Y G, Chen M Y, Wu C Y. A software memory partition approach for eliminating bank-level interference in multicore systems. In: Proceedings of the 21st International Conference on Parallel Architectures and Compilation Techniques. 2012, 367\u2013376"},{"key":"5468_CR24","doi-asserted-by":"crossref","first-page":"242","DOI":"10.1145\/1274971.1275005","volume-title":"Proceedings of the 25th Annual International Conference on Supercomputing.","author":"J. C. Chang","year":"2007","unstructured":"Chang J C, Sohi G S. Cooperative cache partitioning for chip multiprocessors. In: Proceedings of the 25th Annual International Conference on Supercomputing. 2007, 242\u2013252"},{"key":"5468_CR25","doi-asserted-by":"crossref","first-page":"2","DOI":"10.1145\/1152154.1152160","volume-title":"Proceedings of the 15th International Conference on Parallel Architecture and Compilation Techniques.","author":"N. Rafique","year":"2006","unstructured":"Rafique N, Lim W T, Thottethodi M. Architectural support for operating system-driven CMP cache management. In: Proceedings of the 15th International Conference on Parallel Architecture and Compilation Techniques. 2006, 2\u201312"},{"key":"5468_CR26","doi-asserted-by":"crossref","first-page":"117","DOI":"10.1109\/HPCA.2002.995703","volume-title":"Proceedings of the 8th International Symposium on High-Performance Computer Architecture.","author":"G. E. Suh","year":"2002","unstructured":"Suh G E, Devadas S, Rudolph L. A new memory monitoring scheme for memory-aware scheduling and partitioning. In: Proceedings of the 8th International Symposium on High-Performance Computer Architecture. 2002, 117\u2013128"},{"key":"5468_CR27","first-page":"423","volume-title":"Proceedings of the 39th International Symposium on Microarchitecture.","author":"M. K. Qureshi","year":"2006","unstructured":"Qureshi M K, Patt Y N. Utility-based cache partitioning: a lowoverhead, high-performance, runtime mechanism to partition shared caches. In: Proceedings of the 39th International Symposium on Microarchitecture. 2006, 423\u2013432"},{"key":"5468_CR28","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1145\/1693453.1693482","volume-title":"Proceedings of the 15th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","author":"E. Z. Zhang","year":"2010","unstructured":"Zhang E Z, Jiang Y L, Shen X P. Does cache sharing on modern CMP matter to the performance of contemporary multithreaded programs? In: Proceedings of the 15th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming. 2010, 203\u2013212"},{"key":"5468_CR29","first-page":"65","volume-title":"Proceedings of the 43rd Annual IEEE\/ACMInternational Symposium on Microarchitecture (MICRO).","author":"Y. Kim","year":"2010","unstructured":"Kim Y, Papamichael M, Mutlu O, Harchol-Balter M. Thread cluster memory scheduling: exploiting differences in memory access behavior. In: Proceedings of the 43rd Annual IEEE\/ACMInternational Symposium on Microarchitecture (MICRO). 2010, 65\u201376"},{"issue":"3","key":"5468_CR30","doi-asserted-by":"crossref","first-page":"308","DOI":"10.1145\/2508148.2485949","volume":"41","author":"H. Cook","year":"2013","unstructured":"Cook H, Moreto M, Bird S, Dao K, Patterson D A, Asanovic K. A hardware evaluation of cache partitioning to improve utilization and energy-efficiency while preserving responsiveness. ACM SIGARCH Computer Architecture News, 2013, 41(3): 308\u2013319","journal-title":"ACM SIGARCH Computer Architecture News"},{"key":"5468_CR31","doi-asserted-by":"crossref","first-page":"262","DOI":"10.1007\/978-3-642-11515-8_20","volume-title":"Proceedings of the 5th international conference on High Performance Embedded Architectures and Compilers.","author":"Y. J. Xie","year":"2010","unstructured":"Xie Y J, Loh G H. Scalable shared-cache management by containing thrashing workloads. In: Proceedings of the 5th international conference on High Performance Embedded Architectures and Compilers. 2010, 262\u2013276"},{"key":"5468_CR32","first-page":"1","volume-title":"Proceedings of the 40th International Symposium on Computer Architecture (ISCA).","author":"J. Mars","year":"2013","unstructured":"Mars J, Tang L J. Whare-Map: Heterogeneity in \u201chomogeneous\u201d warehouse-scale computers. In: Proceedings of the 40th International Symposium on Computer Architecture (ISCA). 2013, 1\u201312"},{"issue":"4","key":"5468_CR33","doi-asserted-by":"crossref","first-page":"145","DOI":"10.1145\/2644865.2541962","volume":"49","author":"S. M. Zahedi","year":"2014","unstructured":"Zahedi S M, Lee B C. REF: resource elasticity fairness with sharing incentives for multiprocessors. ACM SIGPLAN Notices, 2014, 49(4): 145\u2013160","journal-title":"ACM SIGPLAN Notices"},{"issue":"3","key":"5468_CR34","doi-asserted-by":"crossref","first-page":"416","DOI":"10.1145\/2366231.2337207","volume":"40","author":"R. Ausavarungnirun","year":"2012","unstructured":"Ausavarungnirun R, Chang K KW, Subramanian L, Loh G H, Mutlu O. Staged memory scheduling: achieving high performance and scalability in heterogeneous systems. ACM SIGARCH Computer Architecture News, 2012, 40(3): 416\u2013427","journal-title":"ACM SIGARCH Computer Architecture News"},{"key":"5468_CR35","first-page":"82","volume-title":"Proceedings of International Workshop on Languages and Compilers for Parallel Computing (LCPC).","author":"Q. Zhu","year":"2014","unstructured":"Zhu Q, Wu B, Shen X P, Shen L, Wang Z Y. Understanding co-run degradations on integrated heterogeneous processors. In: Proceedings of International Workshop on Languages and Compilers for Parallel Computing (LCPC). 2014, 82\u201397"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-016-5468-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11704-016-5468-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-016-5468-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,20]],"date-time":"2023-08-20T03:44:14Z","timestamp":1692503054000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11704-016-5468-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,2]]},"references-count":35,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2017,2]]}},"alternative-id":["5468"],"URL":"https:\/\/doi.org\/10.1007\/s11704-016-5468-8","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"value":"2095-2228","type":"print"},{"value":"2095-2236","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,2]]}}}