{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,4]],"date-time":"2025-05-04T04:06:47Z","timestamp":1746331607598,"version":"3.40.4"},"publisher-location":"Berlin, Heidelberg","reference-count":45,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783662444900"},{"type":"electronic","value":"9783662444917"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-662-44491-7_1","type":"book-chapter","created":{"date-parts":[[2014,7,21]],"date-time":"2014-07-21T01:13:00Z","timestamp":1405905180000},"page":"1-15","source":"Crossref","is-referenced-by-count":2,"title":["Fusion Coherence: Scalable Cache Coherence for Heterogeneous Kilo-Core System"],"prefix":"10.1007","author":[{"given":"Songwen","family":"Pei","sequence":"first","affiliation":[]},{"given":"Myoung-Seo","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Jean-Luc","family":"Gaudiot","sequence":"additional","affiliation":[]},{"given":"Naixue","family":"Xiong","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"1_CR1","doi-asserted-by":"crossref","unstructured":"Borkar, S.: Thousand core chips: a technology perspective. In: Proceedings of the 44th Annual Design Automation Conference (DAC), San Diego, CA, pp. 746\u2013749 (2007)","DOI":"10.1145\/1278480.1278667"},{"key":"1_CR2","unstructured":"Brookwood, N.: AMD fusion family of APUs: enabling a superior, immersive PC experience, AMD white paper (2010) (available in January 2014)"},{"key":"1_CR3","unstructured":"Intel Corpaoration. Intel Haswell Microarchitecture, http:\/\/www.intel.com (available in January 2014)"},{"key":"1_CR4","unstructured":"Nvidia Corporation. Nvidia Project Denver, http:\/\/www.nvidia.com (available in January 2014)"},{"key":"1_CR5","unstructured":"ARM Corporation. Big.LITTLE Processing, http:\/\/www.arm.com (available in January 2014)"},{"key":"1_CR6","doi-asserted-by":"crossref","unstructured":"Lustig, D., Martonosi, M.: Reducing GPU offload latency via fine-grained CPU-GPU synchronization. In: IEEE Conference on HPCA (2013)","DOI":"10.1109\/HPCA.2013.6522332"},{"key":"1_CR7","unstructured":"AMD. Heterogeneous System Architecture: A Technical Review, developer.amd.com\/wordpress\/media\/2012\/10\/hsa10.pdf (available in January 2014)"},{"key":"1_CR8","doi-asserted-by":"crossref","unstructured":"Greeg, C., Hazelwood, K.: Where is the data? Why you cannot debate CPU vs. GPU performance without the answer. In: IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), Austin, TX, pp. 134\u2013144 (2011)","DOI":"10.1109\/ISPASS.2011.5762730"},{"key":"1_CR9","doi-asserted-by":"crossref","unstructured":"Daga, M., Aji, A.M., Feng, W.: On the Efficacy of a Fused CPU+GPU Processor (or APU) for Parallel Computing. In: 2011 Symposium on Application Accelerators in High-Performance Computing, Knoxville, Tennessee, pp. 141\u2013149 (2011)","DOI":"10.1109\/SAAHPC.2011.29"},{"key":"1_CR10","doi-asserted-by":"crossref","unstructured":"Hwu, W.-M.: Rethinking computer architecture for throughput computing. In: 2013 International Conference on Embedded Computer Systems: Architectures, Modeling and Simulation (SAMOS), Keynote, Greece (2013)","DOI":"10.1109\/SAMOS.2013.6621096"},{"key":"1_CR11","unstructured":"Pei, S., Gaudiot, J.-L., et al.: Decoupled memory system for heterogeneous kilo-core high throughput processor. Tech Report, UC Irvine (2013)"},{"key":"1_CR12","doi-asserted-by":"crossref","unstructured":"Ferdman, M., Lotfi-kamran, P., Balet, K., et al.: Cuckoo directory: a scalable directory for many-core systems. In: Proceedings of IEEE 17th International Symposium on High Performance Computer Architecture (HPCA), San Antonio, TX, pp. 169\u2013180 (2011)","DOI":"10.1109\/HPCA.2011.5749726"},{"key":"1_CR13","doi-asserted-by":"crossref","unstructured":"Power, J., Basu, A., Gu, J., et al.: Heterogeneous system coherence for integrated CPU-GPU systems. In: Proceedings of the 46th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO), Davis, CA, pp. 457\u2013467 (2013)","DOI":"10.1145\/2540708.2540747"},{"issue":"2","key":"1_CR14","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2024716.2024718","volume":"39","author":"N. Binkert","year":"2011","unstructured":"Binkert, N., Beckmann, B., Black, G., et al.: The gem5 simulator. ACM SIGARCH Computer Architecture News\u00a039(2), 1\u20137 (2011)","journal-title":"ACM SIGARCH Computer Architecture News"},{"key":"1_CR15","doi-asserted-by":"crossref","unstructured":"Bakhoda, A., Yuan, G.L., Fung, W.W.L., et al.: Analyzing CUDA workloads using a detailed GPU simulator. In: 2009 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), Boston, MA, pp. 163\u2013174 (2009)","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"1_CR16","unstructured":"Hennessy, J., Patterson, D.: Computer Architecture a quantitative approach, 5th edn., p. 333 (2012)"},{"key":"1_CR17","doi-asserted-by":"crossref","unstructured":"Kelm, J., Johnson, M., Lumettea, S., et al.: WayPoint: scaling coherence to 1000-core architectures. In: Proceedings of the 19th International Conference on Parallel Architectures and Compilation Techniques (PACT), Vienna, Austria, pp. 99\u2013110 (2010)","DOI":"10.1145\/1854273.1854291"},{"key":"1_CR18","doi-asserted-by":"crossref","unstructured":"Sanchez, D., Kozyrakis, C.: SCD: A scalable coherence directory with flexible sharer set encoding. In: Proceedings of the 2012 IEEE 18th International Symposium on High-Performance Computer Architecture (HPCA), New Orleans, LA, pp. 1\u201312 (2012)","DOI":"10.1109\/HPCA.2012.6168950"},{"key":"1_CR19","doi-asserted-by":"crossref","unstructured":"Barroso, L., Gharachorloo, K., McNamara, R., et al.: Piranha: a scalable architecture based on single-chip multiprocessing. In: Proceedings of the 27th Annual International Symposium on Computer Architecture (ISCA), Vancouver, Canada, pp. 282\u2013293 (2000)","DOI":"10.1145\/339647.339696"},{"key":"1_CR20","unstructured":"Gupta, A., Weber, W., Mowry, T.: Reducing memory and traffic requirements for scalable directory based cache coherence schemes. In: Proceedings of the International Conference on Parallel Processing, ICPP (1990)"},{"issue":"3","key":"1_CR21","doi-asserted-by":"publisher","first-page":"281","DOI":"10.1109\/71.139202","volume":"3","author":"Q. Yang","year":"1992","unstructured":"Yang, Q., Thangadurai, G., Bhuyan, L.: Design of an adaptive cache coherence protocol for large scale multiprocessors. IEEE Transactions on Parallel and Distributed Systems (TPDS)\u00a03(3), 281\u2013293 (1992)","journal-title":"IEEE Transactions on Parallel and Distributed Systems (TPDS)"},{"key":"1_CR22","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shriraman, A., Kumar, S., et al.: Protozoa: Adaptive granularity cache coherence. In: Proceedings of the 40th Annual International Symposium on Computer Architecture (ISCA), Israel, pp. 547\u2013558 (2013)","DOI":"10.1145\/2485922.2485969"},{"key":"1_CR23","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shriraman, A., Dwarkadsa, S., et al.: SPATL: Honey, I Shrunk the Coherence Directory. In: 2011 International Conference on Parallel Architectures and Compilation Techniques (PACT), Galveston, TX, pp. 33\u201344 (2011)","DOI":"10.1109\/PACT.2011.10"},{"key":"1_CR24","doi-asserted-by":"crossref","unstructured":"Sanchez, D., Kozyrakis, C.: The ZCache: decoupling ways and associativity. In: Proceedings of the 43rd Annual IEE\/ACM Symposium on Microarchitecture (MICRO), Atlanta, GA, pp. 187\u2013198 (2010)","DOI":"10.1109\/MICRO.2010.20"},{"key":"1_CR25","doi-asserted-by":"crossref","unstructured":"Beckmann, N., Sanchez, D.: Jigsaw: scalable software-defined caches. In: Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques (PACT), pp. 213\u2013224 (2013)","DOI":"10.1109\/PACT.2013.6618811"},{"issue":"4","key":"1_CR26","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/MM.2011.40","volume":"31","author":"D.R. Johnson","year":"2011","unstructured":"Johnson, D.R., Kelm, J.H., Crago, N.C., et al.: Rigel: a scalable architecture for 1000+ core accelerators. IEEE Micro\u00a031(4), 30\u201341 (2011)","journal-title":"IEEE Micro"},{"key":"1_CR27","doi-asserted-by":"crossref","unstructured":"Xu, Y., Du, Y., Zhang, Y., et al.: A composite and scalable cache coherence protocol for large scale CMPs. In: Proceedings of the International Conference on Supercomputing, Tucson, Arizona, pp. 285\u2013294 (2011)","DOI":"10.1145\/1995896.1995941"},{"key":"1_CR28","doi-asserted-by":"crossref","unstructured":"Hechtman, B.A., Sorin, D.J.: Evaluating cache coherent shared virtual memory for heterogeneous multicore chips. In: 2013 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), Austin, TX, pp. 118\u2013119 (2013)","DOI":"10.1109\/ISPASS.2013.6557152"},{"key":"1_CR29","doi-asserted-by":"crossref","unstructured":"Lis, M., Shim, K.S., Cho, M.H., et al.: Memory coherence in the age of multicores. In: 2011 IEEE 29th International Conference on Computer Design (ICCD), Amherst, MA, pp. 1\u20138 (2011)","DOI":"10.1109\/ICCD.2011.6081367"},{"key":"1_CR30","doi-asserted-by":"crossref","unstructured":"Singh, I., Shriraman, A., Fung, W.W.L., et al.: Cache Coherence for GPU Architecture. In: 2013 IEEE 19th International Symposium on High Performance Computer Architecture (HPCA), Shenzhen, China, pp. 578\u2013590 (2013)","DOI":"10.1109\/HPCA.2013.6522351"},{"key":"1_CR31","doi-asserted-by":"crossref","unstructured":"Kasture, H., Sanchez, D.: Ubik: Efficient Cache Sharing with Strict QoS for Latency-Critical Workloads. In: Proceedings of the 19th International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS), pp. 1\u201314 (2014)","DOI":"10.1145\/2541940.2541944"},{"key":"1_CR32","unstructured":"Basu, A., Beckmann, B.M., Hill, M.D., et al.: CMP Directory Coherence: One Granularity Does Not Fit All. TR1798, http:\/\/minds.wisconsin.edu\/handle\/1793\/66144 (available in January 2014)"},{"key":"1_CR33","unstructured":"Mekkat, V., Holey, A., Yew, P.C., et al.: Managing shared last-level cache in a heterogeneous multicore processor. In: Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques (PACT), pp. 225\u2013243 (2013)"},{"key":"1_CR34","doi-asserted-by":"crossref","unstructured":"Abeyratne, N., Das, Q., Li, Q., et al.: Scaling towards kilo-core processors with asymmetric high-radix topologies. In: Proceedings of the 19th IEEE International Symposium on High Performance Computer Architecture (HPCA), Shenzhen, China, pp. 496\u2013507 (2013)","DOI":"10.1109\/HPCA.2013.6522344"},{"key":"1_CR35","doi-asserted-by":"crossref","unstructured":"Cesier, L.M., Feautrier, P.: A new solution to coherence problems in mulicache systems. IEEE Transactions on Computers\u00a027 (1978)","DOI":"10.1109\/TC.1978.1675013"},{"key":"1_CR36","doi-asserted-by":"crossref","unstructured":"Guo, S.L., Wang, H.X., Xue, Y.B., et al.: Hierarchical cache directory for CMP. Journal of Computer Science and Technology\u00a025(2) (2010)","DOI":"10.1007\/s11390-010-9321-5"},{"key":"1_CR37","doi-asserted-by":"crossref","unstructured":"Pagh, R., Rodler, F.F.: Cuckoo Hashing. Algotithms\u00a051 (2004)","DOI":"10.1016\/j.jalgor.2003.12.002"},{"key":"1_CR38","doi-asserted-by":"crossref","unstructured":"Moshovos, A.: RegionScout: Exploiting Coarse Grain Sharing in Snoop-Based Coherence. In: Proceedings of the 32nd Annual International Symposium on\u00a0Computer\u00a0Architecture (ISCA), pp. 234\u2013245 (2005)","DOI":"10.1145\/1080695.1069990"},{"key":"1_CR39","doi-asserted-by":"crossref","unstructured":"Zebchuk, J., Safi, E., Moshovos, A.: A Framework for Coarse-Grain Optimizations in the On-Chip Memory Hierarchy. In: Proceedings of the 40th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO), pp. 314\u2013327 (2007)","DOI":"10.1109\/MICRO.2007.4408265"},{"key":"1_CR40","doi-asserted-by":"crossref","unstructured":"Alisafaee, M.: Spatiotemporal Coherence Tracking. In: Proceedings of the 45th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO), Vancouver, BC, pp. 341\u2013350 (2012)","DOI":"10.1109\/MICRO.2012.39"},{"key":"1_CR41","unstructured":"Beckmann, B.M., Basu, A., Reinhardt, S.K.: Region Privatization in directory-based cache coherence. U.S.Patent Application Publication, US2013\/0073811a1 (2013)"},{"key":"1_CR42","doi-asserted-by":"crossref","unstructured":"Kelm, J.H., Johnson, D.R., Tuohy, W., et al.: Cohesion: a Hybrid Memory Model for Accelerators. In: Proceedings of the 37th Annual International Symposium on\u00a0Computer\u00a0Architecture, Saint-Malo, France, pp. 429\u2013440 (2010)","DOI":"10.1145\/1815961.1816019"},{"issue":"1","key":"1_CR43","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1109\/MM.2011.8","volume":"31","author":"J.H. Kelm","year":"2011","unstructured":"Kelm, J.H., Johnson, D.R., Tuohy, W., et al.: Cohesion: An Adaptive Hybrid Memory Model for Accelerators. IEEE Micro\u00a031(1), 42\u201355 (2011)","journal-title":"IEEE Micro"},{"key":"1_CR44","doi-asserted-by":"crossref","unstructured":"Hechtman, B.A., Sorin, D.J.: Exploring Memory Consistency for Massively-Threaded Throughput-Oriented Processors. In: Proceedings of the 40th Annual International Symposium on\u00a0Computer\u00a0Architecture (ISCA), Tel-aviv, Israel, pp. 201\u2013212 (2013)","DOI":"10.1145\/2485922.2485940"},{"key":"1_CR45","doi-asserted-by":"crossref","unstructured":"Sanchez, D., Kozyrakis, C.: ZSim: Fast and Accurate Microarchitectural Simulation of Thousand-Core Systems. In: Proceedings of the 40th Annual International Symposium on\u00a0Computer\u00a0Architecture (ISCA), Tel-aviv, Israel, pp. 475\u2013486 (2013)","DOI":"10.1145\/2485922.2485963"}],"container-title":["Communications in Computer and Information Science","Advanced Computer Architecture"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-662-44491-7_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,3]],"date-time":"2025-05-03T21:50:54Z","timestamp":1746309054000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-662-44491-7_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783662444900","9783662444917"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-3-662-44491-7_1","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2014]]}}}