{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T02:10:01Z","timestamp":1751249401050,"version":"3.41.0"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2018,1,29]],"date-time":"2018-01-29T00:00:00Z","timestamp":1517184000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"Spanish Government","award":["TIN2013-43228-P"],"award-info":[{"award-number":["TIN2013-43228-P"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Parallel Prog"],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1007\/s10766-018-0557-y","type":"journal-article","created":{"date-parts":[[2018,1,29]],"date-time":"2018-01-29T04:23:35Z","timestamp":1517199815000},"page":"1110-1138","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Mosaic: A Scalable Coherence Protocol"],"prefix":"10.1007","volume":"46","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0850-6738","authenticated-orcid":false,"given":"Lucia G.","family":"Menezo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6904-3282","authenticated-orcid":false,"given":"Valentin","family":"Puente","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1262-1256","authenticated-orcid":false,"given":"Pablo","family":"Abad","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2214-303X","authenticated-orcid":false,"given":"Jose-Angel","family":"Gregorio","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,1,29]]},"reference":[{"issue":"3","key":"557_CR1","doi-asserted-by":"crossref","first-page":"371","DOI":"10.1145\/1555815.1555801","volume":"37","author":"BM Rogers","year":"2009","unstructured":"Rogers, B.M., Krishna, A., Bell, G.B., Vu, K., Jiang, X., Solihin, Y.: Scaling the bandwidth wall: challenges in and avenues for CMP scaling. Int. Symp. Comput. Archit (ISCA) 37(3), 371 (2009)","journal-title":"Int. Symp. Comput. Archit (ISCA)"},{"key":"557_CR2","unstructured":"ITRS.: Roadmap 2012. http:\/\/www.itrs.net\/links\/2012itrs\/home2012.htm (2012)"},{"issue":"2","key":"557_CR3","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1109\/L-CA.2011.20","volume":"10","author":"P Prieto","year":"2011","unstructured":"Prieto, P., Puente, V., Gregorio, J.A.: Multilevel cache modeling for chip-multiprocessor systems. IEEE Comput. Archit. Lett. 10(2), 49\u201352 (2011)","journal-title":"IEEE Comput. Archit. Lett."},{"key":"557_CR4","doi-asserted-by":"crossref","unstructured":"Butler, M.: \u201cAMD \u2018Bulldozer\u2019 Core\u2014a new approach to multithreaded compute performance for maximum efficiency and throughput,\u201d In: IEEE HotChips Symposium on High-Performance Chips (HotChips 2010) (2010)","DOI":"10.1109\/HOTCHIPS.2010.7480086"},{"issue":"2","key":"557_CR5","doi-asserted-by":"publisher","first-page":"6","DOI":"10.1109\/MM.2014.10","volume":"34","author":"P Hammarlund","year":"2014","unstructured":"Hammarlund, P., Martinez, A.J., Bajwa, A.A., Hill, D.L., Hallnor, E., Jiang, H., Dixon, M., Derr, M., Hunsaker, M., Kumar, R., Osborne, R.B., Rajwar, R., Singhal, R., D\u2019Sa, R., Chappell, R., Kaushik, S., Chennupaty, S., Jourdan, S., Gunther, S., Piazza, T., Burton, T.: Haswell: the fourth-generation intel core processor. IEEE Micro 34(2), 6\u201320 (2014)","journal-title":"IEEE Micro"},{"issue":"2","key":"557_CR6","first-page":"48","volume":"33","author":"J Feehrer","year":"2013","unstructured":"Feehrer, J., Jairath, S., Loewenstein, P., Sivaramakrishnan, R., Smentek, D., Turullols, S., Vahidsafa, A.: The oracle sparc T5 16-core processor scales to eight sockets. IEEE Comput. Soc. 33(2), 48\u201357 (2013)","journal-title":"IEEE Comput. Soc."},{"issue":"2","key":"557_CR7","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1109\/MM.2010.38","volume":"30","author":"R Kalla","year":"2010","unstructured":"Kalla, R., Sinharoy, B., Starke, W.J., Floyd, M.: Power7: IBM\u2019s next-generation server processor. IEEE Micro 30(2), 7\u201315 (2010)","journal-title":"IEEE Micro"},{"key":"557_CR8","doi-asserted-by":"crossref","unstructured":"Molka, D., Hackenberg, D., Schone, R., Muller, M.S.: Memory performance and cache coherency effects on an intel nehalem multiprocessor system. In: 2009 18th International Conference on Parallel Architectures and Compilation Techniques, pp. 261\u2013270 (2009)","DOI":"10.1109\/PACT.2009.22"},{"issue":"1","key":"557_CR9","doi-asserted-by":"publisher","first-page":"1:1","DOI":"10.1147\/JRD.2011.2173962","volume":"56","author":"F Busaba","year":"2012","unstructured":"Busaba, F., Blake, M.A., Curran, B., Fee, M., Jacobi, C., Mak, P.-K., Prasky, B.R., Walters, C.R.: IBM zEnterprise 196 microprocessor and cache subsystem. IBM J. Res. Dev. 56(1), 1:1\u20131:12 (2012)","journal-title":"IBM J. Res. Dev."},{"issue":"1","key":"557_CR10","doi-asserted-by":"publisher","first-page":"3:1","DOI":"10.1147\/JRD.2014.2376131","volume":"59","author":"WJ Starke","year":"2015","unstructured":"Starke, W.J., Stuecheli, J., Daly, D.M., Dodson, J.S., Auernhammer, F., Sagmeister, P.M., Guthrie, G.L., Marino, C.F., Siegel, M., Blaner, B.: The cache and memory subsystems of the IBM POWER8 processor. IBM J. Res. Dev. 59(1), 3:1\u20133:13 (2015)","journal-title":"IBM J. Res. Dev."},{"issue":"4","key":"557_CR11","doi-asserted-by":"publisher","first-page":"491","DOI":"10.1147\/rd.504.0491","volume":"50","author":"AW Topol","year":"2006","unstructured":"Topol, A.W., La Tulipe, D.C., Shi, L., Frank, D.J., Bernstein, K., Steen, S.E., Kumar, A., Singco, G.U., Young, a M., Guarini, K.W., Ieong, M.: Three-dimensional integrated circuits. IBM J. Res. Dev. 50(4), 491\u2013506 (2006)","journal-title":"IBM J. Res. Dev."},{"key":"557_CR12","doi-asserted-by":"crossref","unstructured":"Choi, B., Komuravelli, R., Sung, H., Smolinski, R., Honarmand, N., Adve, S.V., Carter, N.P., Chou, C.-T.: DeNovo: rethinking the memory hierarchy for disciplined parallelism. In: 20th International Conference on Parallel Architectures and Compilation Techniques (PACT), pp. 155\u2013166 (2011)","DOI":"10.1109\/PACT.2011.21"},{"issue":"1","key":"557_CR13","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1109\/JSSC.2010.2079450","volume":"46","author":"J Howard","year":"2011","unstructured":"Howard, J., Dighe, S., Vangal, S.R., Ruhl, G., Borkar, N., Jain, S., Erraguntla, V., Konow, M., Riepen, M., Gries, M., Droege, G., Lund-Larsen, T., Steibl, S., Borkar, S., De, V.K., Van Der Wijngaart, R.: A 48-core IA-32 processor in 45 nm CMOS using on-die message-passing and DVFS for performance and power scaling. IEEE J. Solid-State Circuits 46(1), 173\u2013183 (2011)","journal-title":"IEEE J. Solid-State Circuits"},{"issue":"7","key":"557_CR14","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1145\/2209249.2209269","volume":"55","author":"MMK Martin","year":"2012","unstructured":"Martin, M.M.K., Hill, M.D., Sorin, D.J.: Why on-chip cache coherence is here to stay. Commun. ACM 55(7), 78 (2012)","journal-title":"Commun. ACM"},{"key":"557_CR15","doi-asserted-by":"crossref","unstructured":"Kurd, N., Douglas, J., Mosalikanti, P., Kumar, R.: Next generation Intel\u00aemicro-architecture (Nehalem) clocking architecture. In: IEEE Symp. VLSI Circ., pp. 62\u201363 (2008)","DOI":"10.1109\/VLSIC.2008.4585952"},{"issue":"2","key":"557_CR16","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1109\/MM.2010.31","volume":"30","author":"P Conway","year":"2010","unstructured":"Conway, P., Kalyanasundharam, N., Donley, G., Lepak, K., Hughes, B.: Cache hierarchy and memory subsystem of the AMD opteron processor. IEEE Micro 30(2), 16\u201329 (2010)","journal-title":"IEEE Micro"},{"key":"557_CR17","doi-asserted-by":"crossref","unstructured":"Raghavan, A., Blundell, C., Martin, M.M.K.: Token tenure: PATCHing token counting using directory-based cache coherence. In: 41st IEEE\/ACM International Symposium on Microarchitecture (MICRO), pp. 47\u201358 (2008)","DOI":"10.1109\/MICRO.2008.4771778"},{"key":"557_CR18","doi-asserted-by":"crossref","unstructured":"Menezo, L.G., Puente, V., Gregorio, J.A.: The case for a scalable coherence protocol for complex on-chip cache hierarchies in many-core systems. In: 22nd International Conference on Parallel Architectures and Compilation Techniques (PACT), pp. 279\u2013288 (2013)","DOI":"10.1109\/PACT.2013.6618817"},{"key":"557_CR19","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1007\/978-1-4615-3604-8_9","volume-title":"Scalable Shared Memory Multiprocessors","author":"Anoop Gupta","year":"1992","unstructured":"Gupta, A., Weber, W., Mowry, T.: Reducing memory and traffic requirements for scalable directory-based cache coherence schemes. In: International Conference on Parallel Processing, pp. 167\u2013192 (1990)"},{"key":"557_CR20","doi-asserted-by":"crossref","unstructured":"Martin, M.M.K., Hill, M.D.D., Wood, D.A.: Token coherence: decoupling performance and correctness. In: 30th International Symposium on Computer Architecture (ISCA), pp. 182\u2013193 (2003)","DOI":"10.1145\/859618.859640"},{"issue":"2","key":"557_CR21","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1145\/633625.52409","volume":"16","author":"J-L Baer","year":"1988","unstructured":"Baer, J.-L., Wang, W.-H.: On the inclusion properties for multi-level cache hierarchies. ACM SIGARCH Comput. Archit. News 16(2), 73\u201380 (1988)","journal-title":"ACM SIGARCH Comput. Archit. News"},{"key":"557_CR22","doi-asserted-by":"crossref","unstructured":"Jaleel, A., Borch, E., Bhandaru, M., Steely Jr., S.C., Emer, J.: Achieving non-inclusive cache performance with inclusive caches: temporal locality aware (TLA) cache management policies. In: 43rd IEEE\/ACM International Symposium on Microarchitecture (MICRO), pp. 151\u2013162 (2010)","DOI":"10.1109\/MICRO.2010.52"},{"issue":"8","key":"557_CR23","doi-asserted-by":"publisher","first-page":"1028","DOI":"10.1109\/TPDS.2007.1091","volume":"18","author":"J Huh","year":"2007","unstructured":"Huh, J., Kim, C., Shafi, H., Zhang, L., Burger, D., Keckler, S.W.: A NUCA substrate for flexible CMP cache sharing. IEEE Trans. Parallel Distrib. Syst. 18(8), 1028\u20131040 (2007)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"issue":"2","key":"557_CR24","doi-asserted-by":"publisher","first-page":"148","DOI":"10.1109\/TVLSI.2005.863753","volume":"14","author":"K Lee","year":"2006","unstructured":"Lee, K., Lee, S.J., Yoo, H.J.: Low-power network-on-chip for high-performance SoC design. IEEE Trans. Very Large Scale Integr. Syst. 14(2), 148\u2013160 (2006)","journal-title":"IEEE Trans. Very Large Scale Integr. Syst."},{"key":"557_CR25","doi-asserted-by":"crossref","unstructured":"Agarwal, N., Peh, L., Jha, N.K.: In-network snoop ordering (INSO): snoopy coherence on unordered interconnects. In: 15th International Symposium on High Performance Computer Architecture (HPCA), pp. 67\u201378 (2009)","DOI":"10.1109\/HPCA.2009.4798238"},{"key":"557_CR26","doi-asserted-by":"crossref","unstructured":"Jerger, N.E., Peh, L.S., Lipasti, M.: Virtual circuit tree multicasting: a case for on-chip hardware multicast support. In: International Symposium on Computer Architecture (ISCA), pp. 229\u2013240 (2008)","DOI":"10.1145\/1394608.1382141"},{"issue":"11","key":"557_CR27","doi-asserted-by":"publisher","first-page":"2010","DOI":"10.1109\/TPDS.2012.45","volume":"23","author":"P Abad","year":"2012","unstructured":"Abad, P., Puente, V., Menezo, L.G., Gregorio, J.A.: Adaptive-Tree Multicast: Efficient Multidestination Support for CMP Communication Substrate. IEEE Trans. Parallel Distrib. Syst. 23(11), 2010\u20132023 (2012)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"557_CR28","doi-asserted-by":"crossref","unstructured":"Zebchuk, J., Srinivasan, V., Qureshi, M.K.M.K., Moshovos, A.: A tagless coherence directory. In: International Symposium on Microarchitecture (MICRO), pp. 423\u2013434 (2009)","DOI":"10.1145\/1669112.1669166"},{"key":"557_CR29","unstructured":"OpenSPARC T2 Core Microarchitecture Specification. Santa Clara, CA (2007)"},{"issue":"6","key":"557_CR30","doi-asserted-by":"publisher","first-page":"556","DOI":"10.1109\/TPDS.2002.1011412","volume":"13","author":"DJ Sorin","year":"2002","unstructured":"Sorin, D.J., Plakal, M., Condon, A.E., Hill, M.D., Martin, M.M.K., Wood, D.A.: Specifying and verifying a broadcast and a multicast snooping cache coherence protocol. IEEE Trans. Parallel Distrib. Syst. 13(6), 556\u2013578 (2002)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"557_CR31","doi-asserted-by":"crossref","unstructured":"Sanchez, D., Kozyrakis, C.: SCD: a scalable coherence directory with flexible sharer set encoding. In: 18th IEEE International Symposium on High Performance Computer Architecture, pp. 1\u201312 (2012)","DOI":"10.1109\/HPCA.2012.6168950"},{"key":"557_CR32","doi-asserted-by":"crossref","unstructured":"Sanchez, D., Kozyrakis, C.: The ZCache: decoupling ways and associativity. In: International Symposium on Microarchitecture (MICRO), pp. 187\u2013198 (2010)","DOI":"10.1109\/MICRO.2010.20"},{"key":"557_CR33","unstructured":"Menezo, L.G.: Mosaic coherence protocol specification. State Table (sparse design) (2016). https:\/\/www.ce.unican.es\/docs\/coherence_protocols\/mosaic_sparse\/index.html"},{"key":"557_CR34","unstructured":"Menezo, L.G.: Mosaic coherence protocol specification. State Table (in-cache design) (2016). https:\/\/www.ce.unican.es\/docs\/coherence_protocols\/mosaic_incache\/index.html"},{"issue":"1","key":"557_CR35","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/JSSC.2012.2223036","volume":"48","author":"JL Shin","year":"2013","unstructured":"Shin, J.L., Park, H., Li, H., Smith, A., Choi, Y., Sathianathan, H., Dash, S., Turullols, S., Kim, S., Masleid, R., Konstadinidis, G., Golla, R., Doherty, M.J., Grohoski, G., McAllister, C.: The next-generation 64b SPARC core in a T4 SoC processor. IEEE J. Solid-State Circuits 48(1), 82\u201390 (2013)","journal-title":"IEEE J. Solid-State Circuits"},{"key":"557_CR36","unstructured":"Pinkston, T.M., Duato, J.: Appendix F: interconnection networks. In: Computer Architecture: A Quantitative Approach, 5th ed. Morgan Kaufmann, Burlington (2012)"},{"key":"557_CR37","doi-asserted-by":"crossref","unstructured":"Menezo, L.G., Puente, V., Abad, P., Gregorio, J.A.: Improving coherence protocol reactiveness by trading bandwidth for latency. In: ACM International Conference on Computing Frontiers (CF\u201912), pp. 143\u2013152 (2012)","DOI":"10.1145\/2212908.2212929"},{"issue":"4","key":"557_CR38","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1145\/1105734.1105747","volume":"33","author":"MMK Martin","year":"2005","unstructured":"Martin, M.M.K., Sorin, D.J., Beckmann, B.M., Marty, M.R., Xu, M., Alameldeen, A.R., Moore, K.E., Hill, M.D., Wood, D.A.: Multifacet\u2019s general execution-driven multiprocessor simulator (GEMS) toolset. ACM SIGARCH Comput. Archit. News 33(4), 92\u201399 (2005)","journal-title":"ACM SIGARCH Comput. Archit. News"},{"key":"557_CR39","doi-asserted-by":"crossref","unstructured":"Muralimanohar, N., Balasubramonian, R., Jouppi, N.: Optimizing NUCA organizations and wiring alternatives for large caches with CACTI 6.0. In: 40th IEEE\/ACM International Symposium on Microarchitecture (MICRO), pp. 3\u201314 (2007)","DOI":"10.1109\/MICRO.2007.33"},{"key":"557_CR40","doi-asserted-by":"crossref","unstructured":"Sun, C., Chen, C.-H.O., Kurian, G., Wei, L., Miller, J., Agarwal, A., Peh, L.-S., Stojanovic, V.: DSENT\u2014a tool connecting emerging photonics with electronics for opto-electronic networks-on-chip modeling. In: International Symposium on Networks-on-Chip (NOCS), pp. 201\u2013210 (2012)","DOI":"10.1109\/NOCS.2012.31"},{"key":"557_CR41","unstructured":"Jin, H., Frumkin, M., Yan, J.: The OpenMP implementation of NAS parallel benchmarks and its performance. Natl. Aeronaut. Sp. Adm. (NASA), Tech. Rep. NAS-99-011, Moffett Field, USA, no. October (1999)"},{"issue":"2","key":"557_CR42","first-page":"50","volume":"36","author":"AR Alameldeen","year":"2003","unstructured":"Alameldeen, A.R., Martin, M.M.K., Mauer, C.J., Moore, K.E., Hill, M.D., Wood, D.A., Sorin, D.J.: Simulating a $2M commercial server on a $2K PC. Computer (Long. Beach. Calif) 36(2), 50\u201357 (2003)","journal-title":"Computer (Long. Beach. Calif)"},{"key":"557_CR43","doi-asserted-by":"crossref","unstructured":"Loh, G.H., Hill, M.D.: Efficiently enabling conventional block sizes for very large die-stacked DRAM caches. In: International Symposium on Microarchitecture (MICRO), p. 454 (2011)","DOI":"10.1145\/2155620.2155673"},{"key":"557_CR44","doi-asserted-by":"crossref","unstructured":"Demetriades, S., Cho, S.: Stash directory: a scalable directory for many-core coherence. In: International Symposium for High-Performance, Computer Architecture (HPCA) (2014)","DOI":"10.1109\/HPCA.2014.6835928"},{"key":"557_CR45","doi-asserted-by":"crossref","unstructured":"Cuesta, B.A., Ros, A., G\u00f3mez, M.E., Robles, A., Duato, J.F.: Increasing the effectiveness of directory caches by deactivating coherence for private memory blocks. In International Symposium on Computer Architecture (ISCA), p. 93 (2011)","DOI":"10.1145\/2000064.2000076"}],"container-title":["International Journal of Parallel Programming"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10766-018-0557-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-018-0557-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-018-0557-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T01:33:54Z","timestamp":1751247234000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10766-018-0557-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,1,29]]},"references-count":45,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2018,12]]}},"alternative-id":["557"],"URL":"https:\/\/doi.org\/10.1007\/s10766-018-0557-y","relation":{},"ISSN":["0885-7458","1573-7640"],"issn-type":[{"type":"print","value":"0885-7458"},{"type":"electronic","value":"1573-7640"}],"subject":[],"published":{"date-parts":[[2018,1,29]]},"assertion":[{"value":"29 August 2016","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 January 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 January 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}