{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,3,30]],"date-time":"2022-03-30T16:53:22Z","timestamp":1648659202478},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2010,12,29]],"date-time":"2010-12-29T00:00:00Z","timestamp":1293580800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Int J Parallel Prog"],"published-print":{"date-parts":[[2011,6]]},"DOI":"10.1007\/s10766-010-0162-1","type":"journal-article","created":{"date-parts":[[2010,12,28]],"date-time":"2010-12-28T14:26:25Z","timestamp":1293546385000},"page":"271-295","source":"Crossref","is-referenced-by-count":0,"title":["An Evaluation of an OS-Based Coherence Scheme for Tiled CMPs"],"prefix":"10.1007","volume":"39","author":[{"given":"Christian","family":"Fensch","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marcelo","family":"Cintra","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2010,12,29]]},"reference":[{"key":"162_CR1","doi-asserted-by":"crossref","unstructured":"Abts, D., Scott, S., Lilja, D.J.: So many states, so little time: verifying memory coherence in the Cray X1. In: Proceedings of the International Parallel and Distributed Processing Symposium. (2003). doi: 10.1109\/IPDPS.2003.1213087","DOI":"10.1109\/IPDPS.2003.1213087"},{"key":"162_CR2","doi-asserted-by":"crossref","unstructured":"Adve, S.V., Gharachorloo, K.: Shared memory consistency models: a tutorial. IEEE Comput. 29(12) (1996). doi: 10.1109\/2.546611","DOI":"10.1109\/2.546611"},{"key":"162_CR3","unstructured":"Allen, E., Chase, D., Hallett, J., Luchangco, V., Maessen, J.W., Ryu, S., Steele, G.L. 
Jr., Tobin-Hochstadt, S.: The Fortress Language Specification Version 1.0 \u03b2. Sun Microsystems, Inc., http:\/\/research.sun.com\/projects\/plrg\/Publications\/fortress1.0beta.pdf (2007)"},{"key":"162_CR4","doi-asserted-by":"crossref","unstructured":"Beckmann, B.M., Wood, D.A.: Managing wire delay in large chip-multiprocessor caches. In: Proceedings of the 37th Annual IEEE\/ACM International Symposium on Microarchitecture, pp. 319\u2013330. (2004). doi: 10.1109\/MICRO.2004.21","DOI":"10.1109\/MICRO.2004.21"},{"key":"162_CR5","unstructured":"Burger, D., Austin, T.M., Bennett, S.: Evaluating future microprocessors: the SimpleScalar tool set. Technical Report CS-TR-1996-1308, University of Wisconsin-Madison (1996)"},{"issue":"7","key":"162_CR6","doi-asserted-by":"crossref","first-page":"44","DOI":"10.1109\/MC.2004.65","volume":"37","author":"D. Burger","year":"2004","unstructured":"Burger, D., Keckler, S.W., McKinley, K.S., Dahlin, M., John, L.K., Lin, C., Moore, C.R., Burrill, J., McDonald, R.G., Yoder, W., The TRIPS Team: Scaling to the end of silicon with EDGE architectures. IEEE Comput. 37(7), 44\u201355 (2004). doi: 10.1109\/MC.2004.65","journal-title":"IEEE Comput."},{"key":"162_CR7","doi-asserted-by":"crossref","unstructured":"Carter, J.B., Bennett, J.K., Zwaenepoel, W.: Implementation and performance of munin. In: Proceedings of the 13th Symposium on Operating Systems Principles, pp. 152\u2013164 (1991). doi: 10.1145\/121133.121159","DOI":"10.1145\/121133.121159"},{"key":"162_CR8","doi-asserted-by":"crossref","unstructured":"Ca\u015fcaval, C., Casta\u00f1os, J.G., Ceze, L., Denneau, M., Gupta, M., Lieber, D., Moreira, J.E., Strauss, K., Warren, H.S. Jr.: Evaluation of a multithreaded architecture for cellular computing. In: Proceedings of the 8th International Symposium on High-Performance Computer Architecture, pp. 311\u2013322 (2002). 
doi: 10.1109\/HPCA.2002.995720","DOI":"10.1109\/HPCA.2002.995720"},{"key":"162_CR9","doi-asserted-by":"crossref","unstructured":"Chang, J., Sohi, G.S.: Cooperative caching for chip multiprocessors. In: Proceedings of the 33rd Annual International Symposium on Computer Architecture, pp. 264\u2013276 (2006). doi: 10.1109\/ISCA.2006.17","DOI":"10.1109\/ISCA.2006.17"},{"key":"162_CR10","doi-asserted-by":"crossref","unstructured":"Chaudhuri, M., Heinrich, M.: SMTp: an architecture for next-generation scalable multi-threading. In: Proceedings of the 31st Annual International Symposium on Computer Architecture, pp. 124\u2013137 (2004). doi: 10.1109\/ISCA.2004.1310769","DOI":"10.1109\/ISCA.2004.1310769"},{"key":"162_CR11","doi-asserted-by":"crossref","unstructured":"Chishti, Z., Powell, M.D., Vijaykumar, T.N.: Optimizing replication, communication, and capacity allocation in CMPs. In: Proceedings of the 32nd Annual International Symposium on Computer Architecture, pp. 357\u2013368 (2005). doi: 10.1109\/ISCA.2005.39","DOI":"10.1109\/ISCA.2005.39"},{"key":"162_CR12","unstructured":"Cray: Chapel Language Specification 0.785. Cray Inc., http:\/\/chapel.cray.com\/spec-0.785.pdf (2009)"},{"key":"162_CR13","doi-asserted-by":"crossref","unstructured":"Fensch, C., Cintra, M.: An OS-based alternative to full hardware coherence on tiled CMPs. In: Proceedings of the 14th International Symposium on High-Performance Computer Architecture, pp. 355\u2013366 (2008). doi: 10.1109\/HPCA.2008.4658652","DOI":"10.1109\/HPCA.2008.4658652"},{"issue":"3","key":"162_CR14","doi-asserted-by":"crossref","first-page":"183","DOI":"10.1007\/BF02700035","volume":"25","author":"M. Fillo","year":"1997","unstructured":"Fillo M., Keckler S.W., Dally W.J., Carter N.P., Chang A., Gurevich Y., Lee W.S.: The M-machine multicomputer. Int. J. Parallel Programm. 25(3), 183\u2013212 (1997). doi: 10.1007\/BF02700035","journal-title":"Int. J. 
Parallel Programm."},{"key":"162_CR15","unstructured":"Hagersten, E.: Personal Communication regarding the verification of the coherence protocol of Sun Microsystems\u2019 Enterprise Servers E3000, E4000, E5000 and E6000 (2007)"},{"issue":"8","key":"162_CR16","doi-asserted-by":"crossref","first-page":"28","DOI":"10.1109\/2.707614","volume":"31","author":"M.D. Hill","year":"1998","unstructured":"Hill M.D.: Multiprocessors should support simple memory-consistency models. Computer 31(8), 28\u201334 (1998). doi: 10.1109\/2.707614","journal-title":"Computer"},{"key":"162_CR17","doi-asserted-by":"crossref","unstructured":"Iftode, L., Singh, J.P., Li, K.: Understanding applications performance on shared virtual memory systems. In: Proceedings of the 23rd Annual International Symposium on Computer Architecture, pp. 122\u2013133 (1996). doi: 10.1145\/232973.232987","DOI":"10.1145\/232973.232987"},{"key":"162_CR18","unstructured":"Intel: Intel Core2 Extreme Processor X6800 and Intel Core2 Duo Desktop Processor E6000 and E4000 Sequence Specification Update. Intel, document No: 313279-016 (2007)"},{"issue":"2","key":"162_CR19","doi-asserted-by":"crossref","first-page":"40","DOI":"10.1109\/MM.2004.1289290","volume":"24","author":"R. Kalla","year":"2004","unstructured":"Kalla R., Sinharoy B., Tendler J.M.: IBM Power5 chip: a dual-core multithreaded processor. IEEE Micro 24(2), 40\u201347 (2004). doi: 10.1109\/MM.2004.1289290","journal-title":"IEEE Micro"},{"key":"162_CR20","unstructured":"Keleher, P., Cox, A.L., Dwarkadas, S., Zwaenepoel, W.: TreadMarks: distributed shared memory on standard workstations and operating systems. In: USENIX Winter 1994 Technical Conference Proceedings, pp. 115\u2013131 (1994)"},{"key":"162_CR21","doi-asserted-by":"crossref","unstructured":"Kim, C., Burger, D., Keckler, S.W.: An adaptive, non-uniform cache structure for wire-delay dominated on-chip caches. 
In: Proceedings of the 10th International Conference on Architectural Support for Programming Languages and Operating Systems, pp. 211\u2013222 (2002). doi: 10.1145\/605432.605420","DOI":"10.1145\/605432.605420"},{"issue":"2","key":"162_CR22","doi-asserted-by":"crossref","first-page":"21","DOI":"10.1109\/MM.2005.35","volume":"25","author":"P. Kongetira","year":"2005","unstructured":"Kongetira P., Aingaran K., Olukotun K.: Niagara: a 32-way multithreaded sparc processor. IEEE Micro 25(2), 21\u201329 (2005). doi: 10.1109\/MM.2005.35","journal-title":"IEEE Micro"},{"key":"162_CR23","doi-asserted-by":"crossref","unstructured":"Kontothanassis, L.I., Hunt, G., Stets, R., Hardavellas, N., Cierniak, M., Parthasarathy, S., Meira, W. Jr., Dwarkadas, S., Scott, M.L.: VM-based shared memory on low-latency, remote-memory-access networks. In: Proceedings of the 24th Annual International Symposium on Computer Architecture, pp. 157\u2013169 (1997). doi: 10.1145\/384286.264163","DOI":"10.1145\/384286.264163"},{"key":"162_CR24","doi-asserted-by":"crossref","unstructured":"Krashinsky, R., Batten, C., Hampton, M., Gerding, S., Pharris, B., Casper, J., Asanovi\u0107, K.: The vector-thread architecture. In: Proceedings of the 31st Annual International Symposium on Computer Architecture, pp. 52\u201364 (2004). doi: 10.1109\/ISCA.2004.1310763","DOI":"10.1109\/ISCA.2004.1310763"},{"key":"162_CR25","doi-asserted-by":"crossref","unstructured":"Kumar, R., Zyuban, V., Tullsen, D.M.: Interconnections in multi-core architectures: understanding mechanisms, overheads and scaling. In: Proceedings of the 32nd Annual International Symposium on Computer Architecture, pp. 408\u2013419 (2005). 
doi: 10.1109\/ISCA.2005.34","DOI":"10.1109\/ISCA.2005.34"},{"key":"162_CR26","doi-asserted-by":"crossref","unstructured":"Kuskin, J., Ofelt, D., Heinrich, M., Heinlein, J., Simoni, R., Gharachorloo, K., Chapin, J., Nakahira, D., Baxter, J., Horowitz, M., Gupta, A., Rosenblum, M., Hennessy, J.L.: The stanford FLASH multiprocessor. In: Proceedings of the 21st Annual International Symposium on Computer Architecture, pp. 325\u2013337 (1994). doi: 10.1109\/ISCA.1994.288140","DOI":"10.1109\/ISCA.1994.288140"},{"key":"162_CR27","doi-asserted-by":"crossref","unstructured":"Laudon, J., Lenoski, D.: The SGI Origin: a ccNUMA highly scalable server. In: Proceedings of the 24th Annual International Symposium on Computer Architecture, pp. 241\u2013251 (1997). doi: 10.1145\/384286.264206","DOI":"10.1145\/384286.264206"},{"key":"162_CR28","unstructured":"Li, K.: IVY: a shared virtual memory system for parallel computing. In: Proceedings of the 1988 International Conference on Parallel Processing, vol. 2, pp. 94\u2013101, Pennsylvania State University Press (1988)"},{"key":"162_CR29","doi-asserted-by":"crossref","unstructured":"Li, M., Sasanka, R., Adve, S.V., Chen, Y.K., Debes, E.: The ALPBench benchmark suite for complex multimedia applications. In: Proceedings of IEEE International Symposium on Workload Characterization, pp. 34\u201345 (2005). doi: 10.1109\/IISWC.2005.1525999","DOI":"10.1109\/IISWC.2005.1525999"},{"key":"162_CR30","doi-asserted-by":"crossref","unstructured":"Martin, M.M.K., Hill, M.D., Wood, D.A.: Token coherence: decoupling performance and correctness. In: Proceedings of the 30th Annual International Symposium on Computer Architecture, pp. 182\u2013193 (2003). doi: 10.1109\/ISCA.2003.1206999","DOI":"10.1109\/ISCA.2003.1206999"},{"issue":"2","key":"162_CR31","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1109\/MM.2005.34","volume":"25","author":"C. 
McNairy","year":"2005","unstructured":"McNairy C., Bhatia R.: Montecito: a dual-core, dual-thread itanium processor. IEEE Micro 25(2), 10\u201320 (2005). doi: 10.1109\/MM.2005.34","journal-title":"IEEE Micro"},{"key":"162_CR32","doi-asserted-by":"crossref","unstructured":"Scott, S.L.: Synchronization and communication in the T3E multiprocessor. In: Proceedings of the 7th International Conference on Architectural Support for Programming Languages and Operating Systems, pp. 26\u201336 (1996). doi: 10.1145\/237090.237144","DOI":"10.1145\/237090.237144"},{"key":"162_CR33","doi-asserted-by":"crossref","unstructured":"Swanson, S., Michelson, K., Schwerin, A., Oskin, M.: WaveScalar. In: Proceedings of the 36th Annual International Symposium on Microarchitecture, pp. 291\u2013302 (2003). doi: 10.1109\/MICRO.2003.1253203","DOI":"10.1109\/MICRO.2003.1253203"},{"key":"162_CR34","doi-asserted-by":"crossref","unstructured":"Taylor, M.B., Lee, W., Miller, J., Wentzlaff, D., Bratt, I., Greenwald, B., Hoffmann, H., Johnson, P., Kim, J., Psota, J., Saraf, A., Shnidman, N., Strumpen, V., Frank, M., Agarwal, A., Amarasinghe, S.: Evaluation of the raw microprocessor: an exposed-wire-delay architecture for ILP and streams. In: Proceedings of the 31st Annual International Symposium on Computer Architecture, pp. 2\u201313 (2004). doi: 10.1109\/ISCA.2004.1310759","DOI":"10.1109\/ISCA.2004.1310759"},{"key":"162_CR35","doi-asserted-by":"crossref","unstructured":"Vachharajani, M., Vachharajani, N., August, D.I.: The liberty structural specification language: a high-level modeling language for component reuse. In: Proceedings of the ACM SIGPLAN Conference on Programming Language Design and Implementation, pp. 195\u2013206 (2004). doi: 10.1145\/996893.996865","DOI":"10.1145\/996893.996865"},{"key":"162_CR36","doi-asserted-by":"crossref","unstructured":"Verghese, B., Devine, S., Gupta, A., Rosenblum, M.: Operating system support for improving data locality on CC-NUMA compute servers. 
In: Proceedings of the 7th International Conference on Architectural Support for Programming Languages and Operating Systems, pp. 279\u2013289 (1996). doi: 10.1145\/237090.237205","DOI":"10.1145\/237090.237205"},{"key":"162_CR37","doi-asserted-by":"crossref","unstructured":"Woo, S.C., Ohara, M., Torrie, E., Singh, J.P., Gupta, A.: The SPLASH-2 programs: characterization and methodological considerations. In: Proceedings of the 22nd Annual International Symposium on Computer Architecture, pp. 24\u201336 (1995). doi: 10.1145\/223982.223990","DOI":"10.1145\/223982.223990"},{"key":"162_CR38","doi-asserted-by":"crossref","unstructured":"Zeffer, H., Hagersten, E.: A case for low-complexity MP architectures. In: Proceedings of the Conference on Supercomputing (2007). doi: 10.1145\/1362622.1362648","DOI":"10.1145\/1362622.1362648"},{"key":"162_CR39","doi-asserted-by":"crossref","unstructured":"Zeffer, H., Radovi\u0107, Z., Karlsson, M., Hagersten, E.: TMA: a trap-based memory architecture. In: Proceedings of the 20th Annual International Conference on Supercomputing, pp. 259\u2013268 (2006). doi: 10.1145\/1183401.1183438","DOI":"10.1145\/1183401.1183438"},{"key":"162_CR40","doi-asserted-by":"crossref","unstructured":"Zhang, M., Asanovi\u0107, K.: Victim replication: maximizing capacity while hiding wire delay in tiled chip multiprocessors. In: Proceedings of the 32nd Annual International Symposium on Computer Architecture, pp. 336\u2013345 (2005). 
doi: 10.1109\/ISCA.2005.53","DOI":"10.1109\/ISCA.2005.53"}],"container-title":["International Journal of Parallel Programming"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-010-0162-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10766-010-0162-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-010-0162-1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,7]],"date-time":"2019-06-07T10:28:02Z","timestamp":1559903282000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10766-010-0162-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,12,29]]},"references-count":40,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2011,6]]}},"alternative-id":["162"],"URL":"https:\/\/doi.org\/10.1007\/s10766-010-0162-1","relation":{},"ISSN":["0885-7458","1573-7640"],"issn-type":[{"value":"0885-7458","type":"print"},{"value":"1573-7640","type":"electronic"}],"subject":[],"published":{"date-parts":[[2010,12,29]]}}}