{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,2]],"date-time":"2025-03-02T23:40:19Z","timestamp":1740958819704,"version":"3.38.0"},"publisher-location":"Berlin, Heidelberg","reference-count":27,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642195945"},{"type":"electronic","value":"9783642195952"}],"license":[{"start":{"date-parts":[[2011,1,1]],"date-time":"2011-01-01T00:00:00Z","timestamp":1293840000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011]]},"DOI":"10.1007\/978-3-642-19595-2_5","type":"book-chapter","created":{"date-parts":[[2011,2,24]],"date-time":"2011-02-24T07:17:18Z","timestamp":1298531838000},"page":"62-76","source":"Crossref","is-referenced-by-count":0,"title":["Tackling Cache-Line Stealing Effects Using Run-Time Adaptation"],"prefix":"10.1007","author":[{"given":"St\u00e9phane","family":"Zuckerman","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"William","family":"Jalby","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"5_CR1","unstructured":"ParMA: Parallel programming for multi-core architectures - ITEA2 project (06015), http:\/\/www.parma-itea2.org"},{"issue":"4","key":"5_CR2","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1145\/197405.197406","volume":"26","author":"D.F. Bacon","year":"1994","unstructured":"Bacon, D.F., Graham, S.L., Sharp, O.J.: Compiler Transformations for High-Performance Computing. ACM Comput. Surv.\u00a026(4), 345\u2013420 (1994)","journal-title":"ACM Comput. Surv."},{"key":"5_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"421","DOI":"10.1007\/BFb0025894","volume-title":"Languages and Compilers for Parallel Computing","author":"F. Bodin","year":"1995","unstructured":"Bodin, F., Granston, E.D., Montaut, T.: Evaluating two loop transformations for reducing multiple writer false sharing. In: Pingali, K.K., Gelernter, D., Padua, D.A., Banerjee, U., Nicolau, A. (eds.) LCPC 1994. LNCS, vol.\u00a0892, pp. 421\u2013439. Springer, Heidelberg (1995)"},{"key":"5_CR4","unstructured":"Bolosky, W.J., Scott, M.L.: False Sharing and its effect on shared memory performance. In: Proceedings of the USENIX Symposium on Experiences with Distributed and Multiprocessor Systems (SEDMS IV), pp. 57\u201371 (1993)"},{"key":"5_CR5","doi-asserted-by":"crossref","unstructured":"Garzaran, M., Brit, J., Ibanez, P., Vinals, V.: Hardware Prefetching in Bus-Based Multiprocessors: Pattern Characterization and Cost-Effective Hardware. In: Ninth Euromicro Workshop on Parallel and Distributed Processing, pp. 345\u2013354 (2001)","DOI":"10.1109\/EMPDP.2001.905061"},{"key":"5_CR6","unstructured":"Holmes, G., Donkin, A., Witten, I.: WEKA: a machine learning workbench. In: Proceedings of the 1994 Second Australian and New Zealand Conference on Intelligent Information Systems, pp. 357\u2013361 (1994)"},{"key":"5_CR7","unstructured":"Gornish, E.H., Veidenbaum, A.: An integrated hardware\/software data prefetching scheme for shared-memory multiprocessors. Intl. Journal of Parallel Programming, 35\u201370 (1999)"},{"key":"5_CR8","unstructured":"Hedge, R.: Optimizing application performance on intel core microarchitecture using hardware-implemented prefetchers (2008), http:\/\/software.intel.com\/en-us\/articles\/optimizing-application-performance-on-intel-coret-microarchitecture-using-hardware-implemented-prefetchers\/"},{"issue":"2","key":"5_CR9","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1006\/jpdc.1996.0054","volume":"34","author":"R.L. Hyde","year":"1996","unstructured":"Hyde, R.L., Fleisch, B.D.: An analysis of degenerate sharing and false coherence. J. Parallel Distrib. Comput.\u00a034(2), 183\u2013195 (1996)","journal-title":"J. Parallel Distrib. Comput."},{"key":"5_CR10","doi-asserted-by":"crossref","unstructured":"Jeremiassen, T.E., Eggers, S.J.: Reducing False Sharing on Shared Memory Multiprocessors through Compile Time Data Transformations. In: PPOPP, pp. 179\u2013188 (1995)","DOI":"10.1145\/209937.209955"},{"key":"5_CR11","unstructured":"Jerger, N., Hill, E., Lipasti, M.: Friendly fire: understanding the effects of multiprocessor prefetches. In: IEEE International Symmposium on Performance Analysis of Systems and Software, pp. 177\u2013188 (2006)"},{"key":"5_CR12","first-page":"359","volume-title":"ICS 2008","author":"L. Liu","year":"2008","unstructured":"Liu, L., Li, Z., Sameh, A.H.: Analyzing memory access intensity in parallel programs on multicore. In: ICS 2008, pp. 359\u2013367. ACM, New York (2008)"},{"issue":"4","key":"5_CR13","doi-asserted-by":"publisher","first-page":"390","DOI":"10.1145\/1187976.1187978","volume":"3","author":"J. Marathe","year":"2006","unstructured":"Marathe, J., Mueller, F., de Supinski, B.R.: Analysis of cache-coherence bottlenecks with hybrid hardware\/software techniques. ACM Trans. Archit. Code Optim.\u00a03(4), 390\u2013423 (2006)","journal-title":"ACM Trans. Archit. Code Optim."},{"issue":"1","key":"5_CR14","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1145\/273011.273021","volume":"16","author":"T.C. Mowry","year":"1998","unstructured":"Mowry, T.C.: Tolerating Latency in Multiprocessors Through Compiler-Inserted Prefetching. ACM Trans. Comput. Syst.\u00a016(1), 55\u201392 (1998)","journal-title":"ACM Trans. Comput. Syst."},{"key":"5_CR15","first-page":"348","volume-title":"ISCA 1984: Proceedings of the 11th Annual International Symposium on Computer Architecture","author":"M.S. Papamarcos","year":"1984","unstructured":"Papamarcos, M.S., Patel, J.H.: A low-overhead coherence solution for multiprocessors with private cache memories. In: ISCA 1984: Proceedings of the 11th Annual International Symposium on Computer Architecture, pp. 348\u2013354. ACM, New York (1984)"},{"key":"5_CR16","first-page":"271","volume-title":"CGO","author":"E. Raman","year":"2007","unstructured":"Raman, E., Hundt, R., Mannarswamy, S.: Structure Layout Optimization for Multithreaded Programs. In: CGO, pp. 271\u2013282. IEEE Computer Society, Los Alamitos (2007)"},{"key":"5_CR17","doi-asserted-by":"crossref","unstructured":"Skeppstedt, J., Dubois, M.: Hybrid compiler\/hardware prefetching for multiprocessors using low-overhead cache miss traps. In: International Conference on Parallel Processing, p. 298 (1997)","DOI":"10.1109\/ICPP.1997.622659"},{"key":"5_CR18","first-page":"209","volume-title":"PPoPP 2009: Proceedings of the 14th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","author":"S.W. Son","year":"2009","unstructured":"Son, S.W., Kandemir, M., Karakoy, M., Chakrabarti, D.: A compiler-directed data prefetching scheme for chip multiprocessors. In: PPoPP 2009: Proceedings of the 14th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 209\u2013218. ACM, New York (2009)"},{"key":"5_CR19","first-page":"51","volume-title":"ICPP 2007: Proceedings of the 2007 International Conference on Parallel Processing","author":"F. Song","year":"2007","unstructured":"Song, F., Moore, S., Dongarra, J.: L2 cache modeling for scientific applications on chip multi-processors. In: ICPP 2007: Proceedings of the 2007 International Conference on Parallel Processing, Washington, DC, USA, p. 51. IEEE Computer Society, Los Alamitos (2007)"},{"key":"5_CR20","doi-asserted-by":"crossref","unstructured":"Struik, P., van der Wolf, P., Pimentel, A.D.: A combined hardware\/software solution for stream prefetching in multimedia applications (1998)","DOI":"10.1117\/12.304664"},{"key":"5_CR21","doi-asserted-by":"publisher","first-page":"651","DOI":"10.1109\/12.286299","volume":"43","author":"J. Torrellas","year":"1994","unstructured":"Torrellas, J., Lam, M.S., Hennessy, J.L.: False sharing and spatial locality in multiprocessor caches. IEEE Transactions on Computers\u00a043, 651\u2013663 (1994)","journal-title":"IEEE Transactions on Computers"},{"key":"5_CR22","unstructured":"Wallin, D., Hagersten, E.: Miss penalty reduction using bundled capacity prefetching in multiprocessors. In: International Parallel and Distributed Processing Symposium, p. 12a (2003)"},{"key":"5_CR23","doi-asserted-by":"crossref","unstructured":"Wang, Z., Burger, D., McKinley, K.S., Reinhardt, S.K., Weems, C.C.: Guided region prefetching: A cooperative hardware\/software approach. In: Proceedings of the 30th International Symposium on Computer Architecture, pp. 388\u2013398 (2003)","DOI":"10.1145\/859662.859663"},{"key":"5_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"318","DOI":"10.1007\/978-3-540-73940-1_33","volume-title":"Parallel Computing Technologies","author":"J. Weidendorfer","year":"2007","unstructured":"Weidendorfer, J., Ott, M., Klug, T., Trinitis, C.: Latencies of Conflicting Writes on Contemporary Multicore Architectures. In: Malyshkin, V.E. (ed.) PaCT 2007. LNCS, vol.\u00a04671, pp. 318\u2013327. Springer, Heidelberg (2007)"},{"key":"5_CR25","unstructured":"Whitepaper, I.: Optimizing Embedded System Performance - Impact of Data Prefetching on a Medical Imaging Application (2006), http:\/\/download.intel.com\/technology\/advanced_comm\/315697.pdf"},{"key":"5_CR26","doi-asserted-by":"crossref","unstructured":"Williams, S., Oliker, L., Vuduc, R.W., Shalf, J., Yelick, K.A., Demmel, J.: Optimization of sparse matrix-vector multiplication on emerging multicore platforms. In: SC 2007, p. 38 (2007)","DOI":"10.1145\/1362622.1362674"},{"key":"5_CR27","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1145\/216585.216588","volume":"23","author":"W.A. Wulf","year":"1995","unstructured":"Wulf, W.A., McKee, S.A.: Hitting the Memory Wall: Implications of the Obvious. Computer Architecture News\u00a023, 20\u201324 (1995)","journal-title":"Computer Architecture News"}],"container-title":["Lecture Notes in Computer Science","Languages and Compilers for Parallel Computing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-19595-2_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,2]],"date-time":"2025-03-02T23:18:17Z","timestamp":1740957497000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-19595-2_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011]]},"ISBN":["9783642195945","9783642195952"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-19595-2_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2011]]}}}