{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T03:40:07Z","timestamp":1769830807978,"version":"3.49.0"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2015,1,18]],"date-time":"2015-01-18T00:00:00Z","timestamp":1421539200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2015,4]]},"DOI":"10.1007\/s11227-014-1374-8","type":"journal-article","created":{"date-parts":[[2015,1,17]],"date-time":"2015-01-17T05:21:05Z","timestamp":1421472065000},"page":"1451-1483","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Addressing characterization methods for memory contention aware co-scheduling"],"prefix":"10.1007","volume":"71","author":[{"given":"Andreas","family":"de Blanche","sequence":"first","affiliation":[]},{"given":"Thomas","family":"Lundqvist","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,1,18]]},"reference":[{"key":"1374_CR1","unstructured":"Akyil L et al (2012) Memory management and programming tools. In: Intel guide for developing multithreaded applications, Intel Corporation, pp 1\u2013133. http:\/\/software.intel.com\/en-us\/articles\/intel-guide-for-developing-multithreaded-applications"},{"key":"1374_CR2","doi-asserted-by":"crossref","unstructured":"Antonopoulos CD, Nikolopoulos DS, Papatheodorou TS (2004) Realistic workload scheduling policies for taming the memory bandwidth bottleneck of smps., International conference on high performance computing, Springer, Berlin","DOI":"10.1007\/978-3-540-30474-6_33"},{"key":"1374_CR3","doi-asserted-by":"crossref","unstructured":"Araiza R, Aguilera MG, Pham T, Teller PJ (2005) Towards a cross-platform microbenchmark suite for evaluating hardware performance counter data. In: Proceedings of the 2005 conference on diversity in computing, ACM, New York, NY, USA, TAPIA \u201905, pp 36\u201339. doi: 10.1145\/1095242.1095259","DOI":"10.1145\/1095242.1095259"},{"issue":"4","key":"1374_CR4","doi-asserted-by":"crossref","first-page":"8:1","DOI":"10.1145\/1880018.1880019","volume":"28","author":"S Blagodurov","year":"2010","unstructured":"Blagodurov S, Zhuravlev S, Fedorova A (2010) Contention-aware scheduling on multicore systems. ACM Trans Comput Syst 28(4):8:1\u20138:45. doi: 10.1145\/1880018.1880019","journal-title":"ACM Trans Comput Syst"},{"key":"1374_CR5","doi-asserted-by":"crossref","unstructured":"de Blanche A, Lundqvist T (2014) A methodology for estimating co-scheduling slowdowns due to memory bus contention on multicore nodes. In: International conference on parallel and distributed computing and networks","DOI":"10.2316\/P.2014.811-027"},{"key":"1374_CR6","unstructured":"de Blanche A, Mankefors-Christiernin S (2010) Method for experimental measurement of an applications memory bus usage. In: International conference on parallel and distributed processing techniques and applications, CRSEA"},{"key":"1374_CR7","unstructured":"Boklund A, Jiresjo C, Mankefors-Christiernin S, Namaki N, Gustavsson-Christiernin L, Ebbmar M (2005) Performance of network subsystems for technical simulation on linux clusters. In: Conference on parallel and distributed computing and systems, pp 503\u2013509"},{"key":"1374_CR8","unstructured":"Boklund A, Namaki N, Mankefors-Christiernin S, Gustafsson J, Lingbrand M (2008) Dual core efficiency for engineering simulation applications. In: International conference on parallel and distributed processing techniques and applications, pp 962\u2013968"},{"key":"1374_CR9","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1177\/109434200001400303","volume":"14","author":"S Browne","year":"2000","unstructured":"Browne S, Dongarra J, Garner N, London K, Mucci P (2000) A portable programming interface for performance evaluation on modern processors. Int J High Perform Comput Appl 14:189\u2013204","journal-title":"Int J High Perform Comput Appl"},{"key":"1374_CR10","doi-asserted-by":"crossref","unstructured":"Cascaval C, Rose LD, Padua DA, Reed DA (2000) Compile-time based performance prediction. In: Proceedings of the 12th international workshop on languages and compilers for parallel computing, Springer, London, LCPC \u201999, pp 365\u2013379. http:\/\/dl.acm.org\/citation.cfm?id=645677.663790","DOI":"10.1007\/3-540-44905-1_23"},{"key":"1374_CR11","series-title":"International symposium on high-performance computer architecture","volume-title":"Predicting inter-thread cache contention on a chip multi-processor architecture","author":"D Chandra","year":"2005","unstructured":"Chandra D, Guo F, Kim S, Solihin Y (2005) Predicting inter-thread cache contention on a chip multi-processor architecture., International symposium on high-performance computer architectureIEEE Computer Society, Washington, DC, USA"},{"key":"1374_CR12","doi-asserted-by":"crossref","unstructured":"Daci G, Tartari M (2013) A comparative review of contention-aware scheduling algorithms to avoid contention in multicore systems. In: Das VV (ed) Proceedings of the third international conference on trends in information, telecommunication and computing, vol 150, lecture notes in electrical engineering, Springer, New York, pp 99\u2013106","DOI":"10.1007\/978-1-4614-3363-7_12"},{"key":"1374_CR13","doi-asserted-by":"crossref","unstructured":"Eklov D, Nikoleris N, Black-Schaffer D, Hagersten E (2011) Cache pirating: measuring the curse of the shared cache. In: Parallel processing (ICPP), 2011 International conference on, pp 165\u2013175. doi: 10.1109\/ICPP.2011.15","DOI":"10.1109\/ICPP.2011.15"},{"key":"1374_CR14","doi-asserted-by":"crossref","unstructured":"Eklov D, Nikoleris N, Black-Schaffer D, Hagersten E (2012) Bandwidth bandit: quantitative characterization of memory contention. In: Proceedings of the 21st international conference on parallel architectures and compilation techniques, ACM, New York, PACT \u201912, pp 457\u2013458. doi: 10.1145\/2370816.2370894","DOI":"10.1145\/2370816.2370894"},{"key":"1374_CR15","unstructured":"Eranian S (2008) What can performance counters do for memory subsystem analysis? ACM SIGPLAN workshop on Memory systems performance and correctness: in conjunction with the thirteenth international conference on architectural support for programming languages and operating systems. ACM, New York, pp 26\u201330"},{"issue":"2","key":"1374_CR16","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1145\/1646353.1646371","volume":"53","author":"A Fedorova","year":"2010","unstructured":"Fedorova A, Blagodurov S, Zhuravlev S (2010) Managing contention for shared resources on multicore processors. Commun ACM 53(2):49\u201357. doi: 10.1145\/1646353.1646371","journal-title":"Commun ACM"},{"key":"1374_CR17","unstructured":"Field D, Johnson D, Mize D, Stober R (2007) Scheduling to overcome the multi-core memory bandwidth bottleneck. Hewlett Packard and Platform Computing White Paper"},{"key":"1374_CR18","unstructured":"Guo F (2008) Analyzing and managing shared cache in chip multi-processors. PhD thesis, North Carolina State University"},{"issue":"3","key":"1374_CR19","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1109\/MM.2007.56","volume":"27","author":"K Hoste","year":"2007","unstructured":"Hoste K, Eeckhout L (2007) Microarchitecture-independent workload characterization. IEEE Micro 27(3):63\u201372. doi: 10.1109\/MM.2007.56","journal-title":"IEEE Micro"},{"issue":"1","key":"1374_CR20","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1145\/1269899.1254886","volume":"35","author":"R Iyer","year":"2007","unstructured":"Iyer R, Zhao L, Guo F, Illikkal R, Makineni S, Newell D, Solihin Y, Hsu L, Reinhardt S (2007) Qos policies and architecture for cache\/memory in cmp platforms. SIGMETRICS Perform Eval Rev 35(1):25\u201336. doi: 10.1145\/1269899.1254886","journal-title":"SIGMETRICS Perform Eval Rev"},{"key":"1374_CR21","doi-asserted-by":"crossref","unstructured":"Jia G, Sheng W, Dai W, Li X (2011) Using fom predicting method for scheduling on chip multi-processor. In: Communication software and networks (ICCSN), 2011 IEEE 3rd international conference on, pp 579\u2013584. doi: 10.1109\/ICCSN.2011.6013973","DOI":"10.1109\/ICCSN.2011.6013973"},{"key":"1374_CR22","doi-asserted-by":"crossref","unstructured":"Jiang Y, Shen X, Chen J, Tripathi R (2008) Analysis and approximation of optimal co-scheduling on chip multiprocessors. International conference on parallel architectures and compilation techniques. NY, USA, New York, pp 220\u2013229","DOI":"10.1145\/1454115.1454146"},{"key":"1374_CR23","doi-asserted-by":"crossref","unstructured":"Koller R, Verma A, Rangaswami R (2011) Estimating application cache requirement for provisioning caches in virtualized systems. In: Modeling, analysis simulation of computer and telecommunication systems (MASCOTS), 2011 IEEE 19th international symposium on, pp 55\u201362. doi: 10.1109\/MASCOTS.2011.67","DOI":"10.1109\/MASCOTS.2011.67"},{"key":"1374_CR24","doi-asserted-by":"crossref","unstructured":"Koukis E, Koziris N (2006) Memory and network bandwidth aware scheduling of multiprogrammed workloads on clusters of smps. International conference on parallel and distributed systems, vol 1. IEEE Computer Society, Washington, DC, pp 345\u2013354","DOI":"10.1109\/ICPADS.2006.59"},{"key":"1374_CR25","unstructured":"Levinthal D (2007) Performance analysis guide for intel core i7 processor and intel xeon 5500 processors. Intel White Paper, from internet 2014. http:\/\/software.intel.com\/sites\/products\/collateral\/hpc\/vtune\/resolving_multicore_non_scaling.pdf"},{"key":"1374_CR26","unstructured":"Levinthal D (2009) Analyzing and resolving multi-core non scaling on intel core 2 processors. Intel White Paper, from internet 2014. https:\/\/software.intel.com\/sites\/products\/collateral\/hpc\/vtun\/performance_analysis_guide.pdf"},{"issue":"1","key":"1374_CR27","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1007\/s11227-014-1156-3","volume":"69","author":"X Liu","year":"2014","unstructured":"Liu X, Tong W, Zhi X, ZhiRen F, WenZhao L (2014) Performance analysis of cloud computing services considering resources sharing among virtual machines. J Supercomput 69(1):357\u2013374. doi: 10.1007\/s11227-014-1156-3","journal-title":"J Supercomput"},{"key":"1374_CR28","doi-asserted-by":"crossref","unstructured":"Mars J, Vachharajani N, Hundt R, Soffa ML (2010) Contention aware execution: online contention detection and response. In: CGO \u201910: proceedings of the 2010 international symposium on code generation and optimization, ACM, New York, pp 257\u2013265. doi: 10.1145\/1772954.1772991","DOI":"10.1145\/1772954.1772991"},{"key":"1374_CR29","doi-asserted-by":"crossref","unstructured":"Mars J, Tang L, Hundt R, Skadron K, Soffa ML (2011) Bubble-up: increasing utilization in modern warehouse scale computers via sensible co-locations. In: MICRO \u201911: proceedings of the 44th annual IEEE\/ACM international symposium on microarchitecture, ACM, New York","DOI":"10.1145\/2155620.2155650"},{"key":"1374_CR30","doi-asserted-by":"crossref","unstructured":"Mars J, Tang L, Hundt R, Skadron K, Soffa ML (2012) Increasing utilization in warehouse scale computers using bubbleup. IEEE Micro","DOI":"10.1109\/MM.2012.22"},{"key":"1374_CR31","unstructured":"McCalpin JD (1995) Memory bandwidth and machine balance in current high performance computers. In: IEEE computer society technical committee on computer architecture newsletter pp 19\u201325"},{"key":"1374_CR32","doi-asserted-by":"crossref","unstructured":"Namaki N, de Blanche A, Mankefors-Christiernin S (2009a) Exhaustion dominated performance: a first attempt. In: Proceedings of the 2009 ACM symposium on applied computing, ACM, New York, SAC \u201909, pp 1011\u20131012. doi: 10.1145\/1529282.1529504","DOI":"10.1145\/1529282.1529504"},{"key":"1374_CR33","unstructured":"Namaki N, de Blanche A, Mankefors-Christiernin S (2009b) A tool for processor dependency characterization of hpc applications. In: International Conference HPC Asia 2009"},{"key":"1374_CR34","unstructured":"Namaki N, de Blanche A, Mankefors-Christiernin S (2010) Black-box characterization of processor workloads for engineering applications. In: IEEE international symposium on workload characterization, IEEE"},{"issue":"3","key":"1374_CR35","doi-asserted-by":"crossref","first-page":"520","DOI":"10.1007\/s11227-011-0612-6","volume":"61","author":"T Niemi","year":"2012","unstructured":"Niemi T, Hameri AP (2012) Memory-based scheduling of scientific computing clusters. J Supercomput 61(3):520\u2013544. doi: 10.1007\/s11227-011-0612-6","journal-title":"J Supercomput"},{"key":"1374_CR36","unstructured":"Publications NASD (2009) Nas parallel benchmarks. http:\/\/www.nas.nasa.gov\/publications\/npb.html"},{"key":"1374_CR37","unstructured":"Singer N (2009) More chip cores can mean slower supercomputing, sandia simulation shows. Sandia National Laboratories News Release"},{"key":"1374_CR38","doi-asserted-by":"crossref","unstructured":"Tam DK, Azimi R, Soares LB, Stumm M (2009) Rapidmrc: approximating l2 miss rate curves on commodity systems for online optimizations. In: Proceedings of the 14th international conference on architectural support for programming languages and operating systems, ACM, New York, ASPLOS XIV, pp 121\u2013132. doi: 10.1145\/1508244.1508259","DOI":"10.1145\/1508244.1508259"},{"key":"1374_CR39","doi-asserted-by":"crossref","unstructured":"Tang L, Mars J, Vachharajani N, Hundt R, Soffa ML (2011) The impact of memory subsystem resource sharing on datacenter applications. In: ISCA \u201911: Proceeding of the 38th annual international symposium on computer architecture, ACM, New York, ISCA \u201911, pp 283\u2013294. doi: 10.1145\/2000064.2000099","DOI":"10.1145\/2000064.2000099"},{"issue":"3","key":"1374_CR40","doi-asserted-by":"crossref","first-page":"1113","DOI":"10.1007\/s11227-014-1142-9","volume":"68","author":"G Utrera","year":"2014","unstructured":"Utrera G, Corbalan J, Labarta J (2014) Scheduling parallel jobs on multicore clusters using cpu oversubscription. J Supercomput 68(3):1113\u20131140. doi: 10.1007\/s11227-014-1142-9","journal-title":"J Supercomput"},{"key":"1374_CR41","doi-asserted-by":"crossref","unstructured":"Xu D, Wu C, Yew PC (2010) On mitigating memory bandwidth contention through bandwidth-aware scheduling. International conference on parallel architectures and compilation techniques. New York, USA, pp 237\u2013248","DOI":"10.1145\/1854273.1854306"},{"issue":"3","key":"1374_CR42","doi-asserted-by":"crossref","first-page":"199","DOI":"10.1007\/s11227-008-0256-3","volume":"52","author":"CT Yang","year":"2010","unstructured":"Yang CT, Leu FY, Chen SY (2010) Network bandwidth-aware job scheduling with dynamic information model for grid resource brokers. J Supercomput 52(3):199\u2013223. doi: 10.1007\/s11227-008-0256-3","journal-title":"J Supercomput"},{"key":"1374_CR43","doi-asserted-by":"crossref","unstructured":"Yang LT, Ma X, Mueller F (2005) Cross-platform performance prediction of parallel applications using partial execution. In: Proceedings of the 2005 ACM\/IEEE conference on supercomputing, IEEE Computer Society, Washington, DC, USA, SC \u201905. doi: 10.1109\/SC.2005.20","DOI":"10.1109\/SC.2005.20"},{"key":"1374_CR44","series-title":"ASPLOS on Architectural support for programming languages and operating systems.","volume-title":"Addressing shared resource contention in multicore processors via scheduling","author":"S Zhuravlev","year":"2010","unstructured":"Zhuravlev S, Blagodurov S, Fedorova A (2010) Addressing shared resource contention in multicore processors via scheduling., ASPLOS on Architectural support for programming languages and operating systems.ACM, New York"},{"issue":"1","key":"1374_CR45","doi-asserted-by":"crossref","first-page":"4:1","DOI":"10.1145\/2379776.2379780","volume":"45","author":"S Zhuravlev","year":"2012","unstructured":"Zhuravlev S, Saez JC, Blagodurov S, Fedorova A, Prieto M (2012) Survey of scheduling techniques for addressing shared resources in multicore processors. ACM Comput Surv 45(1):4:1\u20134:28. doi: 10.1145\/2379776.2379780","journal-title":"ACM Comput Surv"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-014-1374-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-014-1374-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-014-1374-8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,19]],"date-time":"2019-08-19T16:20:45Z","timestamp":1566231645000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-014-1374-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,1,18]]},"references-count":45,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2015,4]]}},"alternative-id":["1374"],"URL":"https:\/\/doi.org\/10.1007\/s11227-014-1374-8","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,1,18]]}}}