{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:48:20Z","timestamp":1742914100223,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":25,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642406973"},{"type":"electronic","value":"9783642406980"}],"license":[{"start":{"date-parts":[[2013,1,1]],"date-time":"2013-01-01T00:00:00Z","timestamp":1356998400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2013,1,1]],"date-time":"2013-01-01T00:00:00Z","timestamp":1356998400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013]]},"DOI":"10.1007\/978-3-642-40698-0_8","type":"book-chapter","created":{"date-parts":[[2013,8,12]],"date-time":"2013-08-12T05:08:30Z","timestamp":1376284110000},"page":"99-113","source":"Crossref","is-referenced-by-count":13,"title":["An OpenMP* Barrier Using SIMD Instructions for Intel\u00ae Xeon PhiTM Coprocessor"],"prefix":"10.1007","author":[{"given":"Diego","family":"Caballero","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alejandro","family":"Duran","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xavier","family":"Martorell","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"8_CR1","unstructured":"Balanced affinity type. Intel\u00ae C++ Compiler XE 13.1 User and Reference Guides, \n                      http:\/\/software.intel.com\/sites\/products\/documentation\/doclib\/iss\/2013\/compiler\/cpp-lin\/\n                     (accessed: May 09,2013)"},{"key":"8_CR2","unstructured":"Intel\u00ae Xeon PhiTM Coprocessor - The Architecture, \n                      http:\/\/software.intel.com\/en-us\/articles\/intel-xeon-phi-coprocessor-codename-knights-corner\n                     (accessed: May 09, 2013)"},{"key":"8_CR3","unstructured":"Intel\u00ae Xeon PhiTM Coprocessor Instruction Set Architecture Reference Manual (2012)"},{"key":"8_CR4","doi-asserted-by":"crossref","unstructured":"Abell\u00e1n, J.L., Fern\u00e1ndez, J., Acacio, M.E.: Efficient and scalable barrier synchronization for many-core CMPs. In: Proceedings of the 7th ACM International Conference on Computing Frontiers, CF 2010, pp. 73\u201374 (2010)","DOI":"10.1145\/1787275.1787289"},{"key":"8_CR5","doi-asserted-by":"crossref","unstructured":"Alm\u00e1si, G., Heidelberger, P., Archer, C.J., Martorell, X., Erway, C.C., Moreira, J.E., Steinmacher-Burow, B., Zheng, Y.: Optimization of MPI collective communication on BlueGene\/L systems. In: Proc. of the 19th Int. Conf. on Supercomp., ICS 2005 (2005)","DOI":"10.1145\/1088149.1088183"},{"key":"8_CR6","doi-asserted-by":"crossref","unstructured":"Bailey, D.H., Barszcz, E., Barton, J.T., Browning, D.S., Carter, R.L., Dagum, L., Fatoohi, R.A., Frederickson, P.O., Lasinski, T.A., Schreiber, R.S., Simon, H.D., Venkatakrishnan, V., Weeratunga, S.K.: The NAS parallel benchmarks - summary and preliminary results. In: Proc. of the 1991 ACM\/IEEE Conf. on Supercomp., SC 1991, pp. 158\u2013165 (1991)","DOI":"10.1145\/125826.125925"},{"key":"8_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1007\/978-3-642-30961-8_24","volume-title":"OpenMP in a Heterogeneous World","author":"J.M. Bull","year":"2012","unstructured":"Bull, J.M., Reid, F., McDonnell, N.: A microbenchmark suite for openMP tasks. In: Chapman, B.M., Massaioli, F., M\u00fcller, M.S., Rorro, M. (eds.) IWOMP 2012. LNCS, vol.\u00a07312, pp. 271\u2013274. Springer, Heidelberg (2012)"},{"key":"8_CR8","unstructured":"Eichenberger, A.E., Abraham, S.G.: Impact of load imbalance on the design of software barriers. In: Proc. of the 1995 Int. Conf. on Parallel Processing, pp. 63\u201372 (1995)"},{"issue":"2","key":"8_CR9","doi-asserted-by":"publisher","first-page":"175","DOI":"10.1109\/TC.1983.1676201","volume":"-32","author":"A. Gottlieb","year":"1983","unstructured":"Gottlieb, A., Grishman, R., Kruskal, C.P., McAuliffe, K.P., Rudolph, L., Snir, M.: The NYU ultracomputer. designing an MIMD shared memory parallel computer. IEEE Transactions on Computers C-32(2), 175\u2013189 (1983)","journal-title":"IEEE Transactions on Computers C"},{"issue":"3","key":"8_CR10","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1007\/BF01407897","volume":"18","author":"R. Gupta","year":"1989","unstructured":"Gupta, R., Hill, C.R.: A scalable implementation of barrier synchronization using an adaptive combining tree. Internat. Journal of Parallel Programming\u00a018(3), 161\u2013180 (1989)","journal-title":"Internat. Journal of Parallel Programming"},{"issue":"2","key":"8_CR11","doi-asserted-by":"publisher","first-page":"54","DOI":"10.1145\/68182.68187","volume":"17","author":"R. Gupta","year":"1989","unstructured":"Gupta, R.: The fuzzy barrier: a mechanism for high speed synchronization of processors. SIGARCH Comput. Archit. News\u00a017(2), 54\u201363 (1989)","journal-title":"SIGARCH Comput. Archit. News"},{"key":"8_CR12","unstructured":"Hoefler, T., Mehlan, T., Mietke, F., Rehm, W.: A survey of barrier algorithms for coarse grained supercomputers chemnitzer informatik berichte (2004)"},{"key":"8_CR13","doi-asserted-by":"publisher","first-page":"746","DOI":"10.1145\/1391469.1391660","volume-title":"Proceed. of the 45th Annual Design Automation Conference, DAC 2008","author":"W. Huang","year":"2008","unstructured":"Huang, W., Stant, M.R., Sankaranarayanan, K., Ribando, R.J., Skadron, K.: Many-core design from a thermal perspective. In: Proceed. of the 45th Annual Design Automation Conference, DAC 2008, pp. 746\u2013749. ACM, New York (2008)"},{"key":"8_CR14","unstructured":"McMahon, F.H.: The Livermore Fortran kernels: A computer test of the numerical performance range (1986)"},{"issue":"1","key":"8_CR15","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1145\/103727.103729","volume":"9","author":"J.M. Mellor-Crummey","year":"1991","unstructured":"Mellor-Crummey, J.M., Scott, M.L.: Algorithms for scalable synchronization on shared-memory multiprocessors. ACM Trans. Comput. Syst.\u00a09(1), 21\u201365 (1991)","journal-title":"ACM Trans. Comput. Syst."},{"key":"8_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1007\/978-3-642-02303-3_4","volume-title":"Evolving OpenMP in an Age of Extreme Parallelism","author":"R. Nanjegowda","year":"2009","unstructured":"Nanjegowda, R., Hernandez, O., Chapman, B., Jin, H.H.: Scalability evaluation of barrier algorithms for openMP. In: M\u00fcller, M.S., de Supinski, B.R., Chapman, B.M. (eds.) IWOMP 2009. LNCS, vol.\u00a05568, pp. 42\u201352. Springer, Heidelberg (2009)"},{"key":"8_CR17","doi-asserted-by":"crossref","unstructured":"Petrini, F., Kerbyson, D.J., Pakin, S.: The case of the missing supercomputer performance: Achieving optimal performance on the 8,192 processors of ASCI Q. In: Proceedings of the 2003 ACM\/IEEE Conference on Supercomputing, SC 2003, p. 55 (2003)","DOI":"10.1145\/1048935.1050204"},{"issue":"10","key":"8_CR18","doi-asserted-by":"publisher","first-page":"943","DOI":"10.1109\/TC.1985.6312198","volume":"-34","author":"G.F. Pfister","year":"1985","unstructured":"Pfister, G.F., Norton, V.A.: Hot-spot contention and combining in multistage interconnection networks. IEEE Transactions on Computers C-34(10), 943\u2013948 (1985)","journal-title":"IEEE Transactions on Computers C"},{"key":"8_CR19","doi-asserted-by":"crossref","unstructured":"Sampson, J., Gonzalez, R., Collard, J., Jouppi, N.P., Schlansker, M., Calder, B.: Exploiting fine-grained data parallelism with chip multiprocessors and fast barriers. In: Proc. of the 39th Annual IEEE\/ACM Int. Symp. on Microarchitecture, MICRO 39, pp. 235\u2013246 (2006)","DOI":"10.1109\/MICRO.2006.23"},{"key":"8_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1007\/978-3-642-11515-8_4","volume-title":"High Performance Embedded Architectures and Compilers","author":"J. Sartori","year":"2010","unstructured":"Sartori, J., Kumar, R.: Low-overhead, high-speed multi-core barrier synchronization. In: Patt, Y.N., Foglia, P., Duesterwald, E., Faraboschi, P., Martorell, X. (eds.) HiPEAC 2010. LNCS, vol.\u00a05952, pp. 18\u201334. Springer, Heidelberg (2010)"},{"issue":"4","key":"8_CR21","doi-asserted-by":"publisher","first-page":"449","DOI":"10.1007\/BF02577741","volume":"22","author":"M.L. Scott","year":"1994","unstructured":"Scott, M.L., Mellor-Crummey, J.M.: Fast, contention-free combining tree barriers for shared-memory multiprocessors. Int. Journal of Parallel Prog.\u00a022(4), 449\u2013481 (1994)","journal-title":"Int. Journal of Parallel Prog."},{"issue":"9","key":"8_CR22","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1145\/248209.237144","volume":"31","author":"S.L. Scott","year":"1996","unstructured":"Scott, S.L.: Synchronization and communication in the T3E multiprocessor. SIGPLAN Not.\u00a031(9), 26\u201336 (1996)","journal-title":"SIGPLAN Not."},{"key":"8_CR23","doi-asserted-by":"crossref","unstructured":"Villa, O., Palermo, G., Silvano, C.: Efficiency and scalability of barrier synchronization on NoC based many-core architectures. In: Proceedings of the 2008 International Conference on Compilers, Architectures and Synthesis for Embedded Systems, pp. 81\u201390 (2008)","DOI":"10.1145\/1450095.1450110"},{"issue":"4","key":"8_CR24","first-page":"388","volume":"-36","author":"P. Yew","year":"1987","unstructured":"Yew, P., Tzeng, N., Lawrie, D.H.: Distributing hot-spot addressing in large-scale multiprocessors. IEEE Transactions on Computers C-36(4), 388\u2013395 (1987)","journal-title":"IEEE Transactions on Computers C"},{"key":"8_CR25","doi-asserted-by":"crossref","unstructured":"Zhang, G., Mart\u00ednez, F., Tal, A., Blainey, B.: Busy-wait barrier synchronization using distributed counters with local sensor. In: Proc. of the WOMPAT, pp. 84\u201398 (2003)","DOI":"10.1007\/3-540-45009-2_7"}],"container-title":["Lecture Notes in Computer Science","OpenMP in the Era of Low Power Devices and Accelerators"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-40698-0_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,7]],"date-time":"2023-08-07T10:05:03Z","timestamp":1691402703000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-40698-0_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013]]},"ISBN":["9783642406973","9783642406980"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-40698-0_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2013]]}}}