{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,7,22]],"date-time":"2024-07-22T07:18:26Z","timestamp":1721632706725},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2009,11,1]],"date-time":"2009-11-01T00:00:00Z","timestamp":1257033600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J. Comput. Sci. Technol."],"published-print":{"date-parts":[[2009,11]]},"DOI":"10.1007\/s11390-009-9295-3","type":"journal-article","created":{"date-parts":[[2009,11,6]],"date-time":"2009-11-06T00:38:14Z","timestamp":1257467894000},"page":"1061-1073","source":"Crossref","is-referenced-by-count":29,"title":["Godson-T: An Efficient Many-Core Architecture for Parallel Program Executions"],"prefix":"10.1007","volume":"24","author":[{"given":"Dong-Rui","family":"Fan","sequence":"first","affiliation":[]},{"given":"Nan","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Jun-Chao","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yong-Bin","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Lin","sequence":"additional","affiliation":[]},{"given":"Feng-Long","family":"Song","sequence":"additional","affiliation":[]},{"given":"Xiao-Chun","family":"Ye","sequence":"additional","affiliation":[]},{"given":"He","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Lei","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Guo-Ping","family":"Long","sequence":"additional","affiliation":[]},{"given":"Hao","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Lei","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2009,11,6]]},"reference":[{"key":"9295_CR1","unstructured":"Asanovic K et al. The landscape of parallel computing research: A view from Berkeley. Technical Report No.UCB\/EECS-2006-183, University of California, Berkeley, December 18, 2006."},{"issue":"5","key":"9295_CR2","doi-asserted-by":"crossref","first-page":"33","DOI":"10.1109\/MC.2006.180","volume":"39","author":"EA Lee","year":"2006","unstructured":"Lee E A. The problem with threads. Computer, 2006, 39(5): 33\u201342.","journal-title":"Computer"},{"issue":"5","key":"9295_CR3","doi-asserted-by":"crossref","first-page":"16","DOI":"10.1145\/1454456.1454462","volume":"6","author":"B Cantrill","year":"2008","unstructured":"Cantrill B, Bonwick J. Real-world concurrency. ACM Queue, 2008, 6(5): 16\u201325.","journal-title":"ACM Queue"},{"key":"9295_CR4","unstructured":"Adve S V, Adve V S et al. Parallel computing research at Illinois: The UPCRC agenda. Technical Report, University of Illinois at Urbana-Champaign, November 2008."},{"key":"9295_CR5","unstructured":"Yuan N, Yu L, Fan D. An efficient and flexible task management for many-core architectures. In Proc. Workshop on Software and Hardware Challenges of Manycore Platforms, in Conjunction with the 35th International Symposium on Computer Architecture (ISCA-35), Beijing, China, June 22\u201326, 2008, pp.1\u201317."},{"issue":"5","key":"9295_CR6","doi-asserted-by":"crossref","first-page":"720","DOI":"10.1145\/324133.324234","volume":"46","author":"RD Blumofe","year":"1999","unstructured":"Blumofe R D, Leiserson C E. Scheduling multithreaded computations by work stealing. Journal of the ACM, 1999, 46(5): 720\u2013748.","journal-title":"Journal of the ACM"},{"key":"9295_CR7","unstructured":"Palatin P, Lhuillier Y, Temam O. CAPSULE: Hardware-assisted parallel execution of component-based programs. In Proc. the 39th Annual IEEE\/ACM International Symposium on Micro-Architecture, Washington, DC, USA: IEEE Computer Society, Dec. 9\u201313, 2006, pp.247\u2013258."},{"key":"9295_CR8","doi-asserted-by":"crossref","unstructured":"Villa O, Palermo G, Silvano C. Efficiency and scalability of barrier synchronization on NoC based many-core architecture. In Proc. CASES 2008, Atlanta, USA, Oct. 19\u201324, 2008, pp.81\u201390.","DOI":"10.1145\/1450095.1450110"},{"key":"9295_CR9","unstructured":"Carlson W W, Draper J M et al. Introduction to UPC and language specification. Technical Report No. CCS-TR-99-157, University of California, Berkeley, 1999."},{"issue":"2","key":"9295_CR10","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/289918.289920","volume":"17","author":"RW Numrich","year":"1998","unstructured":"Numrich R W, Reid J. Co-array Fortran for parallel programming. SIGPLAN Fortran Forum, 1998, 17(2): 1\u201331.","journal-title":"SIGPLAN Fortran Forum"},{"issue":"11\u201313","key":"9295_CR11","doi-asserted-by":"crossref","first-page":"825","DOI":"10.1002\/(SICI)1096-9128(199809\/11)10:11\/13<825::AID-CPE383>3.0.CO;2-H","volume":"10","author":"K Yelick","year":"1998","unstructured":"Yelick K, Semenzato L et al. Titanium: A high-performance Java dialect. Concurrency: Practice and Experience, 1998, 10(11-13): 825\u2013836.","journal-title":"Concurrency: Practice and Experience"},{"key":"9295_CR12","doi-asserted-by":"crossref","unstructured":"Fatahalian K, Horn D R et al. Sequoia: Programming the memory hierarchy. In Proc. the 2006 ACM\/IEEE Conference on Supercomputing, Tampa, Florida, Nov. 11\u201317, 2006, pp.83\u201395.","DOI":"10.1109\/SC.2006.55"},{"key":"9295_CR13","doi-asserted-by":"crossref","unstructured":"Bikshandi G, Guo J et al. Programming for parallelism and locality with hierarchically tiled arrays. In Proc. the Eleventh ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, New York, USA, March 29\u201331, 2006, pp.48\u201357.","DOI":"10.1145\/1122971.1122981"},{"key":"9295_CR14","doi-asserted-by":"crossref","unstructured":"Mellor-Crummey J M, Scott M L. Synchronization without contention. In Proc. Architectural Support for Programming Languages and Operating Systems, Santa Clara, USA, April 8\u201311, 1991, pp.269\u2013278.","DOI":"10.1145\/106972.106999"},{"key":"9295_CR15","doi-asserted-by":"crossref","unstructured":"Alverson R, Callahan D et al. The Tera computer system. In Proc. the 4th Int. Conf. Supercomputing, Amsterdam, The Netherlands, June 11\u201315, 1990, pp.1\u20136.","DOI":"10.1145\/77726.255132"},{"key":"9295_CR16","doi-asserted-by":"crossref","unstructured":"Zhu W, Sreedhar V C et al. Synchronization state buffer: Supporting efficient fine-grain synchronization on many-core architectures. In Proc. the 34th Annual International Symposium on Computer Architecture, San Diego, USA, June 9\u201313, 2007, pp.35\u201345.","DOI":"10.1145\/1250662.1250668"},{"key":"9295_CR17","doi-asserted-by":"crossref","unstructured":"Woo S C, Ohara M et al. The SPLASH-2 programs: Characterization and methodological considerations. In Proc. the 22nd Annual International Symposium on Computer Architecture, Santa Margnerita Ligure, Italy, June 22\u201324, 1995, pp.24\u201336.","DOI":"10.1145\/223982.223990"},{"issue":"1","key":"9295_CR18","doi-asserted-by":"crossref","first-page":"1948","DOI":"10.1093\/bioinformatics\/bth186","volume":"20","author":"Y Fu","year":"2004","unstructured":"Fu Y, Yang Q et al. Exploiting the kernel trick to correlate fragment ions for peptide identification via tandem mass spectrometry. Bioinformatics, 2004, 20(1): 1948\u20131954.","journal-title":"Bioinformatics"},{"issue":"17","key":"9295_CR19","doi-asserted-by":"crossref","first-page":"3389","DOI":"10.1093\/nar\/25.17.3389","volume":"25","author":"S Altschul","year":"1997","unstructured":"Altschul S, Madden T, Schaffer A et al. Gapped Blast and Psi-Blast: A new generation of protein database search programs. Nucleic Acids Research, 1997, 25(17): 3389\u20133402.","journal-title":"Nucleic Acids Research"},{"issue":"1","key":"9295_CR20","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1145\/301464.301477","volume":"27","author":"S Kumar","year":"1999","unstructured":"Kumar S, Jiang D et al. Evaluating synchronization on shared address space multiprocessors: Methodology and performance. ACM SIGMETRICS Performance Evaluation Review (SIGMETRICS 1999), 1999, 27(1): 23\u201334.","journal-title":"ACM SIGMETRICS Performance Evaluation Review (SIGMETRICS 1999)"},{"issue":"2","key":"9295_CR21","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1016\/0167-8191(88)90037-3","volume":"7","author":"J Feo","year":"1988","unstructured":"Feo J. An analysis of the computational and parallel complexity of the Livermore loops. Parallel Computing, 1988, 7(2): 163\u2013185.","journal-title":"Parallel Computing"},{"key":"9295_CR22","doi-asserted-by":"crossref","unstructured":"Yuan N, Zhou Y et al. High performance matrix multiplication on many cores. In Proc. European Conference on Parallel and Distributed Computing (Euro-Par), Delft, The Netherlands, Aug. 25\u201328, 2009, pp.948\u2013959.","DOI":"10.1007\/978-3-642-03869-3_87"},{"key":"9295_CR23","doi-asserted-by":"crossref","unstructured":"Volkov V, Demmel J W. Benchmarking GPUs to tune dense linear algebra. In Proc. 2008 ACM\/IEEE Conf. Supercomputing (SC 2008), Austin, USA, Now. 15\u201321, IEEE Press, 2008, pp.1\u201311.","DOI":"10.1109\/SC.2008.5214359"},{"key":"9295_CR24","doi-asserted-by":"crossref","unstructured":"Chen L, Hu Z et al. Optimizing fast Fourier transform on a multi-core architecture. In Proc. IEEE International Parallel and Distributed Processing Symposium, Long Beach, USA, March 26\u201330, 2007, pp.1\u20138.","DOI":"10.1109\/IPDPS.2007.370639"},{"key":"9295_CR25","doi-asserted-by":"crossref","unstructured":"Hu Z, Cuvillo J et al. Optimization of dense matrix multiplication on IBM Cyclops-64: Challenges and experiences. In Proc. Euro-Par 2006, Dresden, Germany, August 28\u2013September 1, pp.134\u2013144.","DOI":"10.1007\/11823285_14"},{"key":"9295_CR26","doi-asserted-by":"crossref","unstructured":"Govindaraju N K et al. High performance discrete Fourier transforms on graphics processors. In Proc. the 2008 ACM\/IEEE Conference on Supercomputing (SC2008), Austin, USA, Nov. 15\u201321, 2008, pp.13\u201324.","DOI":"10.1109\/SC.2008.5213922"},{"key":"9295_CR27","doi-asserted-by":"crossref","unstructured":"Williams S, Shalf J et al. The potential of the cell processor for scientific computing. In Proc. CF\u201906, Ischia, Italy, May 3\u20135, 2006, pp.9\u201320.","DOI":"10.1145\/1128022.1128027"},{"issue":"8","key":"9295_CR28","doi-asserted-by":"crossref","first-page":"798","DOI":"10.1109\/12.868026","volume":"49","author":"GR Gao","year":"2000","unstructured":"Gao G R, Sarkar V. Location consistency \u2014 A new memory model and cache consistency protocol. IEEE Transactions on Computers, 2000, 49(8): 798\u2013813.","journal-title":"IEEE Transactions on Computers"},{"key":"9295_CR29","doi-asserted-by":"crossref","unstructured":"Shen X et al. Commit-reconcile & fences (CRF): A new memory model for architects and compiler writers. In Proc. the 26th Annual International Symposium on Computer Architecture, Atlanta, USA, May 2\u20134, 1999, pp.150\u2013161.","DOI":"10.1145\/307338.300992"},{"key":"9295_CR30","doi-asserted-by":"crossref","unstructured":"Lftode L et al. Scope consistency: A bridge between release consistency and entry consistency. In Proc. the Eighth Annual ACM Symposium on Parallel Algorithms and Architectures, Padua, Italy, June 24\u201326, 1996, pp.277\u2013287.","DOI":"10.1145\/237502.237567"},{"key":"9295_CR31","doi-asserted-by":"crossref","unstructured":"Ceze L, Tuck J et al. BulkSC: Bulk enforcement of sequential consistency. In Proc. the 34th Annual International Symposium on Computer Architecture, San Diego, USA, June 9\u201313, 2007, pp.278\u2013289.","DOI":"10.1145\/1250662.1250697"},{"key":"9295_CR32","doi-asserted-by":"crossref","unstructured":"Hofstee P. Power efficient architecture and the cell processor. In Proc. HPCA-11, San Francisco, USA, February 12\u201316, 2005, pp.258\u2013262.","DOI":"10.1109\/HPCA.2005.26"},{"issue":"1","key":"9295_CR33","doi-asserted-by":"crossref","first-page":"26","DOI":"10.1145\/773365.773369","volume":"31","author":"G Almasi","year":"2003","unstructured":"Almasi G, Cascaval C et al. Dissecting cyclops: A detailed analysis of a multithreaded architecture. ACM SIGARCH Computer Architecture News, 2003, 31(1): 26\u201338.","journal-title":"ACM SIGARCH Computer Architecture News"},{"issue":"2","key":"9295_CR34","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1109\/MM.2008.31","volume":"28","author":"E Lindholm","year":"2008","unstructured":"Lindholm E et al. NVIDIA Tesla: A unified graphics and computing architecture. IEEE Micro, 2008, 28(2): 39\u201355.","journal-title":"IEEE Micro"},{"key":"9295_CR35","doi-asserted-by":"crossref","unstructured":"Mellor-Crummey, J M, Scott M L. Synchronization without contention. In Proc. Architectural Support for Programming Languages and Operating Systems, Santa Clara, USA, April 8\u201311, 1991, pp.269\u2013278.","DOI":"10.1145\/106972.106999"},{"key":"9295_CR36","doi-asserted-by":"crossref","unstructured":"Keckler S W et al. Exploiting fine-grain thread level parallelism on the MIT multi-alu processor. In Proc. the 25th Annual International Symposium on Computer Architecture, Barcelona, Spain, June 27\u2013July 1, 1998, pp.306\u2013317.","DOI":"10.1109\/ISCA.1998.694790"},{"key":"9295_CR37","doi-asserted-by":"crossref","unstructured":"Sampson J, Gonzalez R. Exploiting fine-grained data parallelism with chip multiprocessors and fast barriers. In Proc. the 39th Annual IEEE\/ACM International Symposium on Microarchitecture, Orlando, USA, Dec. 9\u201313, 2006, pp.235\u2013246.","DOI":"10.1109\/MICRO.2006.23"},{"key":"9295_CR38","doi-asserted-by":"crossref","unstructured":"Villa O et al. Efficiency and scalability of barrier synchronization on NoC based many-core architecture. In Proc. CASES 2008, Atlanta, USA, October 19\u201324, 2008, pp.81\u201390.","DOI":"10.1145\/1450095.1450110"}],"container-title":["Journal of Computer Science and Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-009-9295-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11390-009-9295-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-009-9295-3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,1]],"date-time":"2019-06-01T14:34:29Z","timestamp":1559399669000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11390-009-9295-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,11]]},"references-count":38,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2009,11]]}},"alternative-id":["9295"],"URL":"https:\/\/doi.org\/10.1007\/s11390-009-9295-3","relation":{},"ISSN":["1000-9000","1860-4749"],"issn-type":[{"value":"1000-9000","type":"print"},{"value":"1860-4749","type":"electronic"}],"subject":[],"published":{"date-parts":[[2009,11]]}}}