{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,1,7]],"date-time":"2025-01-07T05:24:42Z","timestamp":1736227482382,"version":"3.32.0"},"reference-count":49,"publisher":"Oxford University Press (OUP)","issue":"2","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2006,3,1]]},"DOI":"10.1093\/comjnl\/bxh157","type":"journal-article","created":{"date-parts":[[2005,12,20]],"date-time":"2005-12-20T01:13:16Z","timestamp":1135041196000},"page":"211-233","source":"Crossref","is-referenced-by-count":17,"title":["Instruction Level Parallelism through Microthreading\u2014A Scalable Approach to Chip Multiprocessors"],"prefix":"10.1093","volume":"49","author":[{"given":"Kostas","family":"Bousias","sequence":"first","affiliation":[]},{"given":"Nabil","family":"Hasasneh","sequence":"additional","affiliation":[]},{"given":"Chris","family":"Jesshope","sequence":"additional","affiliation":[]}],"member":"286","published-online":{"date-parts":[[2005,12,19]]},"reference":[{"key":"key\n\t\t\t\t20171011222329_B1","doi-asserted-by":"crossref","unstructured":"Barroso, L. A., Gharachorloo, K., McNamara, R., Nowatzyk, A., Qadeer, S., Sano, B., Smith, S., Stets, S. and Verghese, B. (2000) Piranha: a scalable architecture based on single-chip multiprocessing. In Proc. 27th Annual Int. Symp. Computer Architecture, Vancouver, British Columbia, Canada, June 12\u201314, pp. 282\u2013293. ACM Press, New York, NY.","DOI":"10.1145\/339647.339696"},{"key":"key\n\t\t\t\t20171011222329_B2","doi-asserted-by":"crossref","unstructured":"Hammond, L., Hubbert, B. A., Siu, M., Prabhu, M. K., Chen, M. and Olukotun, K. (2000) The Stanford Hydra CMP. IEEE Micro, 20, 71\u201384.","DOI":"10.1109\/40.848474"},{"key":"key\n\t\t\t\t20171011222329_B3","doi-asserted-by":"crossref","unstructured":"Hammond, L., Nayfah, B. A. and Olukotun, K. (1997) A singlechip multiprocessor. IEEE Comput. Soc., 30, 79\u201385.","DOI":"10.1109\/2.612253"},{"key":"key\n\t\t\t\t20171011222329_B4","doi-asserted-by":"crossref","unstructured":"Tendler, J. M., Dodson, J. S., Fields, J. S., Le, H. and Sinharoy, B. (2002) Power4 System Micro-architecture. IBM J. Res. Develop., 46, 5\u201325.","DOI":"10.1147\/rd.461.0005"},{"key":"key\n\t\t\t\t20171011222329_B5","doi-asserted-by":"crossref","unstructured":"Tremblay, M., Chan, J., Chaudhry, S., Conigliaro, A. W. and Tse, S. S. (2000) The MAJC architecture: a synthesis of parallelism and scalability. IEEE Micro, 20, 12\u201325.","DOI":"10.1109\/40.888700"},{"key":"key\n\t\t\t\t20171011222329_B6","doi-asserted-by":"crossref","unstructured":"Jesshope, C. R. (2004) Scalable instruction-level parallelism. In Proc. Computer Systems: Architectures, Modeling and Simulation, 3rd and 4th Int. Workshops, SAMOS 2004, Samos, Greece, July 19\u201321, LNCS3133, pp. 383\u2013392. Springer.","DOI":"10.1007\/978-3-540-27776-7_40"},{"key":"key\n\t\t\t\t20171011222329_B7","doi-asserted-by":"crossref","unstructured":"Bhandarkar, D. (2003) Billion transistor chips in mainstream enterprise platforms of the future. In Proc. 9th Int. Symp. High- Performance Computer Architecture, Anaheim, CA, February 8\u201312, pp. 3. IEEE Computer Society, Washington, DC.","DOI":"10.1109\/HPCA.2003.1183519"},{"key":"key\n\t\t\t\t20171011222329_B8","doi-asserted-by":"crossref","unstructured":"Agarwal, V., Hrishikesh, M. S., Keckler, S. W. and Burger, D. (2000) Clock rate versus IPC: the end of the road for conventional microarchitectures. In Proc. 27th Annual Int. Symp. Computer Architecture, Vancouver, British, Columbia, Canada, June 10\u201314, pp. 248\u2013259. ACM Press, New York, NY.","DOI":"10.1145\/339647.339691"},{"key":"key\n\t\t\t\t20171011222329_B9","doi-asserted-by":"crossref","unstructured":"Onder, S. and Gupta, R. (2001) Instruction wake-up in wide issue superscalars. In Proc. 7th Int. Euro-Par Conf. Manchester on Parallel Processing, Manchester, UK, August 28\u201331, pp. 418\u2013427. Springer-Verlag, London, UK.","DOI":"10.1007\/3-540-44681-8_61"},{"key":"key\n\t\t\t\t20171011222329_B10","doi-asserted-by":"crossref","unstructured":"Onder, S. and Gupta, R. (1998) Superscalar execution with dynamic data forwarding. In Proc. Int. Conf. Parallel Architectures and Compilation Techniques, Paris, France, October 12\u201318, pp. 130\u2013135. IEEE Computer Society, Washington, DC.","DOI":"10.1109\/PACT.1998.727183"},{"key":"key\n\t\t\t\t20171011222329_B11","doi-asserted-by":"crossref","unstructured":"Olukotun, K., Nayfeh, B. A., Hammond, L., Wilson, K. and Chang, K. (1996) The case for a single-chip multiprocessor. In Proc. Seventh Int. Symp. on Architectural Support for Programming Languages and Operating Systems (ASPLOS-7), Cambridge, MA, October 1\u20135. Cambridge, MA, September, pp. 2\u201311. ACM Press, New York, NY.","DOI":"10.1145\/237090.237140"},{"key":"key\n\t\t\t\t20171011222329_B12","doi-asserted-by":"crossref","unstructured":"Palacharla, S., Jouppi, N. P. and Smith, J. (1997) Complexityeffective superscalar processors. In Proc. 24th Int. Symp. Computer Architecture, Denver, CO, June 1\u20134, pp. 206\u2013218. ACM Press, New York, NY.","DOI":"10.1145\/264107.264201"},{"key":"key\n\t\t\t\t20171011222329_B13","doi-asserted-by":"crossref","unstructured":"Tullsen, D. M., Eggersa, S. and Levy, H. M. (1995) Simultaneous multithreading: maximizing on chip parallelism. In Proc. 22nd Annual Int. Symp. Computer Architecture, Santa Margherita Ligure, Italy, June 22\u201324, pp. 392\u2013403. ACM Press, New York, NY.","DOI":"10.1145\/223982.224449"},{"key":"key\n\t\t\t\t20171011222329_B14","doi-asserted-by":"crossref","unstructured":"Rixner, S., Dally,W. J., Khailany, B., Mattson, P. R., Kapasi,U. J. and Owens, J. D. (2000) Register organization for media processing. In Proc. Int. Symp. High Performance Computer Architecture, Toulouse, France, January 8\u201312, pp. 375\u2013386. IEEE CS Press, Los Alamitos, CA.","DOI":"10.1109\/HPCA.2000.824366"},{"key":"key\n\t\t\t\t20171011222329_B15","doi-asserted-by":"crossref","unstructured":"Balasubramonian, R., Dwarkadas, S. and Albonesi, D. (2001) Reducing the Complexity of the register file in dynamic superscalar processors. In Proc. 34th Int. Symp. on Microarchitecture, Austin, TX, December 1\u20135, pp. 237\u2013248. IEEE Computer Society, Washington, DC.","DOI":"10.1109\/MICRO.2001.991122"},{"key":"key\n\t\t\t\t20171011222329_B16","unstructured":"Diefendorff, K. and Duquesne, Y. (2002) Complex SOCs require new architectures. EE Times. Available at http:\/\/www.eetimes.com\/issue\/se\/OEG20020911S0076."},{"key":"key\n\t\t\t\t20171011222329_B17","doi-asserted-by":"crossref","unstructured":"Ungerer, T., Robec, B. and Silc, J. (2003) A survey of processors with explicit multithreading. ACM Comput. Surveys, 35, 29\u201363.","DOI":"10.1145\/641865.641867"},{"key":"key\n\t\t\t\t20171011222329_B18","doi-asserted-by":"crossref","unstructured":"Burns, J. and Gaudiot, J.-L. (2001) Area and system clock effects on SMT\/CMP processors. In Proc. 2001 Int. Conf. Parallel Architectures and Compilation Techniques, Barcelona, Spain, September 8\u201312, pp. 211\u2013218. IEEE Computer Society, Washington, DC.","DOI":"10.1109\/PACT.2001.953301"},{"key":"key\n\t\t\t\t20171011222329_B19","doi-asserted-by":"crossref","unstructured":"Jesshope, C. R. (2003) Multi-threaded microprocessors evolution or revolution. In Proc. 8th Asia-Pacific Conf. ACSAC'2003, Aizu, Japan, September 23\u201326, LNCS2823, pp. 21\u201345. Springer, Berlin, Germany.","DOI":"10.1007\/978-3-540-39864-6_4"},{"key":"key\n\t\t\t\t20171011222329_B20","unstructured":"Luo, B. and Jesshope, C. R. (2002) Performance of a microthreaded pipeline. In Proc. 7th Asia-Pacific Conf. Computer Systems Architecture, Melbourne, Victoria, Australia, January 28\u2013February 2, pp. 83\u201390. Australia Computer Society, Inc. Darlinghurst, Australia."},{"key":"key\n\t\t\t\t20171011222329_B21","doi-asserted-by":"crossref","unstructured":"Jesshope, C. R. (2001) Implementing an efficient vector instruction set in a chip multi-processor using micro-threaded pipelines. In Proc. ACSAC 2001, Gold Coast, Queensland, Australia, January 29\u201330, pp. 80\u201388. IEEE Computer Society, Los Alamitos, CA.","DOI":"10.1109\/ACAC.2001.903363"},{"key":"key\n\t\t\t\t20171011222329_B22","unstructured":"Zhou, H. and Conte, T. M. (2002) Code Size Efficiency in Global Scheduling for VLIW\/EPIC Style Embedded Processors. Technical Report, Department of Electrical and Computer Engineering, North Carolina State University, Raleigh, NC."},{"key":"key\n\t\t\t\t20171011222329_B23","unstructured":"Hwang, K. (1993) Advanced Computer Architecture. MIT and McGraw-Hill, New York, St Louis, San Francisco."},{"key":"key\n\t\t\t\t20171011222329_B24","doi-asserted-by":"crossref","unstructured":"Sudharsanan, S., Sriram, P., Frederickson, H. and Gulati, A. (2000) Image and video processing using MAJC 5200. In Proc. 2000 IEEE Int. Conf. Image Processing, Vancouver, BC, Canada, September 10\u201313, pp. 122\u2013125. IEEE Computer Society, Washington, DC.","DOI":"10.1109\/ICIP.2000.899310"},{"key":"key\n\t\t\t\t20171011222329_B25","doi-asserted-by":"crossref","unstructured":"Cintra, M. and Torrellas, J. (2002) Eliminating squashes through learning cross-thread violations in speculative parallelisation for multiprocessors. In Proc. 8th Int. Symp. High-Performance Computer Architecture, Boston, MA, February 2\u20136, pp. 43\u201354. IEEE Computer Society, Washington, DC.","DOI":"10.1109\/HPCA.2002.995697"},{"key":"key\n\t\t\t\t20171011222329_B26","doi-asserted-by":"crossref","unstructured":"Cintra, M. Martinez, J. S. and Torrellas, J. (2000) Architecture support for scalable speculative parallelization in sharedmemory multiprocessors. In Proc. Int. Symp. Computer Architecture, Vancouver, Canada, June 10\u201314, pp. 13\u201324. ACM Press, New York, NY.","DOI":"10.1145\/342001.363382"},{"key":"key\n\t\t\t\t20171011222329_B27","doi-asserted-by":"crossref","unstructured":"Terechko, A., Thenaff, E. L., Garg, M. J., Van Eijndhoven, J. V. and Corporaal, H. (2003) Inter-cluster communication models for clustered VLIW processors. In Proc. 9th Int. Symp. High-Performance Computer Architecture, Anaheim, CA, February 8\u201312, pp. 354\u2013364. IEEE Computer Society, Washington, DC.","DOI":"10.1109\/HPCA.2003.1183552"},{"key":"key\n\t\t\t\t20171011222329_B28","unstructured":"Halfhill, T. (1998) Inside IA-64. Byte Magaz., 23, 81\u201388."},{"key":"key\n\t\t\t\t20171011222329_B29","unstructured":"Schlansker,M. S. and Rau, B. R. (2000) EPIC: an architecture for instruction-level parallel processors. Compiler and Architecture Research, HPL-1999-111. HP Laboratories, Palo Alto."},{"key":"key\n\t\t\t\t20171011222329_B30","doi-asserted-by":"crossref","unstructured":"Sundararaman, K. and Franklin, M. (1997) Multiscalar execution along a single flow of control. In Proc. IEEE Int. Conf. Parallel Processing, Bloomington, IL, August 11\u201315, pp. 106\u2013113. IEEE Computer Society, Washington, DC.","DOI":"10.1109\/ICPP.1997.622568"},{"key":"key\n\t\t\t\t20171011222329_B31","doi-asserted-by":"crossref","unstructured":"Sohi, G. S., Breach, S. E. and Vijaykumar, T. N. (1995) Multiscalar processors. In Proc. 22nd Annual Int. Symp. Computer Architecture, S. Margherita Ligure, Italy, June 22\u201324, pp. 414\u2013425. ACM Press, New York, NY.","DOI":"10.1145\/223982.224451"},{"key":"key\n\t\t\t\t20171011222329_B32","doi-asserted-by":"crossref","unstructured":"Breach, S. E., Vijaykumar, T. N. and Sohi, G. S. (1994) The anatomy of the register file in a multiscalar processor. In Proc. 27th Int. Symp. Microarchitecture, San Jose, CA, November 30\u2013December 2, pp. 181\u2013190. ACM Press, New York, NY.","DOI":"10.1145\/192724.192750"},{"key":"key\n\t\t\t\t20171011222329_B33","doi-asserted-by":"crossref","unstructured":"Alverson, R., Callahan, D., Cummings, D., Koblenz, B., Porterfield, A. and Smith, B. (1990) The Tera computer system. In Proc. 4th Int. Conf. Supercomputing, Amsterdam, The Netherlands, June 11\u201315, pp. 1\u20136. ACMPress, New York, NY.","DOI":"10.1145\/77726.255132"},{"key":"key\n\t\t\t\t20171011222329_B34","doi-asserted-by":"crossref","unstructured":"Kongetira, P., Aingaran, K. and Olukotun, K. (2005) Niagara: 32-way multithreaded Sparc processor. IEEE Comput. Soc., 25, 21\u201329.","DOI":"10.1109\/MM.2005.35"},{"key":"key\n\t\t\t\t20171011222329_B35","unstructured":"Marr, D. T., Binns, F., Hill, D. L., Hinton, G., Koufaty, D. A. and Upton, M. (2002) Hyper-threading technology architecture and microarchitecture. Intel Technol. J., 6, 4\u201315."},{"key":"key\n\t\t\t\t20171011222329_B36","unstructured":"Emer, J. (1999) Simultaneous multithreading: multiple Alpha's performance. In Presentation at the Microprocessor Forum'99, MicroDesign Resources, San Jose, CA."},{"key":"key\n\t\t\t\t20171011222329_B37","doi-asserted-by":"crossref","unstructured":"Codrescu, L., Wills, D. S. and Meindl, J. D. (2001) Architecture of the Atlas Chip Multiprocessor: dynamically parallelising irregular applications. IEEE Comput. Soc., 50, 67\u201382.","DOI":"10.1109\/12.902753"},{"key":"key\n\t\t\t\t20171011222329_B38","unstructured":"Diefendorff, K. (1999) Power4 focuses on memory bandwidth: IBM confronts IA-64, says ISA not important. Microprocessor Rep., 13, 11\u201317."},{"key":"key\n\t\t\t\t20171011222329_B39","doi-asserted-by":"crossref","unstructured":"Preston, R. P. et al. (2002) Design of an 8-wide superscalar RISC microprocessor with simultaneous multithreading. In Proc. 2002 IEEE Int. Solid-State Circuits Conf., San Francisco, CA, February 4\u20136, pp. 334\u2013335. IEEE Solid-State Circuits, USA.","DOI":"10.1109\/ISSCC.2002.993068"},{"key":"key\n\t\t\t\t20171011222329_B40","unstructured":"Scott, L., Lee, L., Arends, J. and Moyer, B. (1998) Designing the low-power M-CORE architecture. In Proc. IEEE Power Driven Micro Architecture Workshop at ISCA98, Barcelona, Spain, June 28, pp. 145\u2013150."},{"key":"key\n\t\t\t\t20171011222329_B41","doi-asserted-by":"crossref","unstructured":"Park, I., Powell, M. D. and Vijaykumar, T. N. (2002) Reducing register ports for higher speed and lower energy. In Proc. 35th Annual ACM\/IEEE Int. Symp. Microarchitecture, Istanbul, Turkey, November 18\u201322, pp. 171\u2013182. IEEE Computer Society, Los Alamitos, CA.","DOI":"10.1109\/MICRO.2002.1176248"},{"key":"key\n\t\t\t\t20171011222329_B42","doi-asserted-by":"crossref","unstructured":"Kim, N. S. and Mudge, T. (2003) Reducing register ports using delayed write-back queues and operand pre-fetch. In Proc. 17th Annual Int. Conf. Supercomputing, San Francisco, CA, June 23\u201326, pp. 172\u2013182. ACM Press, New York, NY.","DOI":"10.1145\/782814.782839"},{"key":"key\n\t\t\t\t20171011222329_B43","doi-asserted-by":"crossref","unstructured":"Tseng, J. H. and Asanovic, K. (2003) Banked multiported register files for high-frequency superscalar microprocessors. In Proc. 30th Int. Symp. Computer Architecture, San Diego, CA, June 9\u201311, pp. 62\u201371. ACM Press, New York, NY.","DOI":"10.1145\/859618.859627"},{"key":"key\n\t\t\t\t20171011222329_B44","doi-asserted-by":"crossref","unstructured":"Bunchua, S., Wills, D. S. and Wills, L. M. (2003) Reducing operand transport complexity of superscalar processors using distributed register files. In Proc. 21st Int. Conf. Computer Design, San Jose, CA, October 13\u201315, pp. 532\u2013535. IEEE Computer Society, Los Alamitos, CA.","DOI":"10.1109\/ICCD.2003.1240951"},{"key":"key\n\t\t\t\t20171011222329_B45","doi-asserted-by":"crossref","unstructured":"Bolychevsky, A., Jesshope, C. R. and Muchnick, V. (1996) Dynamic Scheduling in RISC Architectures. IEE Proc. Comput. Digit. Tech., 143, 309\u2013317.","DOI":"10.1049\/ip-cdt:19960788"},{"key":"key\n\t\t\t\t20171011222329_B46","doi-asserted-by":"crossref","unstructured":"Jesshope, C. R. (2005) Micro-grids\u2014the exploitation of massive on-chip concurrency. In Proc. HPC Workshop 2004, Grid Computing: A New Frontier of High Performance Computing, L. Grandinetti (ed.), Cetraro, Italy, May 31\u2013June 3. Elsevier, Amsterdam.","DOI":"10.1016\/S0927-5452(05)80012-7"},{"key":"key\n\t\t\t\t20171011222329_B47","doi-asserted-by":"crossref","unstructured":"Bousias, K. and Jesshope, C. R. (2005) The challenges of massive on-chip concurrency. Tenth Asia-Pacific Computer Systems Architecture Conference, Singapore, October 24\u201326. LNCS3740, pp. 157\u2013170. Springer-Verlag.","DOI":"10.1007\/11572961_14"},{"key":"key\n\t\t\t\t20171011222329_B48","unstructured":"Shapiro, D. (1984) Globally Asynchronous Locally Synchronous Circuits. PhD Thesis, Report No. STAN-CS-84-1026, Stanford University."},{"key":"key\n\t\t\t\t20171011222329_B49","doi-asserted-by":"crossref","unstructured":"Shengxian, Z., Li, W., Carlsson, J., Palmkvist, K. and Wanhammar, L. (2002) An asynchronous wrapper with novel handshake circuits for GALS systems. In Proc. IEEE 2002 Int. Conf. Communication, Circuits and Systems, Cheungdu, China, June 29\u2013July 1, pp. 1521\u20131525. IEEE Society, CA.","DOI":"10.1109\/ICCCAS.2002.1179067"}],"container-title":["The Computer Journal"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/comjnl\/article-pdf\/49\/2\/211\/1199564\/bxh157.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,6]],"date-time":"2025-01-06T16:46:44Z","timestamp":1736182004000},"score":1,"resource":{"primary":{"URL":"http:\/\/academic.oup.com\/comjnl\/article\/49\/2\/211\/436583\/Instruction-Level-Parallelism-through"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2005,12,19]]},"references-count":49,"journal-issue":{"issue":"2","published-online":{"date-parts":[[2005,12,19]]},"published-print":{"date-parts":[[2006,3,1]]}},"URL":"https:\/\/doi.org\/10.1093\/comjnl\/bxh157","relation":{},"ISSN":["1460-2067","0010-4620"],"issn-type":[{"type":"electronic","value":"1460-2067"},{"type":"print","value":"0010-4620"}],"subject":[],"published-other":{"date-parts":[[2006,3]]},"published":{"date-parts":[[2005,12,19]]}}}