{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T05:27:48Z","timestamp":1747805268443},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2006,12,1]],"date-time":"2006-12-01T00:00:00Z","timestamp":1164931200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2006,12]]},"DOI":"10.1007\/s11227-006-8321-2","type":"journal-article","created":{"date-parts":[[2006,10,4]],"date-time":"2006-10-04T10:15:25Z","timestamp":1159956925000},"page":"237-259","source":"Crossref","is-referenced-by-count":9,"title":["Design and evaluation of a hierarchical decoupled architecture"],"prefix":"10.1007","volume":"38","author":[{"given":"Won W.","family":"Ro","sequence":"first","affiliation":[]},{"given":"Stephen P.","family":"Crago","sequence":"additional","affiliation":[]},{"given":"Alvin M.","family":"Despain","sequence":"additional","affiliation":[]},{"given":"Jean-Luc","family":"Gaudiot","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"8321_CR1","doi-asserted-by":"crossref","unstructured":"Agarwal V, Hrishikesh MS, Keckler SW, Burger D (2000) Clock rate versus IPC: The end of the road for conventional microarchitectures. In: Proceedings of the 27th International Symposium on Computer Architecture","DOI":"10.1145\/339647.339691"},{"key":"8321_CR2","doi-asserted-by":"crossref","unstructured":"Annavaram M, Patel JM, Davidson ES (2001) Data prefetching by dependence graph precoumputation. In: Proceedings of the 28th International Symposium on Computer Architecture","DOI":"10.1145\/379240.379251"},{"key":"8321_CR3","doi-asserted-by":"crossref","unstructured":"Bird P, Rawsthorne A, Topham N (1993) The effectiveness of decoupling. In: Proc. of Int. Conf. on Supercomputing, pages","DOI":"10.1145\/165939.165952"},{"key":"8321_CR4","doi-asserted-by":"crossref","unstructured":"Burger D, Austin T (1997) The SimpleScalar Tool Set. Version 2.0. Technical Report CS-TR-97-1342, University of Wisconsin-Madison","DOI":"10.1145\/268806.268810"},{"key":"8321_CR5","doi-asserted-by":"crossref","unstructured":"Burns J, Gaudiot J-L (2002) SMT layout overhead and scalability. Transactions on Parallel and Distributed Processing Systems 13(2)","DOI":"10.1109\/71.983942"},{"key":"8321_CR6","doi-asserted-by":"crossref","unstructured":"Chappell R, Stark J, Kim S, Reinhardt S, Patt Y (1999) Simultaneous subordinate microthreading (SSMT). In: Proceedings of the 26th Annual International Symposium on Computer Architecture","DOI":"10.1109\/ISCA.1999.765950"},{"issue":"5","key":"8321_CR7","doi-asserted-by":"crossref","first-page":"609","DOI":"10.1109\/12.381947","volume":"44","author":"T-F Chen","year":"1995","unstructured":"Chen T-F, Baer J-L (1995) Effective hardware-based data prefetching for high-performance processors. IEEE Transactions on Computers 44(5):609\u2013623","journal-title":"IEEE Transactions on Computers"},{"key":"8321_CR8","unstructured":"Crago S, Despain A, Gaudiot J-L, Makhija M, Ro W, Srivastava A (2000) A high-performance, hierarchical decoupled architecture. In: Proceedings of MEDEA Workshop"},{"key":"8321_CR9","doi-asserted-by":"crossref","unstructured":"Collins JD, Wang H, Tullsen DM, Hughes C, Lee Y-F, Lavery D, Shen JP (2001) Speculative precomputation: long-range prefetching of delinquent loads. In: Proceedings of the 28th International Symposium on Computer Architecture","DOI":"10.1145\/379240.379248"},{"key":"8321_CR10","doi-asserted-by":"crossref","unstructured":"Collins JD, Tullsen DM, Wang H, Shen JP (2001) Dynamic speculative precomputation. In: Proceedings of the 34th Annual International Symposium on Microarchitecture","DOI":"10.1109\/MICRO.2001.991128"},{"key":"8321_CR11","unstructured":"Dubois M, Song Y (1998) Assisted execution. Technical Report CENG #98-25, Department of EE-Systems, University of Southern California"},{"key":"8321_CR12","doi-asserted-by":"crossref","unstructured":"Eggers S, Emer J, Levy H, Lo J, Stamm R, Tullsen D (1997) Simultaneous multithreading: A platform for next-generation processors, IEEE Micro","DOI":"10.1109\/40.621209"},{"key":"8321_CR13","doi-asserted-by":"crossref","unstructured":"Farkas KI, Chow P, Jouppi NP, Vranesic Z (1997) The multicluster architecture: reducing cycle time through partitioning. In: Proceedings of the 30th Annual. IEEE\/ACM Symposium on Microarchitecture","DOI":"10.1109\/MICRO.1997.645806"},{"key":"8321_CR14","doi-asserted-by":"crossref","unstructured":"Farrens M, Nico P, Ng P (1993) A comparison of superscalar and decoupled access\/execute architectures. In: Proceedings of the 26th Annual International Symposium on Microarchitecture","DOI":"10.1109\/MICRO.1993.282746"},{"key":"8321_CR15","doi-asserted-by":"crossref","unstructured":"Goodman JR, Hsieh JT, Liou K, Pleszkun AR, Schechter PB, Young HC (1985) PIPE: a vlsi decoupled architecture. In: Proceedings the 12th International Symposium on Computer Architecture","DOI":"10.1145\/327070.327117"},{"key":"8321_CR16","doi-asserted-by":"crossref","unstructured":"Hong SI, McKee SA, Salinas MH, Klenke RH, Aylor JH, Wulf WA (1999) Access order and effective bandwidth for streams on a direct rambus memory. In: Proceedings of the 5th International Symposium on High-Performance Computer Architecture","DOI":"10.1109\/HPCA.1999.744337"},{"key":"8321_CR17","doi-asserted-by":"crossref","unstructured":"Jones GP, Topham NP (1997) A comparison of data prefetching on an access decoupled and superscalar machine. In: Proceedings of the 30th International Symposium on Microarchitecture","DOI":"10.1109\/MICRO.1997.645798"},{"key":"8321_CR18","doi-asserted-by":"crossref","unstructured":"Kavi KM, Arul J, Giorgi R (2000) Execution and cache performance of the scheduled dataflow architecture. Journal of Universal Computer Science, Special Issue on Multithreaded and Chip Multiprocessors","DOI":"10.1142\/9789812792037_0011"},{"key":"8321_CR19","doi-asserted-by":"crossref","unstructured":"Krishnan V, Torrellas J (1999) A chip-multiprocessor architecture with speculative multithreading. IEEE Trans Comput 48(9)","DOI":"10.1109\/12.795218"},{"key":"8321_CR20","doi-asserted-by":"crossref","unstructured":"Kurian L, Hulina PT, Coraor LD (1994) Memory latency effects in decoupled architectures. IEEE Trans Comput 43(10)","DOI":"10.1109\/12.324539"},{"key":"8321_CR21","doi-asserted-by":"crossref","unstructured":"Luk C-K, Mowry TC (1996) Compiler based prefetching for recursive data structures. In: Proceedings of the 7th International Conference on Architectural Support for Programming Languages and Operating Systems","DOI":"10.1145\/237090.237190"},{"key":"8321_CR22","doi-asserted-by":"crossref","unstructured":"Luk C-K (2001) Tolerating memory latency through software-controlled pre-execution in simultaneous multithreading processor. In: Proceedings of the 28th International Symposium on Computer Architecture","DOI":"10.1145\/379240.379250"},{"key":"8321_CR23","doi-asserted-by":"crossref","unstructured":"Andreas Moshovos, Dionisios Pnevmatikatos N, Amirali Baniasadi (2001) Slice-processors: An implementation of operation-based prediction. In: Proceedings of the 15th international conference on Supercomputing","DOI":"10.1145\/377792.377856"},{"key":"8321_CR24","doi-asserted-by":"crossref","unstructured":"Palacharla S, Jouppi NP, Smith JE (1997) Complexity-effective superscalar processors. In: Proceedings of the 24th International Symposium on Computer Architecture","DOI":"10.1145\/264107.264201"},{"key":"8321_CR25","doi-asserted-by":"crossref","unstructured":"Parcerisa J-M, Gonz\u00e1lez A (1999) The synergy of multithreading and access\/execute decoupling. In: Proceedings of the 5th International Symposium on High-Performance Computer Architecture","DOI":"10.1109\/HPCA.1999.744329"},{"key":"8321_CR26","doi-asserted-by":"crossref","unstructured":"Patterson D, Anderson T, Cardwell N, Fromm R, Keeton K, Kozyrakis C, Thomas R, Yelick K (1997) A case for intelligent DRAM: IRAM. IEEE Micro","DOI":"10.1109\/40.592312"},{"key":"8321_CR27","doi-asserted-by":"crossref","unstructured":"Ro WW, Gaudiot J-L, Crago SP, Despain AM (2003) HiDISC: A decoupled architecture for data-intensive applications. In: Proceedings of the 17th International Parallel and Distributed Processing Symposium (IPDPS 2003), Nice, France","DOI":"10.1109\/IPDPS.2003.1213076"},{"key":"8321_CR28","doi-asserted-by":"crossref","unstructured":"Roth A, Moshovos A, Sohi GS (1998) Dependence based prefetching for linked data structures. In: Proceedings of the 8th International Conference on Architectural Support for Programming Languages and Operating Systems","DOI":"10.1145\/291069.291034"},{"key":"8321_CR29","unstructured":"Roth A, Zilles CB, Sohi GS (2000) Speculative miss\/execute decoupling. In: Proceedings of MEDEA Workshop"},{"key":"8321_CR30","doi-asserted-by":"crossref","unstructured":"Roth A, Sohi GS (2001) Speculative data-driven multithreading. In: Proceedings of the 7th International Symposium on High-Performance Computer Architecture","DOI":"10.1109\/HPCA.2001.903250"},{"key":"8321_CR31","doi-asserted-by":"crossref","unstructured":"Smith J (1982) Decoupled access\/execute computer architecture. In: Proceedings of the 9th International Symposium on Computer Architecture","DOI":"10.1145\/1067649.801719"},{"key":"8321_CR32","doi-asserted-by":"crossref","unstructured":"Smith J (1989) Dynamic instruction scheduling and the astronautics ZS-1. IEEE Computer","DOI":"10.1109\/2.30730"},{"key":"8321_CR33","doi-asserted-by":"crossref","unstructured":"Sohi GS, Breach SE, Vijaykumar TN (1995) Multiscalar processors. In: Proceedings of the 22nd Annual International Symposium on Computer Architecture","DOI":"10.1145\/223982.224451"},{"key":"8321_CR34","doi-asserted-by":"crossref","unstructured":"Tyson G, Farrens M, Pleszkun A (1992) MISC: A multiple instruction stream computer. In: Proceedings of the 25th Annual International Symposium on Microarchitecture","DOI":"10.1145\/144965.145800"},{"key":"8321_CR35","doi-asserted-by":"crossref","unstructured":"Wulf WA (1992) Evaluation of the WM architecture. In: Proceedings of the 19th International Symposium on Computer Architecture","DOI":"10.1145\/139669.140402"},{"key":"8321_CR36","unstructured":"Zhang Y, Adams GB III (1998) performance modeling and code partitioning for the DS achitecture. In: Proceedings of the 25th Annual International Symposium on Computer Architecture"},{"key":"8321_CR37","unstructured":"Zhang Y, Adams GB III (1996) Exploiting instruction level parallelism with the ds architecture. In: Proceedings of the 1996 International Conference on Parallel Processing"},{"key":"8321_CR38","doi-asserted-by":"crossref","unstructured":"Zilles CB, Sohi GS (2000) Understanding the backward slices of performance degrading instructions. In: Proceedings of the 27th International Symposium on Computer Architecture","DOI":"10.1145\/339647.339676"},{"key":"8321_CR39","unstructured":"Data-intensive systems benchmarks suite analysis and specification. http:\/\/www.aaec.com\/projectweb\/dis\/"},{"key":"8321_CR40","unstructured":"DIS Stressmark Suite. http:\/\/www.aaec.com\/projectweb\/dis\/DIS_Stressmarks_v1_0.pdf"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-006-8321-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-006-8321-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-006-8321-2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,9]],"date-time":"2023-05-09T02:09:47Z","timestamp":1683598187000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-006-8321-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2006,12]]},"references-count":40,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2006,12]]}},"alternative-id":["8321"],"URL":"https:\/\/doi.org\/10.1007\/s11227-006-8321-2","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2006,12]]}}}