{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,5]],"date-time":"2025-05-05T01:10:01Z","timestamp":1746407401959,"version":"3.40.4"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2014,10,5]],"date-time":"2014-10-05T00:00:00Z","timestamp":1412467200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2015,2]]},"DOI":"10.1007\/s11227-014-1301-z","type":"journal-article","created":{"date-parts":[[2014,10,4]],"date-time":"2014-10-04T13:27:08Z","timestamp":1412429228000},"page":"391-447","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Enabling energy-proportional computing on instruction-level parallel processors"],"prefix":"10.1007","volume":"71","author":[{"given":"Yung-Cheng","family":"Ma","sequence":"first","affiliation":[]},{"given":"Wen-Shih","family":"Chao","sequence":"additional","affiliation":[]},{"given":"Tse-An","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,10,5]]},"reference":[{"unstructured":"Texas Instruments (2011) OMAP 5 mobile applications platform. http:\/\/www.ti.com\/pdfs\/wtbu\/swct010.pdf","key":"1301_CR1"},{"unstructured":"Philips (2011) Philips nexperia\u2014highly integrated programmable system-on-chip. http:\/\/www.semiconductors.philips.com\/products\/nexperia","key":"1301_CR2"},{"unstructured":"St Nomadik (2011) ST Nomadik multimedia processor. http:\/\/www.st.com\/stonline\/prodpres\/dedicate\/proc\/proc.htm","key":"1301_CR3"},{"unstructured":"Qualcomm Inc. (2010) Snapdragon mobile processors and chipsets. http:\/\/www.qualcomm.com\/snapdragon","key":"1301_CR4"},{"doi-asserted-by":"crossref","unstructured":"Lambrechts A, Raghavan P, Leroy A, Talavera G, Aa T, Jayapala M, Catthoor F, Verkest D, Deconinck G, Corporaal H, Robert F, Carrabina J (2005) Power breakdown analysis for a heterogeneous NoC platform running a video application. In: Proceedings of 16th IEEE international conference on application-specific systems, architecture processors (ASAP\u201905), pp 179\u2013184","key":"1301_CR5","DOI":"10.1109\/ASAP.2005.52"},{"doi-asserted-by":"crossref","unstructured":"Zyuban V, Kogge P (1998) The energy complexity of register files. In: Proceedings of the 1998 international symposium on low power electronics and design (ISLPED\u201998). ACM, New York, pp 305\u2013310","key":"1301_CR6","DOI":"10.1145\/280756.280943"},{"unstructured":"Texas Instruments (2005) TMS320C6455 fixed-point digital signal processor. http:\/\/www.ti.com.cn\/cn\/lit\/ds\/symlink\/tms320c6455.pdf","key":"1301_CR7"},{"unstructured":"Analog Devices (2010) Getting started with Blackfin processors. http:\/\/www.analog.com\/static\/imported-files\/processor_manuals\/GettingStartedwithBlackfinProcessors.pdf","key":"1301_CR8"},{"unstructured":"Freescale Semiconductor (2008) Tuning C code for StarCore-based digital signal processors. http:\/\/cache.freescale.com\/files\/dsp\/doc\/app_note\/an3357.pdf","key":"1301_CR9"},{"doi-asserted-by":"crossref","unstructured":"Terechko AS, Corporaal H (2007) Inter-cluster communication in VLIW architectures. ACM Trans Archit Code Optim 4(2):11","key":"1301_CR10","DOI":"10.1145\/1250727.1250731"},{"doi-asserted-by":"crossref","unstructured":"Wang M, Wang Y, Liu D, Qin Z, Shao Z (2010) Compiler-assisted leakage-aware loop scheduling for embedded VLIW DSP processors. J Syst Softw 83(5):772\u2013785","key":"1301_CR11","DOI":"10.1016\/j.jss.2009.11.727"},{"doi-asserted-by":"crossref","unstructured":"Nagpal R, Srikant Y (2011) Compiler-assisted power optimization for clustered VLIW architectures. Parallel Comput 37(1):42\u201359","key":"1301_CR12","DOI":"10.1016\/j.parco.2010.08.005"},{"unstructured":"Industry Technology Roadmap for Semiconductors (2010) ITRS report 2010. http:\/\/www.itrs.net\/links\/2010itrs\/home2010.htm","key":"1301_CR13"},{"doi-asserted-by":"crossref","unstructured":"Shin Y, Seomun J, Choi KM, Sakurai T (2010) Power gating: circuits, design methodologies, and best practice for standard-cell VLSI designs. ACM Trans Des Autom Electron Syst 15:28:1\u201328:37","key":"1301_CR14","DOI":"10.1145\/1835420.1835421"},{"doi-asserted-by":"crossref","unstructured":"Kim HS, Vijaykrishnan N, Kandemir M, Irwin MJ (2003) Adapting instruction level parallelism for optimizing leakage in VLIW architectures. In: Proceedings of the 2003 ACM SIGPLAN conference on Language, compiler, and tool for embedded systems (LCTES\u201903). ACM, New York, pp 275\u2013283","key":"1301_CR15","DOI":"10.1145\/780732.780770"},{"doi-asserted-by":"crossref","unstructured":"You Y-P, Lee C, Lee JK (2006) Compilers for leakage power reduction. ACM Trans Des Autom Electron Syst 11(1):147\u2013164","key":"1301_CR16","DOI":"10.1145\/1124713.1124723"},{"doi-asserted-by":"crossref","unstructured":"You Y-P, Huang C-W, Lee JK (2007) Compilation for compact power-gating controls. ACM Trans Des Autom Electron Syst 12(4):51","key":"1301_CR17","DOI":"10.1145\/1278349.1278364"},{"doi-asserted-by":"crossref","unstructured":"Venkatachalam V, Franz M (2005) Power reduction techniques for microprocessor systems. ACM Comput Surv 37(3):195\u2013237","key":"1301_CR18","DOI":"10.1145\/1108956.1108957"},{"doi-asserted-by":"crossref","unstructured":"Barroso L, Holzle U (2007) The case for energy-proportional computing. Computer 40:33\u201337","key":"1301_CR19","DOI":"10.1109\/MC.2007.443"},{"doi-asserted-by":"crossref","unstructured":"Cameron KW (2010) The challenges of energy-proportional computing. Computer 43:82\u201383","key":"1301_CR20","DOI":"10.1109\/MC.2010.145"},{"unstructured":"Ma YC, Liu TA, Chao WS (2013) Energy-aware compiler optimization for VLIW-DSP cores. In: Pan JS, Yang CN, Lin CC (eds) Advances in intelligent systems and applications of smart innovation, systems and technologies. Springer, Berlin. 21: 779\u2013788","key":"1301_CR21"},{"unstructured":"Taiwan Semiconductor Manufacturing Company (2011) 40nm technology. http:\/\/www.tsmc.com\/english\/dedicatedfoundry\/technology\/40nm.htm","key":"1301_CR22"},{"unstructured":"Fisher JA, Faraboschi P, Young C (2005) Embedded computing: a VLIW approach to architecture, compilers, and tools. Elsevier, London","key":"1301_CR23"},{"unstructured":"Aho AV, Lam MS, Sethi R, Ullman JD (2007) Compilers: principles, techniques, and tools, 2\/e. Addison Wesley, USA","key":"1301_CR24"},{"doi-asserted-by":"crossref","unstructured":"Chaitin GJ (1982) Register allocation & spilling via graph coloring. In: Proceedings of the 1982 SIGPLAN symposium on Compiler construction (SIGPLAN\u201982). ACM, New York, pp 98\u2013105","key":"1301_CR25","DOI":"10.1145\/800230.806984"},{"doi-asserted-by":"crossref","unstructured":"Faraboschi P, Brown G, Fisher JA, Desoli G, Homewood F (2000) Lx: a technology platform for customizable vliw embedded processing. In: Proceedings of the 27th annual internationalsymposium on computer architecture (ISCA\u201900). ACM, New York, pp 203\u2013213","key":"1301_CR26","DOI":"10.1145\/342001.339682"},{"doi-asserted-by":"crossref","unstructured":"Lin Y-C, Lu CH, Wu C-J, Tang C-L, You Y-P, Moo Y-C, Lee J-K (2008) Effective code generation for distributed and ping-pong register files: a case study on PAC VLIW DSP cores. J Signal Process Syst 51:269\u2013288","key":"1301_CR27","DOI":"10.1007\/s11265-007-0059-4"},{"doi-asserted-by":"crossref","unstructured":"Dally W, Balfour J, Black-Shaffer D, Chen J, Harting R, Parikh V, Park J, Sheffield D (2008) Efficient embedded computing. Computer 41(7):27\u201332","key":"1301_CR28","DOI":"10.1109\/MC.2008.224"},{"doi-asserted-by":"crossref","unstructured":"Aleta A, Codina JM, Gonzalez A, Kaeli D (2007) Heterogeneous clustered vliw microarchitectures. In: Proceedings of the international symposium on code generation and optimization (CGO\u201907). IEEE Computer Society, Washington, DC, pp 354\u2013366","key":"1301_CR29","DOI":"10.1109\/CGO.2007.15"},{"doi-asserted-by":"crossref","unstructured":"Kailas K, Franklin M, Ebcioglu K (2002) A register file architecture and compilation scheme for clustered ilp processors. In: Proceedings of the 8th international euro-par conference on parallel processing (Euro-Par\u201902). Springer, London, pp 500\u2013511","key":"1301_CR30","DOI":"10.1007\/3-540-45706-2_68"},{"doi-asserted-by":"crossref","unstructured":"Akturan C, Jacome MF (2001) Caliber: a software pipelining algorithm for clustered embedded VLIW processors. In: Proceedings of the 2001 IEEE\/ACM international conference on Computer-aided design (ICCAD\u201901). IEEE Press, Piscataway, pp 112\u2013118","key":"1301_CR31","DOI":"10.1109\/ICCAD.2001.968606"},{"doi-asserted-by":"crossref","unstructured":"Qian Y, Carr S, Sweany P (2002) Loop fusion for clustered vliw architectures. In: Proceedings of the joint conference on languages, compilers and tools for embedded systems: software and compilers for embedded systems (LCTES\/SCOPES\u201902). ACM, New York, pp 112\u2013119","key":"1301_CR32","DOI":"10.1145\/513829.513850"},{"unstructured":"Qian Y, Carr S, Sweany PH (2002) Optimizing loop performance for clustered vliw architectures. In: Proceedings of the 2002 International conference on parallel architectures and compilation techniques (PACT\u201902). IEEE Computer Society, Washington, DC, pp 271\u2013280","key":"1301_CR33"},{"doi-asserted-by":"crossref","unstructured":"Zalamea J, Llosa J, Ayguad\u00e9 E, Valero M (2001) Modulo scheduling with integrated register spilling for clustered vliw architectures. In: Proceedings of the 34th annual ACM\/IEEE international symposium on Microarchitecture (MICRO 34). IEEE Computer Society, Washington, DC, pp 160\u2013169","key":"1301_CR34","DOI":"10.1109\/MICRO.2001.991115"},{"unstructured":"Wen M, Wu N, Guan M, Zhang C (2008) Load scheduling: reducing pressure on distributed register files for free. In Proceedings of the 2008 Asia and South Pacific design automation conference (ASP DAC\u201908). IEEE Computer Society Press, Los Alamitos, pp 340\u2013345","key":"1301_CR35"},{"doi-asserted-by":"crossref","unstructured":"Wu C-J, Lin Y-T, Lee J-K (2012) Instruction scheduling methods and phase ordering framework for VLIW DSP processors with distributed register files. J Supercomput 61(3):1024\u20131047","key":"1301_CR36","DOI":"10.1007\/s11227-011-0671-8"},{"doi-asserted-by":"crossref","unstructured":"Shieh W-Y, Wang B-S (2012) Power-aware register assignment for large register file design. J Supercomput 61(3):719\u2013742","key":"1301_CR37","DOI":"10.1007\/s11227-011-0633-1"},{"doi-asserted-by":"crossref","unstructured":"Won HS, Kim KS, Jeong KO, Park KT, Choi KM, Kong JT (2003) MTCMOS design methodology and its application to mobile computing. In: Proceedings of the international symposium on low power electronics and design (ISLPED\u201903), pp 110\u2013115","key":"1301_CR38","DOI":"10.1145\/871506.871536"},{"doi-asserted-by":"crossref","unstructured":"Roy S, Ranganathan N, Katkoori S (2009) A framework for power-gating functional units in embedded microprocessors, IEEE Trans Very Large Scale Integr (VLSI) Syst 17(11):1640\u20131649","key":"1301_CR39","DOI":"10.1109\/TVLSI.2008.2005774"},{"unstructured":"Morgan R (1998) Building an optimizing compiler. Butterworth-Heinemann, London","key":"1301_CR40"},{"unstructured":"Hochbaum DS (1995) Approximation Algorithms for NP-Hard problems. International Thomson Publishing","key":"1301_CR41"},{"doi-asserted-by":"crossref","unstructured":"Guthaus M, Ringenberg J, Ernst D, Austin T, Mudge T, Brown R (2001) Mibench: a free, commercially representative embedded benchmark suite, In: Proceedings of the IEEE international workshop on workload characterization (WWC-4), pp 3\u201314","key":"1301_CR42","DOI":"10.1109\/WWC.2001.990739"},{"doi-asserted-by":"crossref","unstructured":"Chiu J-C, Yang K-M (2010) A novel instruction stream buffer for VLIW architectures. Comput Electr Eng 36(1):190\u2013198","key":"1301_CR43","DOI":"10.1016\/j.compeleceng.2009.08.006"},{"doi-asserted-by":"crossref","unstructured":"Lattner C, Adve V (2004) LLVM: a compilation framework for lifelong program analysis and transformation. In: Proceedings of the international symposium on code generation and optimization: feedbackdirected and runtime optimization (CGO\u201904). IEEE Computer Society, Washington, DC, p 75","key":"1301_CR44","DOI":"10.1109\/CGO.2004.1281665"},{"unstructured":"Keating M (2007) Low power methodology manual for system-on-chip design. Springer, New York","key":"1301_CR45"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-014-1301-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-014-1301-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-014-1301-z","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,5]],"date-time":"2025-05-05T00:36:51Z","timestamp":1746405411000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-014-1301-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,10,5]]},"references-count":45,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2015,2]]}},"alternative-id":["1301"],"URL":"https:\/\/doi.org\/10.1007\/s11227-014-1301-z","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"type":"print","value":"0920-8542"},{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2014,10,5]]}}}