{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,27]],"date-time":"2025-05-27T08:29:08Z","timestamp":1748334548309,"version":"3.30.1"},"reference-count":174,"publisher":"Elsevier BV","issue":"13-14","license":[{"start":{"date-parts":[[1999,12,1]],"date-time":"1999-12-01T00:00:00Z","timestamp":944006400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Parallel Computing"],"published-print":{"date-parts":[[1999,12]]},"DOI":"10.1016\/s0167-8191(99)00086-1","type":"journal-article","created":{"date-parts":[[2003,4,25]],"date-time":"2003-04-25T08:06:40Z","timestamp":1051258000000},"page":"1741-1783","source":"Crossref","is-referenced-by-count":24,"title":["Compilation techniques for parallel systems"],"prefix":"10.1016","volume":"25","author":[{"given":"Rajiv","family":"Gupta","sequence":"first","affiliation":[]},{"given":"Santosh","family":"Pande","sequence":"additional","affiliation":[]},{"given":"Kleanthis","family":"Psarris","sequence":"additional","affiliation":[]},{"given":"Vivek","family":"Sarkar","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"doi-asserted-by":"crossref","unstructured":"V. Adve, J. Mellor-Crummey, Using integer sets for data-parallel analysis and optimization, in: ACM SIGPLAN Conference on Programming Language Design and Implementation, Montreal, Canada, 1998, pp. 186\u2013198","key":"10.1016\/S0167-8191(99)00086-1_BIB1","DOI":"10.1145\/277650.277721"},{"issue":"9","key":"10.1016\/S0167-8191(99)00086-1_BIB2","doi-asserted-by":"crossref","first-page":"943","DOI":"10.1109\/71.466632","article-title":"Automatic partitioning of parallel loops and data arrays for distributed shared-memory multiprocessors","volume":"6","author":"Agarwal","year":"1995","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"issue":"7","key":"10.1016\/S0167-8191(99)00086-1_BIB3","doi-asserted-by":"crossref","first-page":"609","DOI":"10.1109\/71.707537","article-title":"Interprocedural partial redundancy elimination with application to distributed memory compilation","volume":"9","author":"Agrawal","year":"1998","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"unstructured":"A. Aho, R. Sethi, J. Ullman, Compilers: Principles, Techniques and Tools, Addison-Wesley, Reading, MA, 1988","key":"10.1016\/S0167-8191(99)00086-1_BIB4"},{"issue":"5","key":"10.1016\/S0167-8191(99)00086-1_BIB5","doi-asserted-by":"crossref","first-page":"584","DOI":"10.1109\/32.6136","article-title":"A development environment for horizontal microcode","volume":"14","author":"Aiken","year":"1988","journal-title":"IEEE Transactions on Software Engineering"},{"issue":"4","key":"10.1016\/S0167-8191(99)00086-1_BIB6","doi-asserted-by":"crossref","first-page":"491","DOI":"10.1145\/29873.29875","article-title":"Automatic translation of FORTRAN programs to vector form","volume":"9","author":"Allen","year":"1987","journal-title":"ACM Transactions on Programming Languages and Systems"},{"unstructured":"R. Allen, K. Kennedy, Vector register allocation, Technical Report TR86-45, Rice University, Houston, TX, December 1986","key":"10.1016\/S0167-8191(99)00086-1_BIB7"},{"unstructured":"E.R. Altman, Optimal software pipelining with functional unit and register constraints, Ph.D. Thesis, McGill University, Montreal, Quebec, 1995","key":"10.1016\/S0167-8191(99)00086-1_BIB8"},{"unstructured":"S.P. Amarasinghe, Parallelizing Compiler Techniques Based on Linear Inequalities, Computer Systems Laboratory, Stanford University, January 1997","key":"10.1016\/S0167-8191(99)00086-1_BIB9"},{"doi-asserted-by":"crossref","unstructured":"S.P. Amarasinghe, M.S. Lam, Communication optimization and code generation for distributed memory machines, in: Proceedings ACM SIGPLAN'93 Conference on Programming Language Design and Implementation, Albuquerque, New Mexico, June 1993","key":"10.1016\/S0167-8191(99)00086-1_BIB10","DOI":"10.1145\/155090.155102"},{"doi-asserted-by":"crossref","unstructured":"C. Ancourt, F. Irigoin, Scanning polyhedra with do loops, in: Proceedings of the Third ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, Williamsburg, VA, April 1991","key":"10.1016\/S0167-8191(99)00086-1_BIB11","DOI":"10.1145\/109625.109631"},{"issue":"1","key":"10.1016\/S0167-8191(99)00086-1_BIB12","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1155\/1997\/195689","article-title":"A linear algebra framework for static HPF code distribution","volume":"6","author":"Ancourt","year":"1997","journal-title":"Scientific Programming"},{"doi-asserted-by":"crossref","unstructured":"J.M. Anderson, M.S. Lam, Global optimizations for parallelism and locality on scalable parallel machines, in: Proceedings of the ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI), June 1993, pp. 112\u2013125","key":"10.1016\/S0167-8191(99)00086-1_BIB13","DOI":"10.1145\/155090.155101"},{"doi-asserted-by":"crossref","unstructured":"G. Araujo, S. Malik, M. Lee, Using register-transfer paths in code generation for heterogeneous memory-register architectures, in: Proceedings of the 33rd ACM\/IEEE Design Automation Conference, 1996, pp. 591\u2013596","key":"10.1016\/S0167-8191(99)00086-1_BIB14","DOI":"10.1145\/240518.240630"},{"unstructured":"G. Araujo, A. Sudarsanam, S. Malik, Instruction set design and optimization for address computation in DSP architectures, in: Proceedings of the Ninth International Symposium on System Synthesis, 1997, pp. 31\u201337","key":"10.1016\/S0167-8191(99)00086-1_BIB15"},{"unstructured":"K. Arnold, J. Gosling, The Java Programming Language, Addison-Wesley, Reading, MA, 1996","key":"10.1016\/S0167-8191(99)00086-1_BIB16"},{"doi-asserted-by":"crossref","unstructured":"D.I. August, W.W. Hwu, S.A. Mahlke, A framework for balancing control flow and predication, in: Proceedings of the 30th Annual International Symposium on Microarchitecture, Research Triangle Park, North Carolina, December 1997, pp. 92\u2013103","key":"10.1016\/S0167-8191(99)00086-1_BIB17","DOI":"10.1109\/MICRO.1997.645801"},{"doi-asserted-by":"crossref","unstructured":"U. Banerjee, Dependence Analysis for Supercomputing, Kluwer Academic Publishers, Norwell, MA, 1988","key":"10.1016\/S0167-8191(99)00086-1_BIB18","DOI":"10.1007\/978-1-4684-6894-6"},{"doi-asserted-by":"crossref","unstructured":"U. Banerjee, Loop Transformations for Restructuring Compilers: The Foundations, Kluwer Academic Publishers, Boston, MA, 1993","key":"10.1016\/S0167-8191(99)00086-1_BIB19","DOI":"10.1007\/b102311"},{"issue":"10","key":"10.1016\/S0167-8191(99)00086-1_BIB20","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1109\/2.467577","article-title":"The PARADIGM compiler for distributed-memory multicomputers","volume":"28","author":"Banerjee","year":"1995","journal-title":"IEEE Computer"},{"issue":"2","key":"10.1016\/S0167-8191(99)00086-1_BIB21","doi-asserted-by":"crossref","first-page":"101","DOI":"10.1002\/spe.4380220202","article-title":"Optimizing stack frame accesses for processors with restricted addressing modes","volume":"22","author":"Bartley","year":"1992","journal-title":"Software Practice and Experience"},{"doi-asserted-by":"crossref","unstructured":"D. Bau, I. Koduklula, V. Kotlyar, K. Pingali, P. Stodghill, Solving alignment using elementary linear algebra, in: Proceedings of the Seventh Workshop on Languages and Compilers for Parallel Computing, Lecture Notes in Computer Science, vol. 892, Ithica, NY, 1994, Springer, Berlin, 1995, pp. 46\u201360","key":"10.1016\/S0167-8191(99)00086-1_BIB22","DOI":"10.1007\/BFb0025870"},{"unstructured":"S. Benkner, B. Chapman, H. Zima, Vienna Fortran 90, in: Proceedings of the 1992 Scalable High Performance Computing Conference, Williamsburg, VA, April 1992","key":"10.1016\/S0167-8191(99)00086-1_BIB23"},{"doi-asserted-by":"crossref","unstructured":"P. Beckman, D. Gannon, Tulip: a portable run-time system for object-parallel systems, in: Proceedings of the 10th International Parallel Processing Symposium, April 1996","key":"10.1016\/S0167-8191(99)00086-1_BIB24","DOI":"10.1109\/IPPS.1996.508107"},{"unstructured":"D.A. Berson, R. Gupta, M.L. Soffa, Resource spackling: a framework for integrating register allocation in local and global schedulers, in: Proceedings of IFIP WG 10.3 Working Conference on Parallel Architectures and Compilation Techniques, 1994, pp. 135\u2013146","key":"10.1016\/S0167-8191(99)00086-1_BIB25"},{"unstructured":"R. Bixby, K. Kennedy, U. Kremer, Automatic data layout using 0-1 integer programming, in: Proceedings of the 1994 International Conference on Parallel Architectures and Compilation Techniques, Montr\u00e9al, Canada, August 1994, pp. 111\u2013122","key":"10.1016\/S0167-8191(99)00086-1_BIB26"},{"doi-asserted-by":"crossref","unstructured":"B. Blanchet, Escape analysis: correctness, proof, implementation and experimental results, in: Proceedings of the 25th Annual ACM Symposium on Principles of Programming Languages, San Diego, CA, January 1998, pp. 25\u201337","key":"10.1016\/S0167-8191(99)00086-1_BIB27","DOI":"10.1145\/268946.268949"},{"issue":"12","key":"10.1016\/S0167-8191(99)00086-1_BIB28","doi-asserted-by":"crossref","DOI":"10.1109\/71.737695","article-title":"Nonlinear and symbolic data dependence testing","volume":"9","author":"Blume","year":"1998","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"doi-asserted-by":"crossref","unstructured":"R. Bodik, R. Gupta, M.L. Soffa, Complete removal of redundant expressions, in: Proceedings of the ACM SIGPLAN Conference on Programming Language Design and Implementation, Montreal, Canada, June 1998, pp. 1\u201314","key":"10.1016\/S0167-8191(99)00086-1_BIB29","DOI":"10.1145\/277650.277653"},{"doi-asserted-by":"crossref","unstructured":"R. Bodik, R. Gupta, Partial dead code elimination using slicing transformations, in: Proceedings of the ACM SIGPLAN Conference on Programming Language Design and Implementation, Las Vegas, Nevada, June 1997, pp. 159\u2013170","key":"10.1016\/S0167-8191(99)00086-1_BIB30","DOI":"10.1145\/258915.258930"},{"doi-asserted-by":"crossref","unstructured":"M. Burke, R. Cytron, Interprocedural dependence analysis and parallelization, in: Proceedings of the SIGPLAN Symposium on Compiler Construction, July 1986, pp. 162\u2013175","key":"10.1016\/S0167-8191(99)00086-1_BIB31","DOI":"10.1145\/12276.13328"},{"unstructured":"A. Caro, Generating multithreaded code from parallel Haskell for symmetric multiprocessors, Ph.D. Thesis, Massachussetts Institute of Technology, 1999","key":"10.1016\/S0167-8191(99)00086-1_BIB32"},{"doi-asserted-by":"crossref","unstructured":"S. Chakrabarti, M. Gupta, J.-D. Choi, Global communication analysis and optimization, in: Proceedings ACM SIGPLAN Conference on Programming Language Design and Implementation, Philadelphia, PA, May 1996","key":"10.1016\/S0167-8191(99)00086-1_BIB33","DOI":"10.1145\/231379.231391"},{"doi-asserted-by":"crossref","unstructured":"K.M. Chandy, C. Kesselman, CC++: a declarative concurrent object-oriented programming notation, in: Research Directions in Concurrent Object Oriented Programming, MIT Press, Cambridge, MA, 1993","key":"10.1016\/S0167-8191(99)00086-1_BIB34","DOI":"10.7551\/mitpress\/2087.003.0016"},{"doi-asserted-by":"crossref","unstructured":"B. Chapman, T. Fahringer, H. Zima, Automatic support for data distribution on distributed memory multiprocessor systems, in: Proceedings of the Sixth Workshop on Languages and Compilers for Parallel Computing, Lecture Notes in Computer Science, vol. 768, Portland, OR, Aug. 1993, Springer, Berlin, 1994, pp. 184\u2013199","key":"10.1016\/S0167-8191(99)00086-1_BIB35","DOI":"10.1007\/3-540-57659-2_11"},{"doi-asserted-by":"crossref","unstructured":"S. Chatterjee, J. Gilbert, R. Schreiber, The alignment-distribution graph, in: Languages and Compilers for Parallel Computing, Sixth International Workshop, Lecture Notes in Computer Science, vol. 768, Springer, Berlin, 1993","key":"10.1016\/S0167-8191(99)00086-1_BIB36","DOI":"10.1007\/3-540-57659-2_14"},{"issue":"1","key":"10.1016\/S0167-8191(99)00086-1_BIB37","doi-asserted-by":"crossref","first-page":"72","DOI":"10.1006\/jpdc.1995.1049","article-title":"Generating local addresses and communication sets for data parallel programs","volume":"26","author":"Chatterjee","year":"1995","journal-title":"Journal of Parallel and Distributed Computing"},{"issue":"9","key":"10.1016\/S0167-8191(99)00086-1_BIB38","doi-asserted-by":"crossref","first-page":"924","DOI":"10.1109\/71.308531","article-title":"Communication-free data allocation techniques for parallelizing compilers on multicomputers","volume":"5","author":"Chen","year":"1994","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"doi-asserted-by":"crossref","unstructured":"G. Chrysos, J. Emer, Memory dependence prediction using store sets, in: Proceedings of the ACM\/IEEE 25th International Symposium on Computer Architecture, Barcelona, Spain, July 1998, pp. 142\u2013154","key":"10.1016\/S0167-8191(99)00086-1_BIB39","DOI":"10.1109\/ISCA.1998.694770"},{"unstructured":"T.M. Conte, Evolutionary compilation to long instruction superscalar microarchitectures for exploiting parallelism at all levels, in: ASPLOS Wild and Crazy Idea Session, 1998","key":"10.1016\/S0167-8191(99)00086-1_BIB40"},{"doi-asserted-by":"crossref","unstructured":"K. Cooper, K. Kennedy, Efficient computation of flow insensitive interprocedural summary information, in: Proceedings of the ACM SIGPLAN'84 Symposium on Compiler Construction, June 1984","key":"10.1016\/S0167-8191(99)00086-1_BIB41","DOI":"10.1145\/502874.502898"},{"issue":"4","key":"10.1016\/S0167-8191(99)00086-1_BIB42","doi-asserted-by":"crossref","first-page":"491","DOI":"10.1145\/6465.6489","article-title":"The impact of interprocedural analysis and optimization in the Rn programming environment","volume":"8","author":"Cooper","year":"1986","journal-title":"ACM Transactions on Programming Languages and Systems"},{"doi-asserted-by":"crossref","unstructured":"K. Cooper, P. Schielke, Non-local instruction scheduling with limited code growth, in: Proceedings of Languages, Compilers and Tools for Embedded Systems, 1998, pp. 193\u2013207","key":"10.1016\/S0167-8191(99)00086-1_BIB43","DOI":"10.1007\/BFb0057791"},{"unstructured":"D. Culler, A. Dusseau, S. Goldstein, A. Krishnamurthy, S. Lumetta, T. von Eicken, K. Yelick, Parallel programming in Split-C, in: Proceedings of Supercomputing'93, 1993","key":"10.1016\/S0167-8191(99)00086-1_BIB44"},{"doi-asserted-by":"crossref","unstructured":"G. Dantzig, B. Eaves, Fourier\u2013Motzkin Elimination and its Dual, Journal of Combinatorial Theory (A) 14 (1973)","key":"10.1016\/S0167-8191(99)00086-1_BIB45","DOI":"10.1016\/0097-3165(73)90004-6"},{"unstructured":"J. Davies, C. Huson, T. Macke, B. Leasure, M. Wolfe, The KAP\/S-1: an advanced source-to-source vectorizer for the S-1 Mark IIa Supercomputer, in: Proceedings of the 1986 International Conference on Parallel Processing, St. Charles, Illinois, August 1986, pp. 833\u2013835","key":"10.1016\/S0167-8191(99)00086-1_BIB46"},{"doi-asserted-by":"crossref","unstructured":"M. Dion, C. Randriamaro, Y. Robert, How to optimize residual communications? (special issue), Journal of Parallel and Distributed Computing on Compilation Techniques for Distributed Memory Systems 38 (1996)","key":"10.1016\/S0167-8191(99)00086-1_BIB47","DOI":"10.1109\/IPPS.1996.508085"},{"doi-asserted-by":"crossref","unstructured":"C. Dulong, The IA-64 architecture at work, IEEE Computer (1998) 24\u201332","key":"10.1016\/S0167-8191(99)00086-1_BIB48","DOI":"10.1109\/2.689674"},{"doi-asserted-by":"crossref","unstructured":"K. Ebcioglu, E. Altman, DAISY: dynamic compilation for 100% architectural compatibility, in: Proceedings of the International Symposium on Computer Architecture, Denver, Colorado, 1997, pp. 26\u201337","key":"10.1016\/S0167-8191(99)00086-1_BIB49","DOI":"10.1145\/264107.264126"},{"issue":"4","key":"10.1016\/S0167-8191(99)00086-1_BIB50","doi-asserted-by":"crossref","first-page":"349","DOI":"10.1016\/0743-7315(90)90035-N","article-title":"A report on sisal language project","volume":"10","author":"Feo","year":"1990","journal-title":"Journal of Parallel and Distributed Computing"},{"issue":"3","key":"10.1016\/S0167-8191(99)00086-1_BIB51","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1145\/24039.24041","article-title":"The program dependence graph and its use in optimization","volume":"9","author":"Ferrante","year":"1987","journal-title":"ACM Transactions on Programming Languages and Systems"},{"issue":"7","key":"10.1016\/S0167-8191(99)00086-1_BIB52","doi-asserted-by":"crossref","first-page":"478","DOI":"10.1109\/TC.1981.1675827","article-title":"Trace scheduling: a technique for global microcode compaction","volume":"30","author":"Fisher","year":"1981","journal-title":"IEEE Transactions on Computers"},{"issue":"9","key":"10.1016\/S0167-8191(99)00086-1_BIB53","doi-asserted-by":"crossref","first-page":"40","DOI":"10.1109\/2.612246","article-title":"Walk-time techniques: catalyst for architectural change","volume":"30","author":"Fisher","year":"1997","journal-title":"IEEE Computer"},{"issue":"4","key":"10.1016\/S0167-8191(99)00086-1_BIB54","doi-asserted-by":"crossref","first-page":"1156","DOI":"10.1145\/183432.183446","article-title":"Avoidance and suppression of compensation code in a trace scheduling compiler","volume":"16","author":"Freudenberger","year":"1994","journal-title":"ACM Transactions on Programming Languages and Systems"},{"doi-asserted-by":"crossref","unstructured":"D.H. Friendly, S.J. Patel, Y.N. Patt, Putting the fill unit to work: dynamic optimizations for trace cache microprocessors, in: Proceedings of the 31st Annual ACM\/IEEE Symposium on Microarchitecture, 1998, pp. 173\u2013181","key":"10.1016\/S0167-8191(99)00086-1_BIB55","DOI":"10.1109\/MICRO.1998.742779"},{"doi-asserted-by":"crossref","unstructured":"C. Fu, M.D. Jennings, S.Y. Larin, T.M. Conte, Value speculation scheduling for high performance processors, in: Proceedings of the International Conference on Architectural Support for Programming Languages and Operating Systems, 1998, pp. 262\u2013271","key":"10.1016\/S0167-8191(99)00086-1_BIB56","DOI":"10.1145\/291069.291058"},{"doi-asserted-by":"crossref","unstructured":"K. Gallivan, W. Jalby, U. Meier, The use of BLAS3 in linear algebra on a parallel processor with a hierarchical memory, Technical Report CSRD Rpt. No. 610, Center for Supercomputing Res. and Dev., University of Illinois, October 1986","key":"10.1016\/S0167-8191(99)00086-1_BIB57","DOI":"10.1137\/0908086"},{"doi-asserted-by":"crossref","unstructured":"J. Garcia, E. Ayguad\u00e9, J. Labarta, A novel approach towards automatic data distribution, in: Proceedings of the Workshop on Automatic Data Layout and Performance Prediction, Houston, TX, April 1995","key":"10.1016\/S0167-8191(99)00086-1_BIB58","DOI":"10.1145\/224170.224500"},{"doi-asserted-by":"crossref","unstructured":"M. Gerndt, H. Zima, SUPERB: Experiences and future research, in: Proceedings of the Workshop on Languages, Compilers, and Run-Time Environments for Distributed Memory Machines, North-Holland, Amsterdam, The Netherlands, 1992","key":"10.1016\/S0167-8191(99)00086-1_BIB59","DOI":"10.1016\/B978-0-444-88712-2.50005-6"},{"doi-asserted-by":"crossref","unstructured":"M. Girkar, C. Polychronopoulos, The HTG: An intermediate representation for programs based on control and data dependences, IEEE Transactions on Parallel and Distributed Systems 3 (2) (1992)","key":"10.1016\/S0167-8191(99)00086-1_BIB60","DOI":"10.1109\/71.127258"},{"doi-asserted-by":"crossref","unstructured":"G. Golf, K. Kennedy, C.W. Tseng, Practical dependence testing, in: Proceedings of the SIGPLAN'91 Conference on Programming Language Design and Implementation, Toronto, Canada, 1991","key":"10.1016\/S0167-8191(99)00086-1_BIB61","DOI":"10.1145\/113445.113448"},{"doi-asserted-by":"crossref","unstructured":"C. Gong, R. Gupta, R. Melhem, Compilation techniques for optimizing communication in distributed-memory systems, in: Proceedings 1993 International Conference on Parallel Processing, St. Charles, IL, August 1993","key":"10.1016\/S0167-8191(99)00086-1_BIB62","DOI":"10.1109\/ICPP.1993.58"},{"doi-asserted-by":"crossref","unstructured":"J.R. Goodman, W-C. Hsu, Code scheduling and register allocation in large basic blocks, in: Proceedings of ACM Supercomputing Conference, 1988, pp. 442\u2013452","key":"10.1016\/S0167-8191(99)00086-1_BIB63","DOI":"10.1145\/55364.55407"},{"doi-asserted-by":"crossref","unstructured":"M. Gupta, P. Banerjee, A methodology for high-level synthesis of communication for multicomputers, in: Proceedings of the ACM International Conference on Supercomputing, Washington, DC, 1992","key":"10.1016\/S0167-8191(99)00086-1_BIB64","DOI":"10.1145\/143369.143433"},{"issue":"2","key":"10.1016\/S0167-8191(99)00086-1_BIB65","doi-asserted-by":"crossref","first-page":"155","DOI":"10.1006\/jpdc.1996.0011","article-title":"On compiling array expressions for efficient execution on distributed-memory machines","volume":"32","author":"Gupta","year":"1996","journal-title":"Journal of Parallel and Distributed Computing"},{"issue":"4","key":"10.1016\/S0167-8191(99)00086-1_BIB66","doi-asserted-by":"crossref","first-page":"421","DOI":"10.1109\/32.54294","article-title":"Region scheduling: an approach for detecting and redistributing parallelism","volume":"16","author":"Gupta","year":"1990","journal-title":"IEEE Transactions on Software Engineering"},{"doi-asserted-by":"crossref","unstructured":"R. Gupta, Code optimization as a side effect of instruction scheduling, in: Proceedings of the International Conference on High Performance Computing, Bangalore, India, 1997, pp. 370\u2013377","key":"10.1016\/S0167-8191(99)00086-1_BIB67","DOI":"10.1109\/HIPC.1997.634517"},{"doi-asserted-by":"crossref","unstructured":"R. Gupta, R. Bodik, Register pressure sensitive redundancy elimination, in: Proceedings of the International Conference on Compiler Construction, Lecture Notes in Computer Science, vol. 1575, Springer, Amsterdam, Netherlands, pp. 107\u2013121","key":"10.1016\/S0167-8191(99)00086-1_BIB68","DOI":"10.1007\/978-3-540-49051-7_8"},{"doi-asserted-by":"crossref","unstructured":"R. Gupta, D. Berson, J.Z. Fang, Resource-sensitive profile-directed data flow analysis for code optimization, in: Proceedings of the 30th Annual IEEE\/ACM International Symposium on Microarchitecture, Research Triangle Park, North Carolina, 1997, pp. 358\u2013368","key":"10.1016\/S0167-8191(99)00086-1_BIB69","DOI":"10.1109\/MICRO.1997.645834"},{"doi-asserted-by":"crossref","unstructured":"R. Gupta, D. Berson, J.Z. Fang, Path profile guided partial redundancy elimination using speculation, in: Proceedings of the IEEE International Conference on Computer Languages, Chicago, Illinois, 1998, pp. 230\u2013239","key":"10.1016\/S0167-8191(99)00086-1_BIB70","DOI":"10.1109\/ICCL.1998.674173"},{"doi-asserted-by":"crossref","unstructured":"R. Gupta, D. Berson, J.Z. Fang, Path profile guided partial dead code elimination using predication, in: Proceedings of the International Conference on Parallel Architectures and Compilation Techniques, San Francisco, California, 1997, pp. 102\u2013115","key":"10.1016\/S0167-8191(99)00086-1_BIB71","DOI":"10.1109\/PACT.1997.644007"},{"doi-asserted-by":"crossref","unstructured":"M. Haghighat, C. Polychronopoulos, Symbolic analysis for parallelizing compilers, ACM Transactions on Programming Languages and Systems 18 (4) (1996)","key":"10.1016\/S0167-8191(99)00086-1_BIB72","DOI":"10.1145\/233561.233568"},{"doi-asserted-by":"crossref","unstructured":"R.E. Hank, W-M.W. Hwu, B.R. Rau, Region-based compilation: an introduction and motivation, in: Proceedings of the 28th Annual IEEE\/ACM International Symposium on Microarchitecture, 1995","key":"10.1016\/S0167-8191(99)00086-1_BIB73","DOI":"10.1109\/MICRO.1995.476821"},{"doi-asserted-by":"crossref","unstructured":"R.v. Hanxleden, K. Kennedy, Give-n-take \u2013 a balanced code placement framework, in: Proceedings of the ACM SIGPLAN '94 Conference on Programming Language Design and Implementation, Orlando, Florida, June 1994","key":"10.1016\/S0167-8191(99)00086-1_BIB74","DOI":"10.1145\/178243.178253"},{"doi-asserted-by":"crossref","unstructured":"R.v. Hanxleden, K. Kennedy, C. Koelbel, R. Das, J. Saltz, Compiler analysis for irregular problems in Fortran D, in: Proceedings Fifth Workshop on Languages and Compilers for Parallel Computing, New Haven, CT, August 1992","key":"10.1016\/S0167-8191(99)00086-1_BIB75","DOI":"10.1007\/3-540-57502-2_42"},{"doi-asserted-by":"crossref","unstructured":"P. Hatcher, M. Quinn, Data-Parallel Programming on MIMD Com;uters, The MIT Press, Cambridge, MA, 1991","key":"10.1016\/S0167-8191(99)00086-1_BIB76","DOI":"10.7551\/mitpress\/2278.001.0001"},{"unstructured":"J.L. Hennessy, D.A. Patterson, Computer Architecture: A Quantitative Approach, Morgan Kaufmann, Los Altos, CA, 1990","key":"10.1016\/S0167-8191(99)00086-1_BIB77"},{"unstructured":"High Performance Fortran Forum, High Performance Fortran language specification, version 2.0. Technical Report CRPC-TR92225, Center for Research on Parallel Computation, Rice University, Houston, TX, January 1997","key":"10.1016\/S0167-8191(99)00086-1_BIB78"},{"doi-asserted-by":"crossref","unstructured":"M.W. Hall, S. Hiranandani, K. Kennedy, C. Tseng, Interprocedural compilation of fortran D for MIMD distributed-memory machines, in: Proceedings of Supercomputing'92, Minneapolis, MN, 1992, pp. 522\u2013534","key":"10.1016\/S0167-8191(99)00086-1_BIB79","DOI":"10.1109\/SUPERC.1992.236652"},{"issue":"8","key":"10.1016\/S0167-8191(99)00086-1_BIB80","doi-asserted-by":"crossref","first-page":"66","DOI":"10.1145\/135226.135230","article-title":"Compiling Fortran D for MIMD distributed-memory machines","volume":"35","author":"Hiranandani","year":"1992","journal-title":"Communications of the ACM"},{"unstructured":"S. Hiranandani, K. Kennedy, C.-W. Tseng, Preliminary experiences with the fortran D compiler, in: Proceedings of Supercomputing'93, Portland, OR, November 1993","key":"10.1016\/S0167-8191(99)00086-1_BIB81"},{"issue":"4","key":"10.1016\/S0167-8191(99)00086-1_BIB82","doi-asserted-by":"crossref","first-page":"465","DOI":"10.1109\/71.149964","article-title":"Partitioning and labeling of loops by unimodular transformations","volume":"3","author":"D'Hollander","year":"1992","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"10.1016\/S0167-8191(99)00086-1_BIB83","doi-asserted-by":"crossref","first-page":"90","DOI":"10.1006\/jpdc.1993.1094","article-title":"Communication-free hyperplane partitioning of nested loops","volume":"19","author":"Huang","year":"1993","journal-title":"Journal of Parallel and Distributed Computing"},{"doi-asserted-by":"crossref","unstructured":"D.E. Hudak, S.G. Abraham, Compiling parallel loops for high performance computers \u2013 partitioning, in: Data Assignment and Remapping, Kluwer Academic Publishers, Boston, MA, 1993","key":"10.1016\/S0167-8191(99)00086-1_BIB84","DOI":"10.1007\/978-1-4615-3164-7"},{"issue":"1","key":"10.1016\/S0167-8191(99)00086-1_BIB85","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1109\/MC.1998.9730815","article-title":"Technology outlook: introduction to predicated execution","volume":"31","author":"Hwu","year":"1998","journal-title":"IEEE Computer"},{"unstructured":"W-M. Hwu, S.A. Mahlke, W.Y. Chen, P.P. Chang, N.J. Warter, R.A. Bringmann, R.G. Ouellette, R.E. Hank, T. Kiyohara, G.E. Haab, J.G. Holm, D.M. Lavery, The superblock: an effective technique for VLIW and superscalar compilation, Journal of Supercomputing A (1993) 229\u2013248","key":"10.1016\/S0167-8191(99)00086-1_BIB86"},{"unstructured":"IBM, Engineering and Scientific Subroutine Library (ESSL), Guide and Reference, Document SC23-0526-01, 1994","key":"10.1016\/S0167-8191(99)00086-1_BIB87"},{"doi-asserted-by":"crossref","unstructured":"F. Irigoin, R. Triolet, Supernode partitioning, in: Proceedings of the 15th ACM Symposium on Principles of Programming Languages, 1988","key":"10.1016\/S0167-8191(99)00086-1_BIB88","DOI":"10.1145\/73560.73588"},{"unstructured":"Y. Ishikawa, Multiple threads template library, Technical Report TR-96-012, Real World Computing Partnership, September 1996","key":"10.1016\/S0167-8191(99)00086-1_BIB89"},{"doi-asserted-by":"crossref","unstructured":"S. Jain, Circular scheduling: A new technique to perform software pipelining, in: Proceedings of the ACM SIGPLAN Conference on Programming Language Design and Implementation, Toronto, Canada, 1991, pp. 219\u2013228","key":"10.1016\/S0167-8191(99)00086-1_BIB90","DOI":"10.1145\/113445.113464"},{"unstructured":"V. Jain, S. Pande, Code motion for generating compact code on embedded DSPs', 1998 Workshop on Compiler and architecture support for embedded systems, Washington, DC, 4\u20136 December'98. Available under publications link at http:\/\/www.ececs.uc.edu\/ compiler","key":"10.1016\/S0167-8191(99)00086-1_BIB91"},{"unstructured":"E. Johnson, D. Gannon, HPC++: Experiments with the parallel standard template library, Technical Report TR-96-51, Indiana University, Department of Computer Science, December 1996","key":"10.1016\/S0167-8191(99)00086-1_BIB92"},{"doi-asserted-by":"crossref","unstructured":"R. Johnson, M. Schlansker, Analysis techniques for predicated code, in: Proceedings of the 29th Annual International Symposium on Microarchitecture, 1996, pp. 100\u2013113","key":"10.1016\/S0167-8191(99)00086-1_BIB93","DOI":"10.1109\/MICRO.1996.566454"},{"doi-asserted-by":"crossref","unstructured":"M. Kandemir, N. Shenoy, P. Banerjee, J. Ramanujam, A.Choudhary, Minimizing data and synchronization costs in one-way communication, in: International Conference on Parallel Processing, 1998, pp. 180\u2013188","key":"10.1016\/S0167-8191(99)00086-1_BIB94","DOI":"10.1109\/ICPP.1998.708483"},{"doi-asserted-by":"crossref","unstructured":"K. Kennedy, K.S. McKinley, Optimizing for parallelism and data locality, in: Proceedings of the ACM 1992 International Conference on Supercomputing, July 1992","key":"10.1016\/S0167-8191(99)00086-1_BIB95","DOI":"10.1145\/143369.143427"},{"doi-asserted-by":"crossref","unstructured":"K. Kennedy, N. Nedeljkovic, A. Sethi, A linear-time algorithm for computing the memory access sequence in data-parallel programs, in: Proceedings of Fifth ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, Santa Barbara, CA, 1995, pp. 102\u2013111","key":"10.1016\/S0167-8191(99)00086-1_BIB96","DOI":"10.1145\/209936.209948"},{"doi-asserted-by":"crossref","unstructured":"K. Kennedy, N. Nedeljkovic, Combining dependence and data-flow analyses to optimize communication, in: Proceedings Ninth International Parallel Processing Symposium, Santa Barbara, CA, 1995","key":"10.1016\/S0167-8191(99)00086-1_BIB97","DOI":"10.1109\/IPPS.1995.395954"},{"doi-asserted-by":"crossref","unstructured":"K. Kennedy, A. Sethi, Resource-based communication placement analysis, in: Proceedings Ninth Workshop on Languages and Compilers for Parallel Computing, San Jose, CA, August 1996","key":"10.1016\/S0167-8191(99)00086-1_BIB98","DOI":"10.1007\/BFb0017264"},{"issue":"7","key":"10.1016\/S0167-8191(99)00086-1_BIB99","doi-asserted-by":"crossref","first-page":"527","DOI":"10.1002\/cpe.4330050703","article-title":"Optimization techniques for SIMD fortran compilers","volume":"5","author":"Knobe","year":"1993","journal-title":"Concurrency: Practice and Experience"},{"doi-asserted-by":"crossref","unstructured":"K. Knobe, V. Sarkar, Array SSA form and its use in parallelization, in: Proceedings of the 25th ACM Symposium on Principles of Programming Languages, San Diego, California, January 1998","key":"10.1016\/S0167-8191(99)00086-1_BIB100","DOI":"10.1145\/268946.268956"},{"doi-asserted-by":"crossref","unstructured":"I. Kodukula, N. Ahmed, K. Pingali, Data centric multi-level blocking, in: Proceedings of the SIGPLAN ACM Conference on Programming Language Design and Implementation, 1997","key":"10.1016\/S0167-8191(99)00086-1_BIB101","DOI":"10.1145\/258915.258946"},{"doi-asserted-by":"crossref","unstructured":"C. Koelbel, Compile-time Generation of Communication for scientific programs, in: Proceedings of Supercomputing '91, Albuquerque, NM, 1991, pp. 101\u2013110","key":"10.1016\/S0167-8191(99)00086-1_BIB102","DOI":"10.1145\/125826.125890"},{"doi-asserted-by":"crossref","unstructured":"X. Kong, D. Klappholz, K. Psarris, The I-Test: An improved dependence test for automatic parallelization and vectorization, IEEE Transactions on Parallel and Distributed Systems, Special Issue on Parallel Languages and Compilers 2 (3) (1991)","key":"10.1016\/S0167-8191(99)00086-1_BIB103","DOI":"10.1109\/71.86109"},{"unstructured":"D. Knuth, The Art of Computer Programming, vol. 2, Seminumerical Algorithms, Addison-Wesley, Reading, MA, 1981","key":"10.1016\/S0167-8191(99)00086-1_BIB104"},{"unstructured":"Kuck and Associates, Inc., KAP for IBM fortran, user's guide version 3.3, Document #9603001, Champaign, IL, 1996","key":"10.1016\/S0167-8191(99)00086-1_BIB105"},{"issue":"2","key":"10.1016\/S0167-8191(99)00086-1_BIB106","doi-asserted-by":"crossref","first-page":"83","DOI":"10.1145\/360827.360844","article-title":"The parallel execution of DO loops","volume":"17","author":"Lamport","year":"1974","journal-title":"Communications of the ACM"},{"doi-asserted-by":"crossref","unstructured":"J. Lee, S.P. Midkiff, D.A. Padua, Concurrent static single assignment form and constant propagation for explicitly parallel Programs, in: Proceedings of the 10th International Workshop on Languages and Compilers for Parallel Computing, Lecture Notes in Computer Science, Springer, Minneapolis, MN, August 1997","key":"10.1016\/S0167-8191(99)00086-1_BIB107","DOI":"10.1007\/BFb0032687"},{"issue":"3","key":"10.1016\/S0167-8191(99)00086-1_BIB108","doi-asserted-by":"crossref","first-page":"361","DOI":"10.1109\/71.86111","article-title":"Compiling communication-efficient programs for massively parallel machines","volume":"2","author":"Li","year":"1991","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"unstructured":"Z. Li, P. Yew, Efficient interprocedural analysis for program parallelization and restructuring, in: Proceedings of the ACM\/SIGPLAN Symposium on Parallel Programming, New Haven, CT, July 1998","key":"10.1016\/S0167-8191(99)00086-1_BIB109"},{"doi-asserted-by":"crossref","unstructured":"Z. Li, P. Yew, C. Zhu, An efficient data dependence analysis for parallelizing compilers, IEEE Transactions on Parallel and Distributed Systems 1 (1) (1990)","key":"10.1016\/S0167-8191(99)00086-1_BIB110","DOI":"10.1109\/71.80122"},{"doi-asserted-by":"crossref","unstructured":"S. Liao et al., Storage assignment to decrease code size, ACM Transactions on Programming Languages and Systems 18 (3) (1996) 235\u2013253","key":"10.1016\/S0167-8191(99)00086-1_BIB111","DOI":"10.1145\/229542.229543"},{"doi-asserted-by":"crossref","unstructured":"S. Liao et al., Instruction selection using binate covering for code size optimization, in: Proceedings of the 1995 International Conference on Computer-Aided Design, 1995","key":"10.1016\/S0167-8191(99)00086-1_BIB112","DOI":"10.1109\/ICCAD.1995.480146"},{"doi-asserted-by":"crossref","unstructured":"A.W. Lim, M.S. Lam, Communication-free parallelization via affine transformations, in: Proceedings of the Seventh Workshop on Languages and Compilers for Parallel Computing, August 1994","key":"10.1016\/S0167-8191(99)00086-1_BIB113","DOI":"10.1007\/BFb0025873"},{"doi-asserted-by":"crossref","unstructured":"M.H. Lipasti, C.B. Wilkerson, J.P. Shen, Value locality and load value prediction, in: Proceedings of the Seventh International Conference on Architectural Support for Programming Languages and Operating Systems, Cambridge, MA, 1996, pp. 138\u2013149","key":"10.1016\/S0167-8191(99)00086-1_BIB114","DOI":"10.1145\/237090.237173"},{"unstructured":"T. MacDonald, D. Pase, A. Meltzer, Addressing in Cray research's MPP Fortran, in: Proceedings of the Third Workshop on Compilers for Parallel Computers, Vienna, Austria, 1992, pp. 161\u2013172","key":"10.1016\/S0167-8191(99)00086-1_BIB115"},{"key":"10.1016\/S0167-8191(99)00086-1_BIB116","doi-asserted-by":"crossref","first-page":"423","DOI":"10.1145\/233561.233564","article-title":"Improving data locality with loop transformations","volume":"18","author":"McKinley","year":"1996","journal-title":"ACM Transactions on Programming Languages and Systems"},{"key":"10.1016\/S0167-8191(99)00086-1_BIB117","first-page":"77","article-title":"Local iteration set computation for block-cyclic distributions","volume":"II","author":"Midkiff","year":"1995","journal-title":"Proceedings International Conference on Parallel Processing"},{"doi-asserted-by":"crossref","unstructured":"A.I. Moshovos, S.E. Breach, T.N. Vijaykumar, G.S. Sohi, Dynamic speculation and synchronization of data dependences, in: Proceedings of the 24th International Symposium on Computer Architecture, 1997","key":"10.1016\/S0167-8191(99)00086-1_BIB118","DOI":"10.1145\/264107.264189"},{"doi-asserted-by":"crossref","unstructured":"R. Nair, M.E. Hopkins, Exploiting instruction level parallelism in processors by caching scheduled groups, in: Proceedings of the International Symposium on Computer Architecture, Denver, Colorado, 1997, pp. 13\u201325","key":"10.1016\/S0167-8191(99)00086-1_BIB119","DOI":"10.1145\/264107.264125"},{"doi-asserted-by":"crossref","unstructured":"T. Nakra, R. Gupta, M.L. Soffa, Value prediction in VLIW machines, in: Proceedings of the ACM\/IEEE 26th International Symposium on Computer Architecture, Atlanta, Georgia, 1999","key":"10.1016\/S0167-8191(99)00086-1_BIB120","DOI":"10.1109\/ISCA.1999.765956"},{"doi-asserted-by":"crossref","unstructured":"C. Norris, L.L. Pollock, A scheduler-sensitive global register allocator, in: Proceedings of Supercomputing'93, Portland, Oregon, 1993, pp. 804\u2013813","key":"10.1016\/S0167-8191(99)00086-1_BIB121","DOI":"10.1145\/169627.169839"},{"doi-asserted-by":"crossref","unstructured":"S. Onder, R. Gupta, Superscalar execution with direct data forwarding, in: Proceedings of the International Conference on Parallel Architectures and Compilation Techniques, Paris, France, 1998, pp. 130\u2013135","key":"10.1016\/S0167-8191(99)00086-1_BIB122","DOI":"10.1109\/PACT.1998.727183"},{"unstructured":"J. Oplinger, D. Heine, S-W. Liao, B.A. Nayfeh, M.S. Lam, K. Olukotun, Software and hardware for exploiting speculative parallelism with a multiprocessor, Stanford University Computer Systems Lab, Technical Report CSL-TR-97-715, February 1997","key":"10.1016\/S0167-8191(99)00086-1_BIB123"},{"unstructured":"D.J. Palermo, Compiler techniques for optimizing communication and data distribution for distributed-memory multicomputers, Ph.D. Thesis, Department of Electrical and Computer Engineering, University of Ilinois, Urbana, IL, June 1996","key":"10.1016\/S0167-8191(99)00086-1_BIB124"},{"issue":"2","key":"10.1016\/S0167-8191(99)00086-1_BIB125","doi-asserted-by":"crossref","first-page":"158","DOI":"10.1006\/jpdc.1996.0138","article-title":"Dynamic data partitioning for distributed-memory multicomputers","volume":"38","author":"Palermo","year":"1996","journal-title":"Journal of Parallel and Distributed Computing"},{"doi-asserted-by":"crossref","unstructured":"S. Pande, T. Bali, A computation + communication load balanced loop partitioning method for distributed memory systems, Journal of Parallel and Distributed Computing, to appear","key":"10.1016\/S0167-8191(99)00086-1_BIB126","DOI":"10.1006\/jpdc.1999.1567"},{"key":"10.1016\/S0167-8191(99)00086-1_BIB127","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1006\/jpdc.1996.0134","article-title":"Compilation techniques for distributed memory systems: Guest editorial introduction (special issue)","volume":"38","author":"Pande","year":"1996","journal-title":"Journal of Parallel and Distributed Computing on Compilation Techniques for Distributed Memory Systems"},{"doi-asserted-by":"crossref","unstructured":"S. Pande, A compile time partitioning method for DOALL loops on distributed memory systems, in: International Conference on Parallel Processing, vol. III, IEEE Computer Society Press, Silver Spring, MD, 1996, pp. 35\u201344","key":"10.1016\/S0167-8191(99)00086-1_BIB128","DOI":"10.1109\/ICPP.1996.538557"},{"unstructured":"S. Pande, J. Ramanujam, Y. Robert, Workshop on challenges compiling for scalable parallel systems, in: Eighth IEEE Symposium on Parallel and Distributed Systems, October 1996","key":"10.1016\/S0167-8191(99)00086-1_BIB129"},{"doi-asserted-by":"crossref","unstructured":"K. Pingali, M. Beck, R. Johnson, M. Moudgill, P. Stodghill, Dependence flow graphs: An algebraic approach to program dependences, in: Proceedings of the ACM Symposium on Principles of Programming Languages, 1991","key":"10.1016\/S0167-8191(99)00086-1_BIB130","DOI":"10.1145\/99583.99595"},{"doi-asserted-by":"crossref","unstructured":"K. Psarris, The Banerjee-Wolfe and GCD tests on exact data dependence information, Journal of Parallel and Distributed Computing 32 (2) (1996)","key":"10.1016\/S0167-8191(99)00086-1_BIB131","DOI":"10.1006\/jpdc.1996.0009"},{"doi-asserted-by":"crossref","unstructured":"K. Psarris, D. Klappholz, X. Kong, On the accuracy of the Banerjee test, Journal of Parallel and Distributed Computing, Special Issue on Shared Memory Multiprocessors 12 (2) (1991)","key":"10.1016\/S0167-8191(99)00086-1_BIB132","DOI":"10.1016\/0743-7315(91)90019-6"},{"doi-asserted-by":"crossref","unstructured":"K. Psarris, S. Pande, An empirical study of the I test for exact data dependence, in: Proceedings of the 1994 International Conference on Parallel Processing, St. Charles, IL, August 1994","key":"10.1016\/S0167-8191(99)00086-1_BIB133","DOI":"10.1109\/ICPP.1994.55"},{"doi-asserted-by":"crossref","unstructured":"K. Psarris, X. Kong, D. Klappholz, The direction vector I test, IEEE Transactions on Parallel and Distributed Systems 4 (11) (1993)","key":"10.1016\/S0167-8191(99)00086-1_BIB134","DOI":"10.1109\/71.250105"},{"doi-asserted-by":"crossref","unstructured":"W. Pugh, A practical algorithm for exact array dependence analysis, Communications of the ACM 35 (8) (1992)","key":"10.1016\/S0167-8191(99)00086-1_BIB135","DOI":"10.1145\/135226.135233"},{"issue":"4","key":"10.1016\/S0167-8191(99)00086-1_BIB136","doi-asserted-by":"crossref","first-page":"472","DOI":"10.1109\/71.97903","article-title":"Compile-time techniques for data distribution in distributed memory machines","volume":"2","author":"Ramanujam","year":"1991","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"doi-asserted-by":"crossref","unstructured":"F. Rastello, A. Rao, S. Pande, Optimal task scheduling to minimize inter-tile latencies, in: International Conference on Parallel Processing, 1998, pp. 172\u2013179","key":"10.1016\/S0167-8191(99)00086-1_BIB137","DOI":"10.1109\/ICPP.1998.708480"},{"doi-asserted-by":"crossref","unstructured":"B.R. Rau, C.D. Glaser, Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing, in: Proceedings of the 14th Annual Microprogramming Workshop Chatham, MA, 1981, pp. 183\u2013198","key":"10.1016\/S0167-8191(99)00086-1_BIB138","DOI":"10.1145\/1014192.802449"},{"doi-asserted-by":"crossref","unstructured":"L. Rauchwerger, D. Padua, The LRPD test: Speculative run-time parallelization of loops with privatization and reduction parallelization, in: Proceedings of the ACM SIGPLAN Conference on Programming Language Design and Implementation, 1995","key":"10.1016\/S0167-8191(99)00086-1_BIB139","DOI":"10.1145\/207110.207148"},{"issue":"9","key":"10.1016\/S0167-8191(99)00086-1_BIB140","doi-asserted-by":"crossref","first-page":"897","DOI":"10.1109\/71.536935","article-title":"An implementation framework for HPF distributed arrays on message-passing parallel computer systems","volume":"7","author":"van Reeuwijk","year":"1996","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"unstructured":"Pacific-Sierra Research Corporation, VAST-2 for XL Fortran, User's Guide, Edition 1.2, Document Number VA061, Santa Monica, CA, 1994","key":"10.1016\/S0167-8191(99)00086-1_BIB141"},{"doi-asserted-by":"crossref","unstructured":"A. Rao, S. Pande, Storage assignment optimizations to generate compact and efficient code on embedded DSPs, in: ACM SIGPLAN Conference on Programming Language Design and Implementation, Atlanta, 1999","key":"10.1016\/S0167-8191(99)00086-1_BIB142","DOI":"10.1145\/301618.301653"},{"doi-asserted-by":"crossref","unstructured":"J. Ruttenberg, G.R. Gao, A. Stoutchinin, W. Lichtenstein, Software pipelining showdown: optimal vs. heuristic methods in a production compiler, in: Proceedings of the ACM SIGPLAN Conference on Programming Language Design and Implementation, Philadelphia, Pennsylvania, 1996, pp. 1\u201311","key":"10.1016\/S0167-8191(99)00086-1_BIB143","DOI":"10.1145\/231379.231385"},{"issue":"1","key":"10.1016\/S0167-8191(99)00086-1_BIB144","doi-asserted-by":"crossref","first-page":"224","DOI":"10.1006\/jpdc.1994.1134","article-title":"Optimizing CM Fortran compiler for connection machine computers","volume":"23","author":"Sabot","year":"1994","journal-title":"Journal of Parallel and Distributed Computing"},{"doi-asserted-by":"crossref","unstructured":"J. Saltz, K. Crowley, R. Mirchandaney, H. Berryman, Run-time scheduling and execution of loops on message passing machines, Journal of Parallel and Distributed Computing 8 (4) (1990)","key":"10.1016\/S0167-8191(99)00086-1_BIB145","DOI":"10.1016\/0743-7315(90)90129-D"},{"unstructured":"V. Sarkar, Partitioning and scheduling parallel programs for multiprocessors, Pitman, London and The MIT Press, Cambridge, MA, 1989","key":"10.1016\/S0167-8191(99)00086-1_BIB146"},{"doi-asserted-by":"crossref","unstructured":"V. Sarkar, Automatic partitioning of a program dependence graph into parallel tasks, IBM Journal of Research and Development, 35 (5\/6) (1991)","key":"10.1016\/S0167-8191(99)00086-1_BIB147","DOI":"10.1147\/rd.355.0779"},{"doi-asserted-by":"crossref","unstructured":"V. Sarkar, R. Thekkath, A general framework for iteration-reordering loop transformations, in: Proceedings of the ACM SIGPLAN Conference on Programming Language Design and Implementation, San Francisco, California, 1992, pp. 175\u2013187","key":"10.1016\/S0167-8191(99)00086-1_BIB148","DOI":"10.1145\/143095.143132"},{"doi-asserted-by":"crossref","unstructured":"V. Sarkar, Automatic selection of high order transformations in the IBM XL Fortran Compilers, IBM Journal of Research and Development 41 (3) (1997)","key":"10.1016\/S0167-8191(99)00086-1_BIB149","DOI":"10.1147\/rd.413.0233"},{"doi-asserted-by":"crossref","unstructured":"V. Sarkar, Analysis and optimization of explicitly parallel programs using the parallel program graph representation, in: Proceedings of the 10th International Workshop on Languages and Compilers for Parallel Computing, LNCS Springer, Minneapolis, MN, 1997","key":"10.1016\/S0167-8191(99)00086-1_BIB150","DOI":"10.1007\/BFb0032686"},{"issue":"2","key":"10.1016\/S0167-8191(99)00086-1_BIB151","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1147\/rd.302.0163","article-title":"A vectorizing Fortran compiler","volume":"30","author":"Scarborough","year":"1986","journal-title":"IBM Journal of Research and Development"},{"doi-asserted-by":"crossref","unstructured":"M. Schlansker, V. Kathail, Critical path reduction for scalar programs, in: 28th Annual IEEE\/ACM International Symposium on Microarchitecture, 1995","key":"10.1016\/S0167-8191(99)00086-1_BIB152","DOI":"10.1109\/MICRO.1995.476811"},{"doi-asserted-by":"crossref","unstructured":"T.J. Sheffler, R. Schreiber, J.R. Gilbert, W. Pugh, Efficient Distribution Analysis via Graph Contraction, in: Proceedings of the Eighth Workshop on Languages and Compilers for Parallel Computing, Lecture Notes in Computer Science 1033, Columbus, OH, August 1995. Springer, Berlin, 1996, pp. 377\u2013391","key":"10.1016\/S0167-8191(99)00086-1_BIB153","DOI":"10.1007\/BFb0014212"},{"doi-asserted-by":"crossref","unstructured":"K.-P. Shih, J.-P. Sheu, C.-H. Huang, Statement-level communication-free partitioning techniques for parallelizing compilers, in: Proceedings of the Ninth Workshop on Languages and Compilers for Parallel Computing, 1996","key":"10.1016\/S0167-8191(99)00086-1_BIB154","DOI":"10.1007\/BFb0017265"},{"doi-asserted-by":"crossref","unstructured":"M. Snir et al., The communication software and parallel environment of the IBM SP2, IBM Systems Journal 34 (2) (1995) 205\u2013221","key":"10.1016\/S0167-8191(99)00086-1_BIB155","DOI":"10.1147\/sj.342.0205"},{"unstructured":"SPAM Research Group, SPAM compiler user's manual, 1997, http:\/\/www.ee.princeton.edu\/spam","key":"10.1016\/S0167-8191(99)00086-1_BIB156"},{"unstructured":"H. Srinivasan, Optimizing explicitly parallel programs, Ph.D. Thesis, Department of Computer Science, University of Colorado, Denver, Colorado, 1994","key":"10.1016\/S0167-8191(99)00086-1_BIB157"},{"issue":"1","key":"10.1016\/S0167-8191(99)00086-1_BIB158","doi-asserted-by":"crossref","first-page":"150","DOI":"10.1006\/jpdc.1994.1048","article-title":"Generating communication for array statements: Design, implementation and evaluation","volume":"21","author":"Stichnoth","year":"1994","journal-title":"Journal of Parallel and Distributed Computing"},{"doi-asserted-by":"crossref","unstructured":"R. Subramanian, S. Pande, Efficient program partitioning based on compiler controlled communication, in: Proceedings of the Fourth International Workshop on High-Level Parallel Programming Models and Supportive Environments, to appear","key":"10.1016\/S0167-8191(99)00086-1_BIB159","DOI":"10.1007\/BFb0097884"},{"doi-asserted-by":"crossref","unstructured":"A. Sudarsanam et al., Optimization of embedded DSP programs using post-pass data-flow analysis, in: Proceedings of International Conference on Acoustics, Speech, and Signal Processing, 1997","key":"10.1016\/S0167-8191(99)00086-1_BIB160","DOI":"10.1109\/ICASSP.1997.599863"},{"doi-asserted-by":"crossref","unstructured":"A. Sudarsanam, S. Malik. Memory bank and register allocation in software synthesis for ASIPs, in: Proceedings of International Conference on Computer Aided Design, 1995, pp. 388\u2013392","key":"10.1016\/S0167-8191(99)00086-1_BIB161","DOI":"10.1109\/ICCAD.1995.480145"},{"doi-asserted-by":"crossref","unstructured":"A. Thirumalai, J. Ramanujam, Fast address sequence generation for data-parallel programs using integer lattices, in: Proceedings of the languages and compilers for parallel computing, Lecture Notes in Computer Science 1033, Springer, Berlin, 1996, pp. 191\u2013208","key":"10.1016\/S0167-8191(99)00086-1_BIB162","DOI":"10.1007\/BFb0014200"},{"issue":"2","key":"10.1016\/S0167-8191(99)00086-1_BIB163","doi-asserted-by":"crossref","first-page":"188","DOI":"10.1006\/jpdc.1996.0140","article-title":"Efficient computation of address sequences in data-parallel programs using closed forms for basis vectors","volume":"38","author":"Thirumalai","year":"1996","journal-title":"Journal of Parallel and Distributed Computing"},{"doi-asserted-by":"crossref","unstructured":"J. Tims, R. Gupta, M.L. Soffa, Dataflow analysis driven dynamic data partitioning, in: Fourth Workshop on Languages, Compilers, and Run-Time Systems for Scalable Computers, Lecture Notes in Computer Science, vol. 1511, Springer, Pittsburgh, PA, May 1998, pp. 75\u201390","key":"10.1016\/S0167-8191(99)00086-1_BIB164","DOI":"10.1007\/3-540-49530-4_6"},{"doi-asserted-by":"crossref","unstructured":"K.R. Traub, D.E. Culler, K.E. Schauser, Global analysis for partitioning non-strict programs into sequential threads, in: ACM Conference on Lisp and Functional Programming, San Francisco, CA, 1992","key":"10.1016\/S0167-8191(99)00086-1_BIB165","DOI":"10.1145\/141471.141568"},{"doi-asserted-by":"crossref","unstructured":"R. Triolet, F. Irigoin, P. Feautrier, Direct parallelization of call statements, in: Proceedings of the Sigplan Symposium on Compiler Construction, 1986, pp. 176\u2013185","key":"10.1016\/S0167-8191(99)00086-1_BIB166","DOI":"10.1145\/12276.13329"},{"doi-asserted-by":"crossref","unstructured":"P. Tu, D. Padua, Array privatization for shared and distributed memory machines, in: Proceedings Second Workshop on Languages, Compilers, and Run-Time Environments for Distributed Memory Machines, in ACM SIGPLAN Notices, 1993","key":"10.1016\/S0167-8191(99)00086-1_BIB167","DOI":"10.1145\/156668.156692"},{"doi-asserted-by":"crossref","unstructured":"L. Wang, J. Stichnoth, S. Chatterjee, Runtime performance of parallel array assignment: an empirical study, in: Proceedings Supercomputing 96, Pittsburgh, PA, 1996","key":"10.1016\/S0167-8191(99)00086-1_BIB168","DOI":"10.1145\/369028.369036"},{"doi-asserted-by":"crossref","unstructured":"H. Wijshoff, Data Organization in Parallel Computers, Kluwer Academic Publishers, Dordrecht, 1989","key":"10.1016\/S0167-8191(99)00086-1_BIB169","DOI":"10.1007\/978-1-4613-1711-1"},{"doi-asserted-by":"crossref","unstructured":"R. Wilson et al., SUIF: a parallelizing and optimizing research compiler, SIGPLAN Notices 29 (12) (1994) 31\u201337","key":"10.1016\/S0167-8191(99)00086-1_BIB170","DOI":"10.1145\/193209.193217"},{"issue":"4","key":"10.1016\/S0167-8191(99)00086-1_BIB171","doi-asserted-by":"crossref","first-page":"452","DOI":"10.1109\/71.97902","article-title":"A loop transformation theory and an algorithm to maximize parallelism","volume":"2","author":"Wolf","year":"1991","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"unstructured":"M.J. Wolfe, Optimizing Supercompilers for Supercomputers, Pitman, London and The MIT Press, Cambridge, MA, 1989","key":"10.1016\/S0167-8191(99)00086-1_BIB172"},{"unstructured":"M.J. Wolfe, Iteration space tiling for memory hierarchies, in: Proceedings of the Third SIAM Conference on Parallel Processing for Scientific Computing, 1987, pp. 357\u2013361","key":"10.1016\/S0167-8191(99)00086-1_BIB173"},{"doi-asserted-by":"crossref","unstructured":"M. Wolfe, C. Tseng, The power test for data dependence, IEEE Transactions on Parallel and Distributed Systems 3 (5) (1992)","key":"10.1016\/S0167-8191(99)00086-1_BIB174","DOI":"10.1109\/71.159042"}],"container-title":["Parallel Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167819199000861?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167819199000861?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T07:55:05Z","timestamp":1733298905000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0167819199000861"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1999,12]]},"references-count":174,"journal-issue":{"issue":"13-14","published-print":{"date-parts":[[1999,12]]}},"alternative-id":["S0167819199000861"],"URL":"https:\/\/doi.org\/10.1016\/s0167-8191(99)00086-1","relation":{},"ISSN":["0167-8191"],"issn-type":[{"type":"print","value":"0167-8191"}],"subject":[],"published":{"date-parts":[[1999,12]]}}}