{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,24]],"date-time":"2025-02-24T05:07:48Z","timestamp":1740373668751,"version":"3.37.3"},"publisher-location":"Berlin, Heidelberg","reference-count":23,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540280095"},{"type":"electronic","value":"9783540318132"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2005]]},"DOI":"10.1007\/11532378_7","type":"book-chapter","created":{"date-parts":[[2010,7,20]],"date-time":"2010-07-20T19:16:36Z","timestamp":1279653396000},"page":"72-86","source":"Crossref","is-referenced-by-count":2,"title":["Empirical Performance-Model Driven Data Layout Optimization"],"prefix":"10.1007","author":[{"given":"Qingda","family":"Lu","sequence":"first","affiliation":[]},{"given":"Xiaoyang","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Sriram","family":"Krishnamoorthy","sequence":"additional","affiliation":[]},{"given":"Gerald","family":"Baumgartner","sequence":"additional","affiliation":[]},{"given":"J.","family":"Ramanujam","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sadayappan","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"7_CR1","unstructured":"Aggregate Remote Memory Copy Interface, http:\/\/www.emsl.pnl.gov\/docs\/parsoft\/armci\/"},{"key":"7_CR2","doi-asserted-by":"crossref","unstructured":"Anderson, J.M., Amarasinghe, S.P., Lam, M.S.: Data and Computation Transformations for Multiprocessors. In: Proc. of the Fifth ACM SIGPLAN Symposium on Principles and Practice of Parallel Processing (July 1995)","DOI":"10.1145\/209936.209954"},{"key":"7_CR3","doi-asserted-by":"crossref","unstructured":"Baumgartner, G., Bernholdt, D.E., Cociorva, D., Harrison, R., Hirata, S., Lam, C., Nooijen, M., Pitzer, R., Ramanujam, J., Sadayappan, P.: A High-Level Approach to Synthesis of High-Performance Codes for Quantum Chemistry. In: Proc. of SC 2002 (November 2002)","DOI":"10.1109\/SC.2002.10056"},{"key":"7_CR4","unstructured":"Cannon, L.: A Cellular Computer to Implement the Kalman Filter Algorithm. PhD thesis, Montana State University (1969)"},{"key":"7_CR5","doi-asserted-by":"crossref","unstructured":"Cierniak, M., Li, W.: Unifying data and control transformations for distributed shared memory machines. In: ACM SIGPLAN IPDPS, pp. 205\u2013217 (1995)","DOI":"10.1145\/207110.207145"},{"key":"7_CR6","doi-asserted-by":"crossref","unstructured":"Cociorva, D., Baumgartner, G., Lam, C., Ramanujam, J., Sadayappan, P., Nooijen, M., Bernholdt, D., Harrison, R.: Space-Time Trade-Off Optimization for a Class of Electronic Structure Calculations. In: Proc. of ACM SIGPLAN PLDI 2002, pp. 177\u2013186 (2002)","DOI":"10.1145\/512529.512551"},{"key":"7_CR7","doi-asserted-by":"crossref","unstructured":"Cociorva, D., Gao, X., Krishnan, S., Baumgartner, G., Lam, C., Sadayappan, P., Ramanujam, J.: Global Communication Optimization for Tensor Contraction Expressions under Memory Constraints. In: Proc. of IPDPS (2003)","DOI":"10.1109\/IPDPS.2003.1213121"},{"issue":"1","key":"7_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/77626.79170","volume":"16","author":"J.J. Dongarra","year":"1990","unstructured":"Dongarra, J.J., Croz, J.D., Duff, I.S., Hammarling, S.: A set of level-3 basic linear algebra subprograms. ACM Transactions on Mathematical Software\u00a016(1), 1\u201317 (1990)","journal-title":"ACM Transactions on Mathematical Software"},{"issue":"4","key":"7_CR9","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1002\/(SICI)1096-9128(199704)9:4<255::AID-CPE250>3.0.CO;2-2","volume":"9","author":"R.A. Geijn Van De","year":"1997","unstructured":"Van De Geijn, R.A., Watts, J.: SUMMA: scalable universal matrix multiplication algorithm. Concurrency: Practice and Experience\u00a09(4), 255\u2013274 (1997)","journal-title":"Concurrency: Practice and Experience"},{"key":"7_CR10","unstructured":"Intel Math Kernel Library, http:\/\/www.intel.com\/software\/products\/mkl\/features.htm"},{"key":"7_CR11","first-page":"344","volume-title":"Proc. of LCPC","author":"Y. Ju","year":"1992","unstructured":"Ju, Y., Dietz, H.: Reduction of cache coherence overhead by compiler data layout and loop transformation. In: Proc. of LCPC, pp. 344\u2013358. Springer, Heidelberg (1992)"},{"issue":"9","key":"7_CR12","doi-asserted-by":"publisher","first-page":"922","DOI":"10.1109\/TPDS.2001.1184186","volume":"12","author":"M. Kandemir","year":"2001","unstructured":"Kandemir, M., Banerjee, P., Choudhary, A., Ramanujam, J., Ayguade, E.: Static and dynamic locality optimizations using integer linear programming. IEEE Transactions on Parallel and Distributed Systems\u00a012(9), 922\u2013941 (2001)","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"7_CR13","doi-asserted-by":"crossref","unstructured":"Kandemir, M., Choudhary, A., Ramanujam, J., Banerjee, P.: Improving locality using loop and data transformations in an integrated framework. In: International Symposium on Microarchitecture, pp. 285\u2013297 (1998)","DOI":"10.1109\/MICRO.1998.742790"},{"issue":"2","key":"7_CR14","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1109\/71.752779","volume":"10","author":"M. Kandemir","year":"1999","unstructured":"Kandemir, M., Choudhary, A., Shenoy, N., Banerjee, P., Ramanujam, J.: A linear algebra framework for automatic determination of optimal data layouts. IEEE Transactions on Parallel and Distributed Systems\u00a010(2), 115\u2013135 (1999)","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"issue":"12","key":"7_CR15","first-page":"1803","volume":"61","author":"K. Kennedy","year":"2001","unstructured":"Kennedy, K., Broom, B., Cooper, K., Dongarra, J., Fowler, R., Gannon, D., Johnsson, L., Crummey, J.M., Torczon, L.: Telescoping languages: A strategy for automatic generation of scientific problem-solving systems from annotated libraries. JPDC\u00a061(12), 1803\u20131826 (2001)","journal-title":"JPDC"},{"key":"7_CR16","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"406","DOI":"10.1007\/978-3-540-24596-4_44","volume-title":"High Performance Computing - HiPC 2003","author":"S. Krishnan","year":"2003","unstructured":"Krishnan, S., Krishnamoorthy, S., Baumgartner, G., Cociorva, D., Lam, C., Sadayappan, P., Ramanujam, J., Bernholdt, D.E., Choppella, V.: Data Locality Optimization for Synthesis of Efficient Out-of-Core Algorithms. In: Pinkston, T.M., Prasanna, V.K. (eds.) HiPC 2003. LNCS (LNAI), vol.\u00a02913, pp. 406\u2013417. Springer, Heidelberg (2003)"},{"key":"7_CR17","first-page":"47","volume-title":"Quantum Mechanical Electronic Structure Calculations with Chemical Accuracy","author":"T.J. Lee","year":"1997","unstructured":"Lee, T.J., Scuseria, G.E.: Achieving chemical accuracy with coupled cluster theory. In: Langhoff, S.R. (ed.) Quantum Mechanical Electronic Structure Calculations with Chemical Accuracy, pp. 47\u2013109. Kluwer Academic Publishers, Dordrecht (1997)"},{"key":"7_CR18","unstructured":"Leung, S., Zahorjan, J.: Optimizing data locality by array restructuring. Technical Report TR-95-09-01, Dept. Computer Science, University of Washington, Seattle, WA (1995)"},{"key":"7_CR19","doi-asserted-by":"crossref","unstructured":"Frigo, M., Johnson, S.: FFTW: An adaptive software architecture for the FFT. In: Proc. of ICASSP 1998, vol.\u00a03, pp. 1381\u20131384 (1998)","DOI":"10.1109\/ICASSP.1998.681704"},{"key":"7_CR20","doi-asserted-by":"crossref","unstructured":"Nieplocha, J., Harrison, R.J., Littlefield, R.J.: Global arrays: a portable programming model for distributed memory computers. In: Supercomputing, pp. 340\u2013349 (1994)","DOI":"10.1145\/602770.602833"},{"key":"7_CR21","unstructured":"O\u2019Boyle, M.F.P., Knijnenburg, P.M.W.: Non-singular data transformations: definition, validity, applications. In: Proc. of CPC1996, pp. 287\u2013297 (1996)"},{"key":"7_CR22","doi-asserted-by":"crossref","unstructured":"Whaley, R., Dongarra, J.: Automatically Tuned Linear Algebra Software (ATLAS). In: Proc. of Supercomputing 1998 (1998)","DOI":"10.1109\/SC.1998.10004"},{"issue":"5","key":"7_CR23","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1145\/780822.781140","volume":"38","author":"K. Yotov","year":"2003","unstructured":"Yotov, K., Li, X., Ren, G., Cibulskis, M., DeJong, G., Garzaran, M., Padua, D., Pingali, K., Stodghill, P., Wu, P.: A comparison of empirical and model-driven optimization. SIGPLAN Not.\u00a038(5), 63\u201376 (2003)","journal-title":"SIGPLAN Not"}],"container-title":["Lecture Notes in Computer Science","Languages and Compilers for High Performance Computing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/11532378_7.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,23]],"date-time":"2025-02-23T04:53:46Z","timestamp":1740286426000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/11532378_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2005]]},"ISBN":["9783540280095","9783540318132"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/11532378_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2005]]}}}