{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T05:27:52Z","timestamp":1747805272827,"version":"3.33.0"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2007,9,1]],"date-time":"2007-09-01T00:00:00Z","timestamp":1188604800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J Comput Sci Technol"],"published-print":{"date-parts":[[2007,9]]},"DOI":"10.1007\/s11390-007-9090-y","type":"journal-article","created":{"date-parts":[[2007,9,18]],"date-time":"2007-09-18T00:53:36Z","timestamp":1190076816000},"page":"641-652","source":"Crossref","is-referenced-by-count":14,"title":["Server-Based Data Push Architecture for Multi-Processor Environments"],"prefix":"10.1007","volume":"22","author":[{"given":"Xian-He","family":"Sun","sequence":"first","affiliation":[]},{"given":"Surendra","family":"Byna","sequence":"additional","affiliation":[]},{"given":"Yong","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2007,9,25]]},"reference":[{"unstructured":"DARPA. High productivity computing systems (HPCS), vision: Focus on the lost dimension of HPC \u2014 \u201cUser & system efficiency and productivity\u201d. http:\/\/www.darpa.mil\/ipto\/programs\/hpcs\/vision.htm .","key":"9090_CR1"},{"unstructured":"John Hennessy, David Patterson. Computer Architecture: A Quantitative Approach. Fourth edition, Morgan Kaufmann, ISBN: 0123704901, 2006.","key":"9090_CR2"},{"issue":"1","key":"9090_CR3","doi-asserted-by":"crossref","first-page":"20","DOI":"10.1145\/216585.216588","volume":"23","author":"A Wulf Wm","year":"March 1995","unstructured":"Wm A Wulf, Sally A McKee. Hitting the memory wall: Implications of the obvious. ACM SIGARPH Computer Architecture News, March 1995, 23(1): 20\u201324.","journal-title":"ACM SIGARPH Computer Architecture News"},{"issue":"5","key":"9090_CR4","doi-asserted-by":"crossref","first-page":"609","DOI":"10.1109\/12.381947","volume":"44","author":"T F Chen","year":"1995","unstructured":"Chen T F, Baer J L. Effective hardware-based data prefetching for high performance processors. IEEE Transactions on Computers, 1995, 44(5): 609\u2013623.","journal-title":"IEEE Transactions on Computers"},{"doi-asserted-by":"crossref","unstructured":"Dahlgren F, Dubois M, Stenstr\u00f6m P. Fixed and adaptive sequential prefetching in shared-memory multiprocessors. In Proc. International Conference on Parallel Processing (ICPP), Los Alamitos, CA, USA, CRC Press, 1993, Vol.1, pp.56\u201363.","key":"9090_CR5","DOI":"10.1109\/ICPP.1993.92"},{"doi-asserted-by":"crossref","unstructured":"Fu J, Patel J H. Data prefetching in multiprocessor vector cache memories. In Proc. the 17th Annual International Symposium on Computer Architecture, Toronto, Canada, 1991, pp.54\u201363.","key":"9090_CR6","DOI":"10.1145\/115952.115959"},{"doi-asserted-by":"crossref","unstructured":"Joseph D, Grunwald D. Prefetching using Markov predictors. In Proc. the 24th International Symposium on Computer Architecture, Denver-Colorado, 1997, pp.252\u2013263.","key":"9090_CR7","DOI":"10.1145\/264107.264207"},{"doi-asserted-by":"crossref","unstructured":"Gokul Kandiraju, Anand Sivasubramaniam. Going the distance for TLB prefetching: An application-driven study. In Proc. the International Symposium on Computer Architecture, Anchorage, Alaska, 2002, p.195.","key":"9090_CR8","DOI":"10.1145\/545214.545237"},{"doi-asserted-by":"crossref","unstructured":"Alexander T, Kedem G. Distributed predictive cache design for high performance memory system. In Proc. the 2nd International Symposium on High Performance Computer Architecture (HPCA), San Jose, CA, 1996, pp.254\u2013263.","key":"9090_CR9","DOI":"10.1109\/HPCA.1996.501191"},{"doi-asserted-by":"crossref","unstructured":"Collins J, Tullsen D, Wang H, Shen J. Dynamic speculative precomputation. In Proc. the 34th International Symposium on Microarchitecture, Austin, Texas, 2001, pp.306\u2013317.","key":"9090_CR10","DOI":"10.1109\/MICRO.2001.991128"},{"doi-asserted-by":"crossref","unstructured":"Wessam Hassanein, Jos\u00e9 Fortes, Rudolf Eigenmann. Data forwarding through in-memory precomputation threads. In Proc. the International Conference on Supercomputing (ICS), 2004.","key":"9090_CR11","DOI":"10.1145\/1006209.1006239"},{"unstructured":"Hughes C J. Prefetching linked data structures in systems with merged DRAM-logic [Thesis]. University of Illinois at Urbana-Champaign, Technical Report UIUCDCS-R-2001-2221, May 2000.","key":"9090_CR12"},{"doi-asserted-by":"crossref","unstructured":"Liao S, Wang P, Wang H, Hoflehner G, Lavery D, Shen J. Post-pass binary adaptation tool for software-based speculative precomputation. In Proc. the ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI'02), Berlin, Germany, 2002, pp.117\u2013128.","key":"9090_CR13","DOI":"10.1145\/512541.512544"},{"doi-asserted-by":"crossref","unstructured":"Chi-Keung Luk. Tolerating memory latency through software-controlled pre-execution in simultaneous multithreading processors. In Proc. the 28th Annual International Symposium on Computer Architecture, G\u00f6eborg, Sweden, 2001, pp.40\u201351.","key":"9090_CR14","DOI":"10.1145\/379240.379250"},{"doi-asserted-by":"crossref","unstructured":"Amir Roth, Gurindar S Sohi. Speculative data-driven multithreading. In Proc. the 7th International Symposium on High Performance Computer Architecture, Nuevo Lenone, Mexico, 2001, p.37.","key":"9090_CR15","DOI":"10.1109\/HPCA.2001.903250"},{"doi-asserted-by":"crossref","unstructured":"Craig Zilles, Gurindar Sohi. Execution-based prediction using speculative slices. In Proc. the 28th Annual International Symposium on Computer Architecture (ISCA), G\u00f6eborg, Sweden, 2001, pp.2\u201313.","key":"9090_CR16","DOI":"10.1145\/379240.379246"},{"doi-asserted-by":"crossref","unstructured":"Yang C L, Lebeck A R. Push vs. pull: Data movement for linked data structures. In Proc. the International Conference on Supercomputing (ICS), Santa Fe, New Mexcio, 2000, pp.176\u2013186, pp.176\u2013186.","key":"9090_CR17","DOI":"10.1145\/335231.335248"},{"doi-asserted-by":"crossref","unstructured":"James E Smith. Decoupled access\/execute computer architectures. In Proc. the 9th Annual International Symposium on Computer Architecture (ISCA), Gold Coast, Queensland, 1982, pp.112\u2013119.","key":"9090_CR18","DOI":"10.1145\/1067649.801719"},{"unstructured":"Culler D, Singh J P, Gupta A. Parallel Computer Architecture: A Hardware\/Software Approach. Morgan Kaufmann, ISBN 1558603433, August 1998.","key":"9090_CR19"},{"unstructured":"Xian-He Sun, Surendra Byna. Data-access memory servers for multi-processor environments. IIT CS TR-2005-001, November 2005, http:\/\/www.cs.iit.edu\/\u223csuren\/research.html .","key":"9090_CR20"},{"unstructured":"Burger D C, Austin T M, Bennett S. Evaluating future microprocessors: The SimpleScalar tool set. Technical Report 1308, University of Wisconsin-Madison Computer Sciences, 1996.","key":"9090_CR21"},{"doi-asserted-by":"crossref","unstructured":"Surendra Byna, Xian-He Sun, William Gropp, Rajeev Thakur. Predicting the memory-access cost based on data access patterns. In Proc. the IEEE International Conference on Cluster Computing, San Diego, September 2004, pp.327\u2013336.","key":"9090_CR22","DOI":"10.1109\/CLUSTR.2004.1392630"},{"doi-asserted-by":"crossref","unstructured":"Annavaram M, Patel J M, Davidson E S. Data prefetching by dependence graph pre-computation. In Proc. the 28th International Symposium on Computer Architecture (ISCA), G\u00f6eborg, Sweden, 2001, pp.52\u201361.","key":"9090_CR23","DOI":"10.1145\/379240.379251"},{"doi-asserted-by":"crossref","unstructured":"Kohout N, Choi S, Kim D, Yeung D. Multi-chain prefetching: Effective exploitation of inter-chain memory parallelism for pointer-chasing codes. In Proc. the 10th International Conference on Parallel Architectures and Compilation Techniques, Barcelona, Spain, 2001, pp.268\u2013279.","key":"9090_CR24","DOI":"10.1109\/PACT.2001.953307"},{"doi-asserted-by":"crossref","unstructured":"Roth A, Moshovos A, Sohi G S. Dependence based prefetching for linked data structures. In Proc. the 8th International Conference on Architectural Support for Programming Languages and Operating Systems, San Jose, CA, 1998, pp.115\u2013126.","key":"9090_CR25","DOI":"10.1145\/291069.291034"},{"doi-asserted-by":"crossref","unstructured":"Ilya Ganusov, Martin Burtscher. Future execution: A hardware prefetching technique for chip multiprocessors. In Proc. the 14th Annual International Conference on Parallel Architectures and Compilation Techniques (PACT'05), Saint Louis, MO, 2005, pp.350\u2013360.","key":"9090_CR26","DOI":"10.1109\/PACT.2005.23"},{"doi-asserted-by":"crossref","unstructured":"Conway J H, Guy R K. The Book of Numbers. Springer-Verlag, New York, 1996, ISBN: 038797993X.","key":"9090_CR27","DOI":"10.1007\/978-1-4612-4072-3"},{"unstructured":"Box G E P X, Jenkins G M, Reinsel G C. Time Series Analysis: Forecasting and Control. 3rd ed, Prentice Hall, 1994.","key":"9090_CR28"},{"doi-asserted-by":"crossref","unstructured":"Jack Doweck. Inside Intel core microarchitecture and smart memory access. White paper, Intel Research website, Available online at http:\/\/download.intel.com\/technology\/architecture\/sma.pdf , 2006.","key":"9090_CR29","DOI":"10.1109\/HOTCHIPS.2006.7477876"},{"unstructured":"Sun Microsystems. UltraSPARC IV Processor Architecture Overview. www.sun.com\/processors\/white-papers\/us4_whitepaper.pdf","key":"9090_CR30"},{"unstructured":"IBM. Cell Broadband Engine resource center. http:\/\/www-128.ibm.com\/developerworks\/power\/cell\/ .","key":"9090_CR31"},{"doi-asserted-by":"crossref","unstructured":"Thomas R Puzak, A Hartstein, P G Emma, V Srinivasan. When prefetching improves\/degrades performance. In Proc. the 2nd Conference on Computing Frontiers, Ischia, Italy, May 04\u201306, 2005, pp.342\u2013352.","key":"9090_CR32","DOI":"10.1145\/1062261.1062317"},{"unstructured":"Standard Performance Evaluation Corporation. SPEC Benchmarks, http:\/\/www.spec.org\/ .","key":"9090_CR33"},{"issue":"1","key":"9090_CR34","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/77626.79170","volume":"16","author":"Jack J Dongarra","year":"1990","unstructured":"Jack J Dongarra, Jeremy Du Croz, Sven Hammarling, Iain Duff. A set of level 3 basic linear algebra subprograms. ACM Transactions on Mathematical Software, 1990, 16(1): 1\u201317.","journal-title":"ACM Transactions on Mathematical Software"},{"unstructured":"John D McCalpin. Memory bandwidth and machine balance in current high performance computers. IEEE Technical Committee on Computer Architecture, 1995, http:\/\/www.cs.virginia.edu\/stream .","key":"9090_CR35"},{"doi-asserted-by":"crossref","unstructured":"Sherwood T, Perelman E, Calder B. Basic block distribution analysis to find periodic behavior and simulation points in applications. In Proc. the International Conference on Parallel Architectures and Compilation Techniques, Barcelona, Spain, 2001, pp.3\u201314.","key":"9090_CR36","DOI":"10.1109\/PACT.2001.953283"},{"issue":"7","key":"9090_CR37","doi-asserted-by":"crossref","first-page":"733","DOI":"10.1109\/71.395402","volume":"6","author":"F Dahlgren","year":"1995","unstructured":"Dahlgren F, Dubois M, Stenstr\u00f6m P. Sequential hardware prefetching in shared-memory multiprocessors. IEEE Transactions on Parallel and Distributed Systems, 1995, 6(7): 733\u2013746.","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"doi-asserted-by":"crossref","unstructured":"Yue Liu, David R Kaeli. Branch-directed and stride-based data cache prefetching. In Proc. the 1996 International Conference on Computer Design, VLSI in Computers and Processors, October 7\u20139, 1996, pp.225\u2013230.","key":"9090_CR38","DOI":"10.1109\/ICCD.1996.563561"},{"issue":"2","key":"9090_CR39","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1016\/0743-7315(91)90014-Z","volume":"12","author":"T Mowry","year":"June 1991","unstructured":"Mowry T, Gupta A. Tolerating latency through software-controlled prefetching in shared-memory multiprocessors. Journal of Parallel and Distributed Computing, June 1991, 12(2): 87\u2013106.","journal-title":"Journal of Parallel and Distributed Computing"},{"issue":"2","key":"9090_CR40","doi-asserted-by":"crossref","first-page":"218","DOI":"10.1109\/12.752663","volume":"48","author":"V S Pai","year":"February 1999","unstructured":"Pai V S, Ranganathan P, Abdel-Shafi H, Adve S. The impact of exploiting instruction-level parallelism on shared-memory multiprocessors. IEEE Transactions on Computers, February 1999, 48(2): 218\u2013226.","journal-title":"IEEE Transactions on Computers"},{"doi-asserted-by":"crossref","unstructured":"Zhou H. Dual-core execution: Building a highly scalable single-thread instruction window. In Proc. the 2005 International Conference on Parallel Architectures and Compilation Techniques (PACT'05), Saint Louis, MO, 2005, pp.231\u2013242.","key":"9090_CR41","DOI":"10.1109\/PACT.2005.18"},{"doi-asserted-by":"crossref","unstructured":"Solihin Y, Lee J, Torrellas J. Using a user-level memory thread for correlation prefetching. In Proc. International Symposium on Computer Architecture, Anchorage, Alaska, May 2002, pp.171\u2013182.","key":"9090_CR42","DOI":"10.1109\/ISCA.2002.1003576"}],"container-title":["Journal of Computer Science and Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-007-9090-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11390-007-9090-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-007-9090-y","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,21]],"date-time":"2025-01-21T01:16:15Z","timestamp":1737422175000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11390-007-9090-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2007,9]]},"references-count":42,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2007,9]]}},"alternative-id":["9090"],"URL":"https:\/\/doi.org\/10.1007\/s11390-007-9090-y","relation":{},"ISSN":["1000-9000","1860-4749"],"issn-type":[{"type":"print","value":"1000-9000"},{"type":"electronic","value":"1860-4749"}],"subject":[],"published":{"date-parts":[[2007,9]]}}}