{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,4,17]],"date-time":"2024-04-17T22:40:04Z","timestamp":1713393604590},"reference-count":59,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2012,1,17]],"date-time":"2012-01-17T00:00:00Z","timestamp":1326758400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2012,10]]},"DOI":"10.1007\/s11227-011-0735-9","type":"journal-article","created":{"date-parts":[[2012,1,16]],"date-time":"2012-01-16T13:39:27Z","timestamp":1326721167000},"page":"510-549","source":"Crossref","is-referenced-by-count":9,"title":["Active memory controller"],"prefix":"10.1007","volume":"62","author":[{"given":"Zhen","family":"Fang","sequence":"first","affiliation":[]},{"given":"Lixin","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"John B.","family":"Carter","sequence":"additional","affiliation":[]},{"given":"Sally A.","family":"McKee","sequence":"additional","affiliation":[]},{"given":"Ali","family":"Ibrahim","sequence":"additional","affiliation":[]},{"given":"Michael A.","family":"Parker","sequence":"additional","affiliation":[]},{"given":"Xiaowei","family":"Jiang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2012,1,17]]},"reference":[{"key":"735_CR1","doi-asserted-by":"crossref","first-page":"132","DOI":"10.1109\/HPCA.2005.30","volume-title":"Proceedings of the eleventh annual symposium on high performance computer architecture","author":"JH Ahn","year":"2005","unstructured":"Ahn JH, Erez M, Dally WJ (2005) Scatter-add in data parallel architectures. In: Proceedings of the eleventh annual symposium on high performance computer architecture, Feb 2005, pp 132\u2013142"},{"key":"735_CR2","first-page":"266","volume-title":"Proceedings of the 25th VLDB conference","author":"A Ailamaki","year":"1999","unstructured":"Ailamaki A, DeWitt D, Hill M, Wood DA (1999) DBMSs on a modern processor: where does time go. In: Proceedings of the 25th VLDB conference, Edinburgh, Scotland, Sept 1999, pp 266\u2013277"},{"key":"735_CR3","first-page":"194","volume-title":"Proceedings of the 1995 international conference on parallel architectures and compilation techniques","author":"DH Albonesi","year":"1995","unstructured":"Albonesi DH, Koren I (1995) An analytical model of high performance superscalar-based multiprocessors. In: Proceedings of the 1995 international conference on parallel architectures and compilation techniques, Sept 1995, pp 194\u2013203"},{"issue":"1","key":"735_CR4","doi-asserted-by":"crossref","first-page":"6","DOI":"10.1109\/71.80120","volume":"1","author":"T Anderson","year":"1990","unstructured":"Anderson T (1990) The performance of spin lock alternatives for shared-memory multiprocessors. IEEE Trans Parallel Distrib Syst 1(1):6\u201316","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"735_CR5","first-page":"3","volume-title":"Proceedings of the 25th annual international symposium on computer architecture","author":"LA Barroso","year":"1998","unstructured":"Barroso LA, Gharachorloo K, Bugnion E (1998) Memory system characterization of commercial workloads. In: Proceedings of the 25th annual international symposium on computer architecture, Barcelona, Spain, pp 3\u201314"},{"key":"735_CR6","first-page":"331","volume-title":"Proceedings of IEEE\/ACM 37th international symposium on microarchitecture","author":"C Batten","year":"2004","unstructured":"Batten C, Krashinsky R, Gerding S, Asanovic K (2004) Cache refill\/access decoupling for vector machines. In: Proceedings of IEEE\/ACM 37th international symposium on microarchitecture, Dec 2004, pp 331\u2013342"},{"key":"735_CR7","first-page":"23","volume-title":"Proceedings of the 20th international symposium on parallel algorithms and architectures","author":"G Blelloch","year":"2008","unstructured":"Blelloch G, Gibbons P, Vardhan S (2008) Combinable memory-block transactions. In: Proceedings of the 20th international symposium on parallel algorithms and architectures, June 2008, pp 23\u201334"},{"key":"735_CR8","first-page":"54","volume-title":"Proceedings of the 25th VLDB conference","author":"PA Boncz","year":"1999","unstructured":"Boncz PA, Manegold S, Kersten ML (1999) Database architecture optimized for the new bottleneck: memory access. In: Proceedings of the 25th VLDB conference, Edinburgh, Scotland, Sept 1999, pp 54\u201365"},{"key":"735_CR9","doi-asserted-by":"crossref","first-page":"454","DOI":"10.1145\/305138.305234","volume-title":"Proceedings of the 1999 international conference on supercomputing","author":"JB Brockman","year":"1999","unstructured":"Brockman JB, Kogge PM, Sterling TL, Freeh VW, Kuntz SK (1999) Microservers: a new memory semantics for massively parallel computing. In: Proceedings of the 1999 international conference on supercomputing, June 1999, pp 454\u2013463"},{"key":"735_CR10","first-page":"365","volume-title":"Proceedings of the 12th international workshop on languages and compilers for parallel computing","author":"C Cascaval","year":"1999","unstructured":"Cascaval C, Rose LD, Padua DA, Reed DA (1999) Compile-time based performance prediction. In: Proceedings of the 12th international workshop on languages and compilers for parallel computing, pp 365\u2013379"},{"issue":"1","key":"735_CR11","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1147\/rd.191.0043","volume":"19","author":"KM Chandy","year":"1975","unstructured":"Chandy KM, Herzog U, Woo LS (1975) Approximate analysis of general queuing networks. IBM J Res Dev 19(1):43\u201349","journal-title":"IBM J Res Dev"},{"key":"735_CR12","doi-asserted-by":"crossref","first-page":"666","DOI":"10.1109\/SUPERC.1990.130084","volume-title":"Proceedings of supercomputing \u201990","author":"S Chatterjee","year":"1990","unstructured":"Chatterjee S, Blelloch G, Zagha M (1990) Scan primitives for vector computers. In: Proceedings of supercomputing \u201990, June 1990, pp 666\u2013675"},{"key":"735_CR13","unstructured":"Fang Z (2006) Active memory operations. PhD thesis, University of Utah"},{"key":"735_CR14","doi-asserted-by":"crossref","first-page":"1158","DOI":"10.1016\/j.jpdc.2005.04.013","volume":"65","author":"Z Fang","year":"2005","unstructured":"Fang Z, Zhang L, Cheng L, Carter J, Parker M (2005) Fast synchronization on shared-memory multiprocessors: an architectural approach. J Parallel Distrib Comput 65:1158\u20131170","journal-title":"J Parallel Distrib Comput"},{"key":"735_CR15","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1109\/PACT.2001.953304","volume-title":"Proceedings of the 2001 international conference on parallel architectures and compilation techniques","author":"M Garzaran","year":"2001","unstructured":"Garzaran M, Prvulovic M, Zhang Y, Jula A, Yu H, Rauchwerger L, Torrellas J (2001) Architectural support for parallel reductions in scalable shared-memory multiprocessors. In: Proceedings of the 2001 international conference on parallel architectures and compilation techniques, Sept 2001, pp 243\u2013254"},{"issue":"2","key":"735_CR16","doi-asserted-by":"crossref","first-page":"164","DOI":"10.1145\/69624.357206","volume":"5","author":"A Gottlieb","year":"1983","unstructured":"Gottlieb A, Grishman R, Kruskal C, McAuliffe K, Rudolph L, Snir M (1983) The NYU multicomputer\u2014designing a MIMD shared-memory parallel machine. ACM Trans Program Lang Syst 5(2):164\u2013189","journal-title":"ACM Trans Program Lang Syst"},{"key":"735_CR17","volume-title":"The benchmark handbook for database and transaction systems, Chap\u00a06","year":"1993","unstructured":"Gray J (ed) (1993) The benchmark handbook for database and transaction systems, Chap\u00a06, 2nd edn. Morgan Kaufmann, San Mateo","edition":"2"},{"key":"735_CR18","volume-title":"Supercomputing\u201999","author":"M Hall","year":"1999","unstructured":"Hall M, Kogge P, Koller J, Diniz P, Chame J, Draper J, LaCoss J, Granacki J, Brockman J, Srivastava A, Athas W, Freeh V (1999) Mapping irregular appilcations to DIVA, a PIM-based data-intensive architecture. In: Supercomputing\u201999, Nov 1999"},{"key":"735_CR19","first-page":"365","volume-title":"Proceedings of the ninth annual symposium on high performance computer architecture","author":"M Hao","year":"2003","unstructured":"Hao M, Heinrich M (2003) Active I\/O switches in system area networks. In: Proceedings of the ninth annual symposium on high performance computer architecture, Feb 2003, pp 365\u2013376"},{"key":"735_CR20","unstructured":"Hewlett-Packard Inc (2011) The open source database benchmark"},{"key":"735_CR21","unstructured":"Intel Corporation (2011) Intel Itanium2 processor reference manual"},{"key":"735_CR22","unstructured":"International Technology Roadmap for Semiconductors (2011) Executive summary 2003 edition. http:\/\/public.itrs.net\/Files\/2003ITRS\/Home2003.htm"},{"issue":"2","key":"735_CR23","doi-asserted-by":"crossref","first-page":"40","DOI":"10.1109\/MM.2004.1289290","volume":"24","author":"R Kalla","year":"2004","unstructured":"Kalla R, Sinharoy B, Tendler JM (2004) IBM Power5 chip: a dual-core multithreaded processor. IEEE MICRO 24(2):40\u201347","journal-title":"IEEE MICRO"},{"key":"735_CR24","volume-title":"Workshop on workload characterization","author":"K Keeton","year":"1999","unstructured":"Keeton K, Patterson DA (1999) Towards a simplified database workloads for computer architecture evaluations. In: Workshop on workload characterization, Austin, TX, USA, Oct 1999"},{"issue":"2","key":"735_CR25","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1109\/40.755465","volume":"19","author":"RE Kessler","year":"1999","unstructured":"Kessler RE (1999) The Alpha 21264 microprocessor. IEEE MICRO 19(2):24\u201336","journal-title":"IEEE MICRO"},{"issue":"3","key":"735_CR26","doi-asserted-by":"crossref","first-page":"288","DOI":"10.1109\/TC.2004.1261836","volume":"53","author":"D Kim","year":"2004","unstructured":"Kim D, Chaudhuri M, Heinrich M, Speight E (2004) Architectural support for uniprocessor and multiprocessor active memory systems. IEEE Trans Comput 53(3):288\u2013307","journal-title":"IEEE Trans Comput"},{"key":"735_CR27","unstructured":"Koester D, Kepner J (2003) HPCS assessment framework and benchmarks. MITRE and MIT Lincoln Laboratory, Mar 2003"},{"key":"735_CR28","volume-title":"International conference on parallel processing","author":"P Kogge","year":"1994","unstructured":"Kogge P (1994) The EXECUBE approach to massively parallel processing. In: International conference on parallel processing, Aug 1994"},{"key":"735_CR29","first-page":"441","volume-title":"Proceedings of the 35th annual international symposium on computer architecture","author":"S Kumar","year":"2008","unstructured":"Kumar S et al (2008) Atomic vector operations on chip multiprocessors. In: Proceedings of the 35th annual international symposium on computer architecture, June 2008, pp 441\u2013452"},{"key":"735_CR30","doi-asserted-by":"crossref","first-page":"302","DOI":"10.1109\/ISCA.1994.288140","volume-title":"Proceedings of the 21st annual international symposium on computer architecture","author":"J Kuskin","year":"1994","unstructured":"Kuskin J et al (1994) The Stanford FLASH multiprocessor. In: Proceedings of the 21st annual international symposium on computer architecture, Chicago, IL, USA, May 1994, pp 302\u2013313"},{"key":"735_CR31","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1145\/264107.264206","volume-title":"ISCA97","author":"J Laudon","year":"1997","unstructured":"Laudon J, Lenoski D (1997) The SGI Origin: a ccNUMA highly scalable server. In: ISCA97, Denver, CO, USA, June 1997, pp 241\u2013251"},{"key":"735_CR32","doi-asserted-by":"crossref","first-page":"2","DOI":"10.1145\/1005686.1005691","volume-title":"Proceedings of the international conference on measurement and modeling of computer systems (Sigmetrics \u201904)","author":"G Marin","year":"2004","unstructured":"Marin G, Mellor-Crummey JM (2004) Cross-architecture performance predictions for scientific applications using parameterized models. In: Proceedings of the international conference on measurement and modeling of computer systems (Sigmetrics \u201904), June 2004, pp 2\u201313"},{"key":"735_CR33","unstructured":"McCalpin J (1999) Stream: sustainable memory bandwidth in high performance computers. http:\/\/www.cs.virginia.edu\/stream\/"},{"issue":"1","key":"735_CR34","doi-asserted-by":"crossref","first-page":"21","DOI":"10.1145\/103727.103729","volume":"9","author":"JM Mellor-Crummey","year":"1991","unstructured":"Mellor-Crummey JM, Scott ML (1991) Algorithms for scalable synchronization on shared-memory multiprocessors. ACM Trans Comput Syst 9(1):21\u201365","journal-title":"ACM Trans Comput Syst"},{"issue":"3","key":"735_CR35","doi-asserted-by":"crossref","first-page":"249","DOI":"10.1023\/A:1011168003859","volume":"29","author":"DS Nikolopoulos","year":"2001","unstructured":"Nikolopoulos DS, Papatheodorou TA (2001) The architecture and operating system implications on the performance of synchronization on ccNUMA multiprocessors. Int J Parallel Program 29(3):249\u2013282","journal-title":"Int J Parallel Program"},{"key":"735_CR36","first-page":"192","volume-title":"Proceedings of the 25th annual international symposium on computer architecture","author":"M Oskin","year":"1998","unstructured":"Oskin M, Chong F, Sherwood T (1998) Active pages: a model of computation for intelligent memory. In: Proceedings of the 25th annual international symposium on computer architecture, Barcelona, Spain, June 1998, pp 192\u2013203"},{"issue":"2","key":"735_CR37","doi-asserted-by":"crossref","first-page":"34","DOI":"10.1109\/40.592312","volume":"17","author":"D Patterson","year":"1997","unstructured":"Patterson D, Anderson T, Cardwell N, Fromm R, Keaton K, Kozyrakis C, Thomas R, Yelick K (1997) A case for Intelligent RAM: IRAM. IEEE MICRO 17(2):34\u201344","journal-title":"IEEE MICRO"},{"key":"735_CR38","volume-title":"11th symposium on high performance interconnects","author":"F Petrini","year":"2003","unstructured":"Petrini F, Fernandez J, Frachtenberg E, Coll S (2003) Scalable collective communication on the ASCI Q machine. In: 11th symposium on high performance interconnects, Stanford, CA USA, Aug 2003"},{"key":"735_CR39","unstructured":"Pinkston T, Agarwal A, Dally W, Duato J, Horst B, Smith TB (2002) What will have the greatest impact in 2010: the processor, the memory, or the interconnect? HPCA8 Panel Session, Feb 2002"},{"key":"735_CR40","doi-asserted-by":"crossref","first-page":"90","DOI":"10.1145\/232973.232984","volume-title":"Proceedings of the 23rd annual international symposium on computer architecture","author":"A Saulsbury","year":"1996","unstructured":"Saulsbury A, Pong F, Nowatzyk A (1996) Missing the memory wall: the case for processor\/memory integration. In: Proceedings of the 23rd annual international symposium on computer architecture, May 1996, pp 90\u2013101"},{"key":"735_CR41","first-page":"26","volume-title":"Proceedings of the 7th symposium on architectural support for programming languages and operating systems","author":"S Scott","year":"1996","unstructured":"Scott S (1996) Synchronization and communication in the T3E multiprocessor. In: Proceedings of the 7th symposium on architectural support for programming languages and operating systems, Cambridge, MA, USA, Oct 1996, pp 26\u201336"},{"key":"735_CR42","unstructured":"Shao M, Ailamaki A, Falsafi B (2003) DBmbench: fast and accurate database workload representation on modern microarchitecture. Technical Report CMU-CS-03-161, Carnegie Mellon University"},{"key":"735_CR43","unstructured":"Silicon Graphics, Inc (2001) SGI\u2122Origin\u21223000 Series Technical Report, Jan 2001"},{"key":"735_CR44","unstructured":"Silicon Graphics, Inc (2001) SN2-MIPS Communication Protocol Specification, Revision 0.12, Nov 2001"},{"issue":"11","key":"735_CR45","doi-asserted-by":"crossref","first-page":"1248","DOI":"10.1109\/12.966498","volume":"50","author":"Y Solihin","year":"2001","unstructured":"Solihin Y, Lee J, Torrellas J (2001) Automatic code mapping on an intelligent memory architecture. IEEE Trans Comput 50(11):1248\u20131266","journal-title":"IEEE Trans Comput"},{"key":"735_CR46","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1109\/ISCA.2002.1003576","volume-title":"Proceedings of the 29th annual international symposium on computer architecture","author":"Y Solihin","year":"2002","unstructured":"Solihin Y, Lee J, Torrellas J (2002) Using a user-level memory thread for correlation prefetching. In: Proceedings of the 29th annual international symposium on computer architecture, May 2002, pp 171\u2013182"},{"issue":"2","key":"735_CR47","doi-asserted-by":"crossref","first-page":"166","DOI":"10.1109\/TPDS.2003.1178880","volume":"14","author":"DJ Sorin","year":"2003","unstructured":"Sorin DJ, Lemon J, Eager DL, Vernon MK (2003) Analytic evaluation of shared-memory architectures. IEEE Trans Parallel Distrib Syst 14(2):166\u2013180","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"735_CR48","first-page":"380","volume-title":"Proceedings of the 25th annual international symposium on computer architecture","author":"DJ Sorin","year":"1998","unstructured":"Sorin DJ, Pai VS, Adve SV, Vernon MK, Wood DA (1998) Analytic evaluation of shared-memory systems with ILP processors. In: Proceedings of the 25th annual international symposium on computer architecture, Barcelona, Spain, June 1998, pp 380\u2013390"},{"key":"735_CR49","first-page":"84a","volume-title":"Proceedings of the international parallel and distributed processing symposium","author":"V Tipparaju","year":"2003","unstructured":"Tipparaju V, Nieplocha J, Panda D (2003) Fast collective operations using shared and remote memory access protocols on clusters. In: Proceedings of the international parallel and distributed processing symposium, Apr 2003, p 84a"},{"key":"735_CR50","first-page":"163","volume-title":"Proceedings of the international conference on measurement and modeling of computer systems (Sigmetrics \u201990)","author":"J Torrellas","year":"1990","unstructured":"Torrellas J, Hennessy JL, Weil T (1990) Analysis of critical architectural and program parameters in a hierarchical shared memory multiprocessor. In: Proceedings of the international conference on measurement and modeling of computer systems (Sigmetrics \u201990), May 1990, pp 163\u2013172"},{"key":"735_CR51","first-page":"15","volume-title":"Proceedings of the seventh annual symposium on high performance computer architecture","author":"J Torrellas","year":"2000","unstructured":"Torrellas J, Nguyen A-T, Yang L (2000) Toward a cost-effective DSM organization that exploits processor-memory integration. In: Proceedings of the seventh annual symposium on high performance computer architecture, Jan 2000, pp 15\u201325"},{"key":"735_CR52","unstructured":"TPC-D, Past, Present and Future: An Interview between Berni Schiefer, Chair of the TPC-D Subcommittee and Kim Shanley, TPC Chief Operating Officer. (2011). available from http:\/\/www.tpc.org\/"},{"key":"735_CR53","doi-asserted-by":"crossref","first-page":"256","DOI":"10.1109\/ISCA.1992.753322","volume-title":"Proceedings of the 19th annual international symposium on computer architecture","author":"T Eicken von","year":"1992","unstructured":"von Eicken T, Culler DE, Goldstein SC, Schauser KE (1992) Active messages: a mechanism for integrated communication and computation. In: Proceedings of the 19th annual international symposium on computer architecture, Gold Coast, Australia, May 1992, pp 256\u2013266"},{"key":"735_CR54","unstructured":"Yoo J, Yoo S, Choi K (2011) Active memory processor for network-on-chip based architecture. IEEE Trans Comput Apr 2011"},{"key":"735_CR55","unstructured":"Zhang L (2003) UVSIM reference manual. Technical Report UUCS-03-011, University of Utah, May 2003"},{"key":"735_CR56","volume-title":"International parallel and distributed processing symposium","author":"L Zhang","year":"2004","unstructured":"Zhang L, Fang Z, Carter JB (2004) Highly efficient synchronization based on active memory operations. In: International parallel and distributed processing symposium, Apr 2004"},{"issue":"11","key":"735_CR57","doi-asserted-by":"crossref","first-page":"1117","DOI":"10.1109\/12.966490","volume":"50","author":"L Zhang","year":"2001","unstructured":"Zhang L, Fang Z, Parker M, Mathew B, Schaelicke L, Carter J, Hsieh W, McKee S (2001) The impulse memory controller. IEEE Trans Comput 50(11):1117\u20131132","journal-title":"IEEE Trans Comput"},{"key":"735_CR58","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1109\/ICCD.2005.64","volume-title":"Proceedings of the 23th international conference on computer design","author":"L Zhao","year":"2005","unstructured":"Zhao L, Iyer R, Makineni S, Bhuyan L, Newell D (2005) Hardware support for bulk data movement in server platforms. In: Proceedings of the 23th international conference on computer design, Oct 2005, pp 53\u201360"},{"issue":"9","key":"735_CR59","doi-asserted-by":"crossref","first-page":"1187","DOI":"10.1109\/TC.2010.15","volume":"59","author":"I Zotov","year":"2010","unstructured":"Zotov I (2010) Distributed virtual bit-slice synchronizer: a scalable hardware barrier mechanism for n-dimensional meshes. IEEE Trans Comput 59(9):1187\u20131199","journal-title":"IEEE Trans Comput"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-011-0735-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-011-0735-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-011-0735-9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,17]],"date-time":"2024-04-17T21:36:43Z","timestamp":1713389803000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-011-0735-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,1,17]]},"references-count":59,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2012,10]]}},"alternative-id":["735"],"URL":"https:\/\/doi.org\/10.1007\/s11227-011-0735-9","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,1,17]]}}}