{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,4,4]],"date-time":"2022-04-04T02:33:55Z","timestamp":1649039635734},"reference-count":17,"publisher":"Elsevier BV","issue":"1","license":[{"start":{"date-parts":[[1998,12,1]],"date-time":"1998-12-01T00:00:00Z","timestamp":912470400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Journal of Systems and Software"],"published-print":{"date-parts":[[1998,12]]},"DOI":"10.1016\/s0164-1212(98)10040-7","type":"journal-article","created":{"date-parts":[[2003,4,4]],"date-time":"2003-04-04T21:09:52Z","timestamp":1049490592000},"page":"17-29","source":"Crossref","is-referenced-by-count":0,"title":["Classifying and alleviating the communication overheads in matrix computations on large-scale NUMA multiprocessors"],"prefix":"10.1016","volume":"44","author":[{"given":"Yi-Min","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hsiao-Hsi","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruei-Chuan","family":"Chang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/S0164-1212(98)10040-7_BIB1","doi-asserted-by":"crossref","unstructured":"Agarwal, A., Bianchini, R., Chaiken, D., Johnson, K.L., Kranz, K., Kubiatowicz, J., Lim, B.H., Mackenzie, K., Yeung, D., 1995. The MIT Alewife machine: Architecture and performance. Proceedings of the 22nd International Symposium on Computer Architecture. pp. 2\u201313","DOI":"10.1145\/223982.223985"},{"key":"10.1016\/S0164-1212(98)10040-7_BIB2","doi-asserted-by":"crossref","unstructured":"Bianchini, R., Crovella, M.E., Kontothanassis, L., LeBlanc, T.J., 1994. Software interleaving. Proceedings of the 1994 Symposium on Parallel and Distributed Processing, pp. 56\u201365","DOI":"10.1109\/SPDP.1994.346181"},{"key":"10.1016\/S0164-1212(98)10040-7_BIB3","doi-asserted-by":"crossref","unstructured":"Dubnicki, C., 1993. The effects of block size on the performance of coherent caches in shared-memory multiprocessors. Ph.D. Thesis, University of Rochester, Computer Science Department","DOI":"10.21236\/ADA272838"},{"key":"10.1016\/S0164-1212(98)10040-7_BIB4","doi-asserted-by":"crossref","unstructured":"Harzallah, K., Sevcik, K.C., 1993. Hot spot analysis in large scale shared memory multiprocessors. Proceedings of the Supercomputing '93 Conference, pp. 895\u2013905","DOI":"10.1145\/169627.169857"},{"key":"10.1016\/S0164-1212(98)10040-7_BIB5","unstructured":"Hennessy, J.L., Patterson, D.A., 1990. Computer Architecture: A Quantitative Approach. Morgan Kaufmann, Los Altos, CA"},{"key":"10.1016\/S0164-1212(98)10040-7_BIB6","doi-asserted-by":"crossref","unstructured":"Lenoski, D., Laudon, J., Joe, T., Nakahira, D., Stevens, L., Gupta, A., and Hennessy, J., 1992. The Dash prototype: Implementation and performance. The 19th Annual International Symposium on Computer Architecture. pp. 92\u2013103","DOI":"10.1109\/ISCA.1992.753307"},{"key":"10.1016\/S0164-1212(98)10040-7_BIB7","doi-asserted-by":"crossref","unstructured":"Li, H., Tandri, S., Stumn, M., Sevcik, K.C., 1993. Locality and loop scheduling on NUMA multiprocessors. International Conference on Parallel Processing. pp. 140\u2013147","DOI":"10.1109\/ICPP.1993.112"},{"key":"10.1016\/S0164-1212(98)10040-7_BIB8","doi-asserted-by":"crossref","unstructured":"Markatos, E.P., LeBlanc, T.J., 1992. Shared-memory multiprocessors trends and the implications for parallel program performance. Technical Report 420. Computer Science Department, University of Rochester","DOI":"10.1109\/SPDP.1992.242736"},{"issue":"4","key":"10.1016\/S0164-1212(98)10040-7_BIB9","doi-asserted-by":"crossref","first-page":"379","DOI":"10.1109\/71.273046","article-title":"Using processor affinity in loop scheduling on shared-memory multiprocessors","volume":"5","author":"Markatos","year":"1994","journal-title":"IEEE Trans. on Parallel and Distributed Systems"},{"issue":"12","key":"10.1016\/S0164-1212(98)10040-7_BIB10","doi-asserted-by":"crossref","first-page":"1425","DOI":"10.1109\/TC.1987.5009495","article-title":"Guided self-scheduling: A practical scheduling scheme for parallel supercomputers","volume":"C\/36","author":"Polychronopoulos","year":"1987","journal-title":"IEEE Trans. on Computers"},{"key":"10.1016\/S0164-1212(98)10040-7_BIB11","unstructured":"Ramanujam, J., Sadayappan, P., 1990. Tiling of iteration spaces for multiprocessors. International Conference on Parallel Processing. pp. 178\u2013186"},{"key":"10.1016\/S0164-1212(98)10040-7_BIB12","doi-asserted-by":"crossref","unstructured":"Tzen, T.H., Ni, L.M., 1993. Trapezoid self-scheduling: A practical scheduling scheme for parallel compilers. IEEE Trans. on Parallel and Distributed Systems 4 (1), 87\u201398","DOI":"10.1109\/71.205655"},{"key":"10.1016\/S0164-1212(98)10040-7_BIB13","doi-asserted-by":"crossref","unstructured":"Veenstra, J.E., Fowler, R.J., 1994. MINT Tutorial and User Manual. Technical Report 452. Computer Science Department, University of Rochester","DOI":"10.1109\/MASCOT.1994.284422"},{"issue":"1","key":"10.1016\/S0164-1212(98)10040-7_BIB14","doi-asserted-by":"crossref","first-page":"72","DOI":"10.1109\/2.67196","article-title":"Hector: A hierarchically structured shared-memory multiprocessor","volume":"24","author":"Vranesic","year":"1991","journal-title":"IEEE Computer"},{"key":"10.1016\/S0164-1212(98)10040-7_BIB15","unstructured":"Vranesic, Z., Brown, S., Stumm, S., Caranci, S., Grbic, A., Grindley, R., Gusat, M., Krieger, O., Lemieux, G., Loveless, K., Manjikian, N., Zilic, Z., Abdelrahman, T., Gamsa, B., Pereira, P., Sevcik, K., Elkateeb, A., Srbljic, S., 1995. The NUMAchine Multiprocessor. Technical Report CSRI-324. Toronto University, Computer Systems Research Institute"},{"issue":"2","key":"10.1016\/S0164-1212(98)10040-7_BIB16","doi-asserted-by":"crossref","first-page":"231","DOI":"10.1142\/S0129053395000130","article-title":"A minimal synchronization overhead affinity scheduling algorithm for shared-memory multiprocessor","volume":"7","author":"Wang","year":"1995","journal-title":"International Journal of High Speed Computing"},{"key":"10.1016\/S0164-1212(98)10040-7_BIB17","doi-asserted-by":"crossref","unstructured":"Wang, Y.M., Wang, H.H., Chang, R.C., 1997. Clustered affinity scheduling on large-scale NUMA multiprocessors. The Journal of Systems and Software 39 (1), 61\u201370","DOI":"10.1016\/S0164-1212(96)00163-X"}],"container-title":["Journal of Systems and Software"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0164121298100407?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0164121298100407?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2019,4,19]],"date-time":"2019-04-19T01:56:19Z","timestamp":1555638979000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0164121298100407"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1998,12]]},"references-count":17,"journal-issue":{"issue":"1","published-print":{"date-parts":[[1998,12]]}},"alternative-id":["S0164121298100407"],"URL":"https:\/\/doi.org\/10.1016\/s0164-1212(98)10040-7","relation":{},"ISSN":["0164-1212"],"issn-type":[{"value":"0164-1212","type":"print"}],"subject":[],"published":{"date-parts":[[1998,12]]}}}