{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T22:40:02Z","timestamp":1750545602411,"version":"3.41.0"},"publisher-location":"Cham","reference-count":16,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030794774"},{"type":"electronic","value":"9783030794781"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-79478-1_15","type":"book-chapter","created":{"date-parts":[[2021,6,22]],"date-time":"2021-06-22T16:04:37Z","timestamp":1624377877000},"page":"170-181","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Segmented Merge: A New Primitive for\u00a0Parallel Sparse Matrix Computations"],"prefix":"10.1007","author":[{"given":"Haonan","family":"Ji","sequence":"first","affiliation":[]},{"given":"Shibo","family":"Lu","sequence":"additional","affiliation":[]},{"given":"Kaixi","family":"Hou","sequence":"additional","affiliation":[]},{"given":"Hao","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Weifeng","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Brian","family":"Vinter","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,6,23]]},"reference":[{"key":"15_CR1","unstructured":"Blelloch, G.E., Heroux, M.A., Zagha, M.: Segmented operations for sparse matrix computation on vector multiprocessors. Technical report, CMU (1993)"},{"issue":"1","key":"15_CR2","first-page":"1:1","volume":"38","author":"TA Davis","year":"2011","unstructured":"Davis, T.A., Hu, Y.: The university of florida sparse matrix collection. ACM Trans. Math. Softw. 38(1), 1:1-1:25 (2011)","journal-title":"ACM Trans. Math. Softw."},{"key":"15_CR3","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1016\/j.parco.2018.06.009","volume":"78","author":"M Deveci","year":"2018","unstructured":"Deveci, M., Trott, C., Rajamanickam, S.: Multithreaded sparse matrix-matrix multiplication for many-core and GPU architectures. Parallel Comput. 78, 33\u201346 (2018)","journal-title":"Parallel Comput."},{"key":"15_CR4","doi-asserted-by":"crossref","unstructured":"Dotsenko, Y., Govindaraju, N.K., Sloan, P.P., Boyd, C., Manferdelli, J.: Fast scan algorithms on graphics processors. In: Proceedings of the 22nd Annual International Conference on Supercomputing, ICS 2008, pp. 205\u2013213 (2008)","DOI":"10.1145\/1375527.1375559"},{"key":"15_CR5","doi-asserted-by":"crossref","unstructured":"Green, O., McColl, R., Bader, D.A.: GPU merge path: A GPU merging algorithm. In: Proceedings of the 26th ACM International Conference on Supercomputing, ICS 2012, pp. 331\u2013340 (2012)","DOI":"10.1145\/2304576.2304621"},{"issue":"4","key":"15_CR6","doi-asserted-by":"publisher","first-page":"C429","DOI":"10.1137\/17M1121378","volume":"40","author":"F Gremse","year":"2018","unstructured":"Gremse, F., K\u00fcpper, K., Naumann, U.: Memory-efficient sparse matrix-matrix multiplication by row merging on many-core architectures. SIAM J. Sci. Comput. 40(4), C429\u2013C449 (2018)","journal-title":"SIAM J. Sci. Comput."},{"issue":"3","key":"15_CR7","doi-asserted-by":"publisher","first-page":"250","DOI":"10.1145\/355791.355796","volume":"4","author":"FG Gustavson","year":"1978","unstructured":"Gustavson, F.G.: Two fast algorithms for sparse matrices: multiplication and permuted transposition. ACM Trans. Math. Softw. 4(3), 250\u2013269 (1978)","journal-title":"ACM Trans. Math. Softw."},{"key":"15_CR8","doi-asserted-by":"crossref","unstructured":"Hou, K., Liu, W., Wang, H., Feng, W.c.: Fast segmented sort on GPUs. In: Proceedings of the International Conference on Supercomputing, ICS 2017 (2017)","DOI":"10.1145\/3079079.3079105"},{"key":"15_CR9","doi-asserted-by":"crossref","unstructured":"Liu, J., He, X., Liu, W., Tan, G.: Register-based implementation of the sparse general matrix-matrix multiplication on GPUs. In: Proceedings of the 23rd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, PPoPP 2018, pp. 407\u2013408 (2018)","DOI":"10.1145\/3178487.3178529"},{"key":"15_CR10","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1007\/s10766-018-0604-8","volume":"47","author":"J Liu","year":"2019","unstructured":"Liu, J., He, X., Liu, W., Tan, G.: Register-aware optimizations for parallel sparse matrix-matrix multiplication. Int. J. Parallel Program. 47, 403\u2013417 (2019)","journal-title":"Int. J. Parallel Program."},{"key":"15_CR11","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1016\/j.jpdc.2015.06.010","volume":"85","author":"W Liu","year":"2015","unstructured":"Liu, W., Vinter, B.: A framework for general sparse matrix-matrix multiplication on GPUs and heterogeneous processors. J. Parallel Distrib. Comput. 85, 47\u201361 (2015)","journal-title":"J. Parallel Distrib. Comput."},{"key":"15_CR12","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1016\/j.parco.2015.04.004","volume":"49","author":"W Liu","year":"2015","unstructured":"Liu, W., Vinter, B.: Speculative segmented sum for sparse matrix-vector multiplication on heterogeneous processors. Parallel Comput. 49, 179\u2013193 (2015)","journal-title":"Parallel Comput."},{"key":"15_CR13","doi-asserted-by":"crossref","unstructured":"Liu, W., Vinter, B.: CSR5: An efficient storage format for cross-platform sparse matrix-vector multiplication. In: Proceedings of the 29th ACM on International Conference on Supercomputing, ICS 2015, pp. 339\u2013350 (2015)","DOI":"10.1145\/2751205.2751209"},{"key":"15_CR14","doi-asserted-by":"crossref","unstructured":"Nagasaka, Y., Nukada, A., Matsuoka, S.: High-performance and memory-saving sparse general matrix-matrix multiplication for NVIDIA pascal GPU. In: 2017 46th International Conference on Parallel Processing (ICPP), pp. 101\u2013110 (2017)","DOI":"10.1109\/ICPP.2017.19"},{"key":"15_CR15","doi-asserted-by":"crossref","unstructured":"Wang, H., Liu, W., Hou, K., Feng, W.C.: Parallel transposition of sparse data structures. In: Proceedings of the 2016 International Conference on Supercomputing, ICS 2016, pp. 33:1\u201333:13 (2016)","DOI":"10.1145\/2925426.2926291"},{"key":"15_CR16","doi-asserted-by":"crossref","unstructured":"Xie, Z., Tan, G., Liu, W., Sun, N.: IA-SpGEMM: an input-aware auto-tuning framework for parallel sparse matrix-matrix multiplication. In: Proceedings of the ACM International Conference on Supercomputing, ICS 2019, pp. 94\u2013105 (2019)","DOI":"10.1145\/3330345.3330354"}],"container-title":["Lecture Notes in Computer Science","Network and Parallel Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-79478-1_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T22:03:19Z","timestamp":1750543399000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-79478-1_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030794774","9783030794781"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-79478-1_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"23 June 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NPC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"IFIP International Conference on Network and Parallel Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Zhengzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 September 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 September 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"npc2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ncic.ac.cn\/npc2020\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}