{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T22:43:24Z","timestamp":1772923404010,"version":"3.50.1"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319111964","type":"print"},{"value":"9783319111971","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-11197-1_4","type":"book-chapter","created":{"date-parts":[[2014,8,13]],"date-time":"2014-08-13T14:54:29Z","timestamp":1407941669000},"page":"42-56","source":"Crossref","is-referenced-by-count":4,"title":["Understanding the SIMD Efficiency of Graph Traversal on GPU"],"prefix":"10.1007","author":[{"given":"Yichao","family":"Cheng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hong","family":"An","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhitao","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Feng","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhaohui","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xia","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yi","family":"Peng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"4_CR1","unstructured":"10th dimacs implementation challenge, http:\/\/www.cc.gatech.edu\/dimacs10\/index.shtml (accessed: December 15, 2013)"},{"key":"4_CR2","unstructured":"9th dimacs implementation challenge, http:\/\/www.dis.uniroma1.it\/~challenge9\/download.shtml (accessed: December 15, 2013)"},{"key":"4_CR3","unstructured":"Stanford large network dataset collection, http:\/\/snap.stanford.edu\/data\/index.html (accessed: December 15, 2013)"},{"key":"4_CR4","unstructured":"Stanford network analysis platform, https:\/\/snap.stanford.edu\/snap\/index.html (accessed: December 15, 2013)"},{"key":"4_CR5","doi-asserted-by":"crossref","unstructured":"Agarwal, V., Petrini, F., Pasetto, D., Bader, D.A.: Scalable graph exploration on multicore processors. In: Proceedings of the 2010 ACM\/IEEE International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201311. IEEE Computer Society (2010)","DOI":"10.1109\/SC.2010.46"},{"key":"4_CR6","doi-asserted-by":"crossref","unstructured":"Bader, D.A., Madduri, K.: Designing multithreaded algorithms for breadth-first search and st-connectivity on the Cray MTA-2. In: International Conference on Parallel Processing, ICPP 2006, pp. 523\u2013530. IEEE (2006)","DOI":"10.1109\/ICPP.2006.34"},{"key":"4_CR7","doi-asserted-by":"crossref","unstructured":"Beamer, S., Asanovic, K., Patterson, D.: Direction-optimizing breadth-first search. In: 2012 International Conference for High Performance Computing, Networking, Storage and Analysis (SC), pp. 1\u201310. IEEE (2012)","DOI":"10.1109\/SC.2012.50"},{"key":"4_CR8","doi-asserted-by":"crossref","unstructured":"Che, S., Boyer, M., Meng, J., Tarjan, D., Sheaffer, J.W., Lee, S.H., Skadron, K.: Rodinia: A benchmark suite for heterogeneous computing. In: IEEE International Symposium on Workload Characterization, IISWC 2009, pp. 44\u201354. IEEE (2009)","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"4_CR9","unstructured":"Cormen, T.H., Leiserson, C.E., Rivest, R.L., Stein, C., et al.: Introduction to algorithms, vol.\u00a02. MIT Press, Cambridge (2001)"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Deng, Y., Wang, B.D., Mu, S.: Taming irregular EDA applications on GPUs. In: IEEE\/ACM International Conference on Computer-Aided Design-Digest of Technical Papers, ICCAD 2009, pp. 539\u2013546. IEEE (2009)","DOI":"10.1145\/1687399.1687501"},{"key":"4_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1007\/978-3-540-77220-0_21","volume-title":"High Performance Computing \u2013 HiPC 2007","author":"P. Harish","year":"2007","unstructured":"Harish, P., Narayanan, P.J.: Accelerating large graph algorithms on the GPU using CUDA. In: Aluru, S., Parashar, M., Badrinath, R., Prasanna, V.K. (eds.) HiPC 2007. LNCS, vol.\u00a04873, pp. 197\u2013208. Springer, Heidelberg (2007)"},{"key":"4_CR12","unstructured":"Harish, P., Vineet, V., Narayanan, P.J.: Large graph algorithms for massively multithreaded architectures. Centre for Visual Information Technology, I. Institute of Information Technology, Hyderabad, India, Tech. Rep. IIIT\/TR\/2009\/74 (2009)"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"Hassaan, M.A., Burtscher, M., Pingali, K.: Ordered vs. unordered: A comparison of parallelism and work-efficiency in irregular algorithms. In: Proceedings of the 16th ACM Symposium on Principles and Practice of Parallel Programming, pp. 3\u201312. ACM (2011)","DOI":"10.1145\/1941553.1941557"},{"issue":"12","key":"4_CR14","doi-asserted-by":"publisher","first-page":"655","DOI":"10.1016\/j.parco.2010.07.002","volume":"36","author":"K.A. Hawick","year":"2010","unstructured":"Hawick, K.A., Leist, A., Playne, D.P.: Parallel graph component labelling with gpus and cuda. Parallel Computing\u00a036(12), 655\u2013678 (2010)","journal-title":"Parallel Computing"},{"key":"4_CR15","doi-asserted-by":"crossref","unstructured":"Hong, S., Kim, S.K., Oguntebi, T., Olukotun, K.: Accelerating CUDA graph algorithms at maximum warp. In: Proceedings of the 16th ACM Symposium on Principles and Practice of Parallel Programming, pp. 267\u2013276. ACM (2011)","DOI":"10.1145\/1941553.1941590"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Hong, S., Oguntebi, T., Olukotun, K.: Efficient parallel graph exploration on multi-core CPU and GPU. In: 2011 International Conference on Parallel Architectures and Compilation Techniques (PACT), pp. 78\u201388. IEEE (2011)","DOI":"10.1109\/PACT.2011.14"},{"key":"4_CR17","unstructured":"Katz, G.J., Kider Jr., J.T.: All-pairs shortest-paths for large graphs on the GPU. In: Proceedings of the 23rd ACM SIGGRAPH\/EUROGRAPHICS Symposium on Graphics Hardware, pp. 47\u201355. Eurographics Association (2008)"},{"key":"4_CR18","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1145\/1594835.1504181","volume":"44","author":"M. Kulkarni","year":"2009","unstructured":"Kulkarni, M., Burtscher, M., Inkulu, R., Pingali, K., Cas\u00e7aval, C.: How much parallelism is there in irregular applications? ACM Sigplan Notices\u00a044, 3\u201314 (2009)","journal-title":"ACM Sigplan Notices"},{"key":"4_CR19","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1145\/1273442.1250759","volume":"42","author":"M. Kulkarni","year":"2007","unstructured":"Kulkarni, M., Pingali, K., Walter, B., Ramanarayanan, G., Bala, K., Chew, L.P.: Optimistic parallelism requires abstractions. ACM SIGPLAN Notices\u00a042, 211\u2013222 (2007)","journal-title":"ACM SIGPLAN Notices"},{"key":"4_CR20","doi-asserted-by":"crossref","unstructured":"Leiserson, C.E., Schardl, T.B.: A work-efficient parallel breadth-first search algorithm (or how to cope with the nondeterminism of reducers). In: Proceedings of the 22nd ACM Symposium on Parallelism in Algorithms and Architectures, pp. 303\u2013314. ACM (2010)","DOI":"10.1145\/1810479.1810534"},{"key":"4_CR21","doi-asserted-by":"crossref","unstructured":"Li, D., Becchi, M.: Deploying Graph Algorithms on GPUs: An Adaptive Solution. In: 2013 IEEE 27th International Symposium on Parallel and Distributed Processing, pp. 1013\u20131024 (May 2013), http:\/\/ieeexplore.ieee.org\/lpdocs\/epic03\/wrapper.htm?arnumber=6569881","DOI":"10.1109\/IPDPS.2013.101"},{"key":"4_CR22","doi-asserted-by":"crossref","unstructured":"Luo, L., Wong, M., Hwu, W.M.: An effective GPU implementation of breadth-first search. In: Proceedings of the 47th Design Automation Conference, pp. 52\u201355. ACM (2010)","DOI":"10.1145\/1837274.1837289"},{"key":"4_CR23","doi-asserted-by":"crossref","unstructured":"Merrill, D., Garland, M., Grimshaw, A.: High performance and scalable gpu graph traversal. Univ. of Virginia, Tech. Rep. UVA CS-2011-05 (2011)","DOI":"10.1145\/2145816.2145832"},{"key":"4_CR24","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1145\/2370036.2145832","volume":"47","author":"D. Merrill","year":"2012","unstructured":"Merrill, D., Garland, M., Grimshaw, A.: Scalable GPU graph traversal. ACM SIGPLAN Notices\u00a047, 117\u2013128 (2012)","journal-title":"ACM SIGPLAN Notices"},{"issue":"10","key":"4_CR25","doi-asserted-by":"publisher","first-page":"1381","DOI":"10.1109\/TPDS.2007.70811","volume":"19","author":"D.P. Scarpazza","year":"2008","unstructured":"Scarpazza, D.P., Villa, O., Petrini, F.: Efficient breadth-first search on the cell\/be processor. IEEE Transactions on Parallel and Distributed Systems\u00a019(10), 1381\u20131395 (2008)","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"issue":"6684","key":"4_CR26","doi-asserted-by":"publisher","first-page":"440","DOI":"10.1038\/30918","volume":"393","author":"D.J. Watts","year":"1998","unstructured":"Watts, D.J., Strogatz, S.H.: Collective dynamics of small-world networks. Nature\u00a0393(6684), 440\u2013442 (1998)","journal-title":"Nature"},{"key":"4_CR27","unstructured":"Xia, Y., Prasanna, V.K.: Topologically adaptive parallel breadth-first search on multicore processors. In: Proceedings of the 21st IASTED International Conference, vol.\u00a0668, p. 91 (2009)"},{"key":"4_CR28","doi-asserted-by":"crossref","unstructured":"Yoo, A., Chow, E., Henderson, K., McLendon, W., Hendrickson, B., Catalyurek, U.: A scalable distributed parallel breadth-first search algorithm on BlueGene\/L. In: Proceedings of the ACM\/IEEE SC 2005 Conference, Supercomputing, p. 25. IEEE (2005)","DOI":"10.1109\/SC.2005.4"}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-11197-1_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,4]],"date-time":"2025-05-04T05:02:25Z","timestamp":1746334945000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-11197-1_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319111964","9783319111971"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-11197-1_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014]]}}}