{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,16]],"date-time":"2025-12-16T12:46:11Z","timestamp":1765889171505,"version":"3.40.3"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031408427"},{"type":"electronic","value":"9783031408434"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-40843-4_48","type":"book-chapter","created":{"date-parts":[[2023,8,24]],"date-time":"2023-08-24T12:02:32Z","timestamp":1692878552000},"page":"648-661","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Exploring the\u00a0Use of\u00a0Dataflow Architectures for\u00a0Graph Neural Network Workloads"],"prefix":"10.1007","author":[{"given":"Ryien","family":"Hosseini","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Filippo","family":"Simini","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Venkatram","family":"Vishwanath","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ramakrishnan","family":"Sivakumar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sanjif","family":"Shanmugavelu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhengyu","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lev","family":"Zlotnik","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingran","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Philip","family":"Colangelo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andrew","family":"Deng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Philip","family":"Lassen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shukur","family":"Pathan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,8,25]]},"reference":[{"key":"48_CR1","doi-asserted-by":"crossref","unstructured":"Abts, D., et al.: Think fast: a tensor streaming processor (tsp) for accelerating deep learning workloads. In: 2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA), pp. 145\u2013158 (2020)","DOI":"10.1109\/ISCA45697.2020.00023"},{"issue":"1","key":"48_CR2","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1109\/MM.2019.2949986","volume":"40","author":"AA Awan","year":"2019","unstructured":"Awan, A.A., Jain, A., Chu, C.H., Subramoni, H., Panda, D.K.: Communication profiling and characterization of deep-learning workloads on clusters with high-performance interconnects. IEEE Micro 40(1), 35\u201343 (2019)","journal-title":"IEEE Micro"},{"key":"48_CR3","doi-asserted-by":"crossref","unstructured":"Baruah, T., et al.: GNNmark: a benchmark suite to characterize graph neural network training on GPUs. In: 2021 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), pp. 13\u201323. IEEE (2021)","DOI":"10.1109\/ISPASS51385.2021.00013"},{"issue":"10","key":"48_CR4","first-page":"1654","volume":"70","author":"M Blott","year":"2020","unstructured":"Blott, M., et al.: Evaluation of optimized CNNs on heterogeneous accelerators using a novel benchmarking approach. IEEE Trans. Comput. 70(10), 1654\u20131669 (2020)","journal-title":"IEEE Trans. Comput."},{"issue":"4","key":"48_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3358700","volume":"15","author":"M Blott","year":"2019","unstructured":"Blott, M., Halder, L., Leeser, M., Doyle, L.: QuTiBench: benchmarking neural networks on heterogeneous hardware. ACM J. Emerg. Technol. Comput. Syst. (JETC) 15(4), 1\u201338 (2019)","journal-title":"ACM J. Emerg. Technol. Comput. Syst. (JETC)"},{"key":"48_CR6","doi-asserted-by":"crossref","unstructured":"Chen, Z., Cao, Y., Liu, Y., Wang, H., Xie, T., Liu, X.: A comprehensive study on challenges in deploying deep learning based software. In: Proceedings of the 28th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering, pp. 750\u2013762 (2020)","DOI":"10.1145\/3368089.3409759"},{"key":"48_CR7","doi-asserted-by":"publisher","first-page":"1900","DOI":"10.1016\/j.procs.2012.04.208","volume":"9","author":"M Ci\u017cnicki","year":"2012","unstructured":"Ci\u017cnicki, M., Kierzynka, M., Kopta, P., Kurowski, K., Gepner, P.: Benchmarking data and compute intensive applications on modern CPU and GPU architectures. Procedia Comput. Sci. 9, 1900\u20131909 (2012)","journal-title":"Procedia Comput. Sci."},{"key":"48_CR8","unstructured":"Corso, G., Cavalleri, L., Beaini, D., Li\u00f2, P., Veli\u010dkovi\u0107, P.: Principal neighbourhood aggregation for graph nets. In: Advances in Neural Information Processing Systems, vol. 33, pp. 13260\u201313271 (2020)"},{"issue":"1","key":"48_CR9","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1146\/annurev.cs.01.060186.001301","volume":"1","author":"DE Culler","year":"1986","unstructured":"Culler, D.E.: Dataflow architectures. Annu. Rev. Comput. Sci. 1(1), 225\u2013253 (1986)","journal-title":"Annu. Rev. Comput. Sci."},{"key":"48_CR10","unstructured":"Dang, V., Mohajerani, K., Gaj, K.: High-speed hardware architectures and fair FPGA benchmarking of CRYSTALS-kyber NTRU and saber. In: NIST 3rd PQC Standardization Conference (2021)"},{"key":"48_CR11","unstructured":"Dwivedi, V.P., Joshi, C.K., Laurent, T., Bengio, Y., Bresson, X.: Benchmarking graph neural networks. arXiv preprint arXiv:2003.00982 (2020)"},{"key":"48_CR12","unstructured":"Fey, M., Lenssen, J.E.: Fast graph representation learning with PyTorch geometric. arXiv preprint arXiv:1903.02428 (2019)"},{"issue":"10","key":"48_CR13","doi-asserted-by":"publisher","first-page":"3934","DOI":"10.1007\/s11227-015-1483-z","volume":"71","author":"J Filipovi\u010d","year":"2015","unstructured":"Filipovi\u010d, J., Madzin, M., Fousek, J., Matyska, L.: Optimizing CUDA code by kernel fusion: application on BLAS. J. Supercomput. 71(10), 3934\u20133957 (2015)","journal-title":"J. Supercomput."},{"key":"48_CR14","doi-asserted-by":"crossref","unstructured":"Gale, T., Zaharia, M., Young, C., Elsen, E.: Sparse GPU kernels for deep learning. In: SC20: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201314. IEEE (2020)","DOI":"10.1109\/SC41405.2020.00021"},{"key":"48_CR15","unstructured":"Gwennap, L.: Groq rocks neural networks. Microprocessor Report, Technical report (2020)"},{"key":"48_CR16","unstructured":"Hamilton, W., Ying, Z., Leskovec, J.: Inductive representation learning on large graphs. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"48_CR17","unstructured":"Hosseini, R., Simini, F., Vishwanath, V.: Operation-level performance benchmarking of graph neural networks for scientific applications. arXiv preprint arXiv:2207.09955 (2022)"},{"key":"48_CR18","unstructured":"Ivanov, S., Sviridov, S., Burnaev, E.: Understanding isomorphism bias in graph data sets (2019)"},{"key":"48_CR19","doi-asserted-by":"crossref","unstructured":"Karunaratne, M., Mohite, A.K., Mitra, T., Peh, L.S.: HyCUBE: A CGRA with reconfigurable single-cycle multi-hop interconnect. In: Proceedings of the 54th Annual Design Automation Conference 2017, pp. 1\u20136 (2017)","DOI":"10.1145\/3061639.3062262"},{"key":"48_CR20","unstructured":"Paszke, A., et al.: PyTorch: an imperative style, high-performance deep learning library. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"48_CR21","doi-asserted-by":"crossref","unstructured":"Prabhakar, R., Jairath, S.: SambaNova SN10 RDU: accelerating software 2.0 with dataflow. In: 2021 IEEE Hot Chips 33 Symposium (HCS), pp. 1\u201337. IEEE (2021)","DOI":"10.1109\/HCS52781.2021.9567250"},{"key":"48_CR22","doi-asserted-by":"crossref","unstructured":"Prabhakar, R., Jairath, S., Shin, J.L.: Sambanova sn10 RDU: a 7 nm dataflow architecture to accelerate software 2.0. In: 2022 IEEE International Solid-State Circuits Conference (ISSCC), vol. 65, pp. 350\u2013352. IEEE (2022)","DOI":"10.1109\/ISSCC42614.2022.9731612"},{"key":"48_CR23","doi-asserted-by":"crossref","unstructured":"Qiao, B., Reiche, O., Hannig, F., Teich, J.: From loop fusion to kernel fusion: a domain-specific approach to locality optimization. In: 2019 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO), pp. 242\u2013253 (2019)","DOI":"10.1109\/CGO.2019.8661176"},{"key":"48_CR24","doi-asserted-by":"crossref","unstructured":"Ramakrishnan, R., Dral, P.O., Rupp, M., von Lilienfeld, O.A.: Quantum chemistry structures and properties of 134 kilo molecules. Sci. Data 1, 1\u20137 (2014)","DOI":"10.1038\/sdata.2014.22"},{"key":"48_CR25","doi-asserted-by":"crossref","unstructured":"Reuther, A., Michaleas, P., Jones, M., Gadepally, V., Samsi, S., Kepner, J.: Survey and benchmarking of machine learning accelerators. In: 2019 IEEE High Performance Extreme Computing Conference (HPEC), pp. 1\u20139. IEEE (2019)","DOI":"10.1109\/HPEC.2019.8916327"},{"key":"48_CR26","doi-asserted-by":"crossref","unstructured":"Reuther, A., Michaleas, P., Jones, M., Gadepally, V., Samsi, S., Kepner, J.: Survey of machine learning accelerators. In: 2020 IEEE High Performance Extreme Computing Conference (HPEC), pp. 1\u201312. IEEE (2020)","DOI":"10.1109\/HPEC43674.2020.9286149"},{"key":"48_CR27","doi-asserted-by":"crossref","unstructured":"Wang, G., Lin, Y., Yi, W.: Kernel fusion: an effective method for better power efficiency on multithreaded GPU. In: 2010 IEEE\/ACM International Conference on Green Computing and Communications & International Conference on Cyber, Physical and Social Computing, pp. 344\u2013350. IEEE (2010)","DOI":"10.1109\/GreenCom-CPSCom.2010.102"},{"key":"48_CR28","unstructured":"Wang, Y.E., Wei, G.Y., Brooks, D.: Benchmarking TPU, GPU, and CPU platforms for deep learning. arXiv preprint arXiv:1907.10701 (2019)"},{"key":"48_CR29","unstructured":"Wang, Y., Feng, B., Ding, Y.: TC-GNN: accelerating sparse graph neural network computation via dense tensor core on GPUs. arXiv preprint arXiv:2112.02052 (2021)"},{"issue":"1","key":"48_CR30","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1109\/TNNLS.2020.2978386","volume":"32","author":"Z Wu","year":"2020","unstructured":"Wu, Z., Pan, S., Chen, F., Long, G., Zhang, C., Philip, S.Y.: A comprehensive survey on graph neural networks. IEEE Trans. Neural Netw. Learn. Syst. 32(1), 4\u201324 (2020)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"48_CR31","doi-asserted-by":"publisher","first-page":"145301","DOI":"10.1103\/PhysRevLett.120.145301","volume":"120","author":"T Xie","year":"2018","unstructured":"Xie, T., Grossman, J.C.: Crystal graph convolutional neural networks for an accurate and interpretable prediction of material properties. Phys. Rev. Lett. 120, 145301 (2018). https:\/\/doi.org\/10.1103\/PhysRevLett.120.145301","journal-title":"Phys. Rev. Lett."},{"key":"48_CR32","unstructured":"Xu, K., Hu, W., Leskovec, J., Jegelka, S.: How powerful are graph neural networks? arXiv preprint arXiv:1810.00826 (2018)"},{"key":"48_CR33","unstructured":"Yang, C.: Hierarchical roofline analysis: How to collect data using performance tools on intel CPUs and NVIDIA GPUs. arXiv preprint arXiv:2009.02449 (2020)"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-40843-4_48","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,24]],"date-time":"2023-08-24T12:08:52Z","timestamp":1692878932000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-40843-4_48"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031408427","9783031408434"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-40843-4_48","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"25 August 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ISC High Performance","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on High Performance Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hamburg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 May 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 May 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"38","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"supercomputing2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.isc-hpc.com\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Linklings","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"70","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"49","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"70% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}