{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T10:44:49Z","timestamp":1743072289374,"version":"3.40.3"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030787127"},{"type":"electronic","value":"9783030787134"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-78713-4_10","type":"book-chapter","created":{"date-parts":[[2021,6,16]],"date-time":"2021-06-16T23:06:15Z","timestamp":1623884775000},"page":"176-194","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["HTA: A Scalable High-Throughput Accelerator for Irregular HPC Workloads"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5891-4003","authenticated-orcid":false,"given":"Pouya","family":"Fotouhi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1896-1489","authenticated-orcid":false,"given":"Marjan","family":"Fariborz","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6378-7005","authenticated-orcid":false,"given":"Roberto","family":"Proietti","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8880-8703","authenticated-orcid":false,"given":"Jason","family":"Lowe-Power","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3014-5326","authenticated-orcid":false,"given":"Venkatesh","family":"Akella","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7420-1871","authenticated-orcid":false,"given":"S. J. Ben","family":"Yoo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,6,17]]},"reference":[{"key":"10_CR1","unstructured":"AMD: Introducing RDNA architecture (2019). https:\/\/www.amd.com\/system\/files\/documents\/rdna-whitepaper.pdf. Accessed 10 Dec 2020"},{"key":"10_CR2","unstructured":"AMD: Introducing AMD CDNA architecture (2020). https:\/\/www.amd.com\/system\/files\/documents\/amd-cdna-whitepaper.pdf. Accessed 12 Dec 2020"},{"issue":"2","key":"10_CR3","doi-asserted-by":"publisher","first-page":"320","DOI":"10.1145\/3140659.3080231","volume":"45","author":"A Arunkumar","year":"2017","unstructured":"Arunkumar, A., et al.: MCM-GPU: multi-chip-module GPUs for continued performance scalability. ACM SIGARCH Comput. Archit. News 45(2), 320\u2013332 (2017)","journal-title":"ACM SIGARCH Comput. Archit. News"},{"key":"10_CR4","doi-asserted-by":"crossref","unstructured":"Arunkumar, A., et al.: Understanding the future of energy efficiency in multi-module GPUs. In: 2019 IEEE International Symposium on High Performance Computer Architecture (HPCA), pp. 519\u2013532. IEEE (2019)","DOI":"10.1109\/HPCA.2019.00063"},{"key":"10_CR5","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4419-9335-9","volume-title":"Photonic Network-on-Chip Design","author":"K Bergman","year":"2014","unstructured":"Bergman, K., et al.: Photonic Network-on-Chip Design. Springer, New York (2014). https:\/\/doi.org\/10.1007\/978-1-4419-9335-9"},{"key":"10_CR6","doi-asserted-by":"crossref","unstructured":"Bojnordi, M.N., Ipek, E.: PARDIS: a programmable memory controller for the DDRx interfacing standards. In: 2012 39th Annual International Symposium on Computer Architecture (ISCA), pp. 13\u201324 (2012)","DOI":"10.1109\/ISCA.2012.6237002"},{"key":"10_CR7","doi-asserted-by":"crossref","unstructured":"Chatterjee, N., et al.: Managing DRAM latency divergence in irregular GPGPU applications. In: SC 2014: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 128\u2013139. IEEE (2014)","DOI":"10.1109\/SC.2014.16"},{"issue":"4","key":"10_CR8","doi-asserted-by":"publisher","first-page":"310","DOI":"10.1109\/JSTQE.2013.2295879","volume":"20","author":"S Cheung","year":"2013","unstructured":"Cheung, S., et al.: Ultra-compact silicon photonic 512 $$\\times $$ 512 25 GHZ arrayed waveguide grating router. IEEE J. Sel. Top. Quantum Electron. 20(4), 310\u2013316 (2013)","journal-title":"IEEE J. Sel. Top. Quantum Electron."},{"key":"10_CR9","unstructured":"Cutress, I.: Intel launches stratix-10-TX leveraging EMIB with 58G transceivers. https:\/\/www.anandtech.com\/show\/12477\/intel-launches-stratix-10-tx-leveraging-emib-with-58g-transceivers-. Accessed 28 Nov 2020"},{"key":"10_CR10","doi-asserted-by":"crossref","unstructured":"Danalis, A., et al.: The scalable heterogeneous computing (SHOC) benchmark suite. In: Proceedings of the 3rd Workshop on General-Purpose Computation on Graphics Processing Units, pp. 63\u201374 (2010)","DOI":"10.1145\/1735688.1735702"},{"issue":"4","key":"10_CR11","doi-asserted-by":"publisher","first-page":"4736","DOI":"10.1364\/OE.23.004736","volume":"23","author":"R Dangel","year":"2015","unstructured":"Dangel, R., et al.: Polymer waveguides for electro-optical integration in data centers and high-performance computers. Opt. Express 23(4), 4736\u20134750 (2015)","journal-title":"Opt. Express"},{"issue":"4","key":"10_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/JSTQE.2018.2812603","volume":"24","author":"R Dangel","year":"2018","unstructured":"Dangel, R., et al.: Polymer waveguides enabling scalable low-loss adiabatic optical coupling for silicon photonics. IEEE J. Sel. Top. Quantum Electron. 24(4), 1\u201311 (2018)","journal-title":"IEEE J. Sel. Top. Quantum Electron."},{"key":"10_CR13","unstructured":"Das, S.: It\u2019s time for disaggregated silicon! (2018). https:\/\/www.netronome.com\/blog\/its-time-disaggregated-silicon\/. Accessed 28 Nov 2020"},{"key":"10_CR14","doi-asserted-by":"crossref","unstructured":"Fotouhi, P., et al.: Enabling scalable chiplet-based uniform memory architectures with silicon photonics. In: Proceedings of the International Symposium on Memory Systems, pp. 222\u2013334 (2019)","DOI":"10.1145\/3357526.3357564"},{"issue":"7","key":"10_CR15","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1364\/JOCN.11.000333","volume":"11","author":"P Fotouhi","year":"2019","unstructured":"Fotouhi, P., et al.: Enabling scalable disintegrated computing systems with AWGR-based 2.5 D interconnection networks. IEEE\/OSA J. Opt. Commun. Netw. 11(7), 333\u2013346 (2019)","journal-title":"IEEE\/OSA J. Opt. Commun. Netw."},{"key":"10_CR16","doi-asserted-by":"crossref","unstructured":"Grani, P., et al.: Bit-parallel all-to-all and flexible AWGR-based optical interconnects. In: Optical Fiber Communication Conference, pp. M3K-4. Optical Society of America (2017)","DOI":"10.1364\/OFC.2017.M3K.4"},{"key":"10_CR17","doi-asserted-by":"crossref","unstructured":"Grani, P., et al.: Design and evaluation of AWGR-based photonic NoC architectures for 2.5 D integrated high performance computing systems. In: 2017 IEEE International Symposium on High Performance Computer Architecture (HPCA), pp. 289\u2013300. IEEE (2017)","DOI":"10.1109\/HPCA.2017.17"},{"key":"10_CR18","doi-asserted-by":"crossref","unstructured":"Hashemi, M., et al.: Accelerating dependent cache misses with an enhanced memory controller. In: 2016 ACM\/IEEE 43rd Annual International Symposium on Computer Architecture (ISCA), pp. 444\u2013455 (2016)","DOI":"10.1109\/ISCA.2016.46"},{"key":"10_CR19","doi-asserted-by":"crossref","unstructured":"Hussain, T., et al.: Advanced pattern based memory controller for FPGA based HPC applications. In: 2014 International Conference on High Performance Computing Simulation (HPCS), pp. 287\u2013294 (2014)","DOI":"10.1109\/HPCSim.2014.6903697"},{"key":"10_CR20","unstructured":"Jeppix: Cost roadmap. https:\/\/www.jeppix.eu\/wp-content\/uploads\/2020\/04\/JePPIXRoadmap2012.pdf. Accessed 28 Nov 2020"},{"key":"10_CR21","unstructured":"Jia, Z., et al.: Dissecting the NVIDIA volta GPU architecture via microbenchmarking. arXiv preprint arXiv:1804.06826 (2018)"},{"issue":"2","key":"10_CR22","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1109\/LCA.2020.2973991","volume":"19","author":"S Li","year":"2020","unstructured":"Li, S., et al.: DRAMsim3: a cycle-accurate, thermal-capable DRAM simulator. IEEE Comput. Archit. Lett. 19(2), 106\u2013109 (2020)","journal-title":"IEEE Comput. Archit. Lett."},{"key":"10_CR23","doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: Get out of the valley: power-efficient address mapping for GPUs. In: 2018 ACM\/IEEE 45th Annual International Symposium on Computer Architecture (ISCA), pp. 166\u2013179. IEEE (2018)","DOI":"10.1109\/ISCA.2018.00024"},{"key":"10_CR24","doi-asserted-by":"crossref","unstructured":"Milic, U., et al.: Beyond the socket: NUMA-aware GPUs. In: Proceedings of the 50th Annual IEEE\/ACM International Symposium on Microarchitecture, pp. 123\u2013135 (2017)","DOI":"10.1145\/3123939.3124534"},{"issue":"7","key":"10_CR25","doi-asserted-by":"publisher","first-page":"1166","DOI":"10.1109\/JPROC.2009.2014298","volume":"97","author":"DA Miller","year":"2009","unstructured":"Miller, D.A.: Device requirements for optical interconnects to silicon chips. Proc. IEEE 97(7), 1166\u20131185 (2009)","journal-title":"Proc. IEEE"},{"key":"10_CR26","unstructured":"NVIDIA: A100 tensor core GPU architecture. https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/Data-Center\/nvidia-ampere-architecture-whitepaper.pdf. Accessed 31 Nov 2020"},{"key":"10_CR27","doi-asserted-by":"crossref","unstructured":"Oh, B., et al.: A load balancing technique for memory channels. In: Proceedings of the International Symposium on Memory Systems, pp. 55\u201366 (2018)","DOI":"10.1145\/3240302.3240306"},{"key":"10_CR28","doi-asserted-by":"crossref","unstructured":"Pal, S., et al.: Architecting waferscale processors - a GPU case study. In: 2019 IEEE International Symposium on High Performance Computer Architecture (HPCA), pp. 250\u2013263 (2019)","DOI":"10.1109\/HPCA.2019.00042"},{"issue":"7","key":"10_CR29","doi-asserted-by":"publisher","first-page":"B49","DOI":"10.1364\/JOCN.10.000B49","volume":"10","author":"R Proietti","year":"2018","unstructured":"Proietti, R., et al.: Experimental demonstration of a 64-port wavelength routing thin-CLOS system for data center switching architectures. J. Opt. Commun. Netw. 10(7), B49\u2013B57 (2018)","journal-title":"J. Opt. Commun. Netw."},{"issue":"2","key":"10_CR30","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1145\/342001.339668","volume":"28","author":"S Rixner","year":"2000","unstructured":"Rixner, S., et al.: Memory access scheduling. ACM SIGARCH Comput. Archit. News 28(2), 128\u2013138 (2000)","journal-title":"ACM SIGARCH Comput. Archit. News"},{"issue":"5","key":"10_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/JPHOT.2017.2751003","volume":"9","author":"K Shang","year":"2017","unstructured":"Shang, K., et al.: Low-loss compact silicon nitride arrayed waveguide gratings for photonic integrated circuits. IEEE Photonics J. 9(5), 1\u20135 (2017)","journal-title":"IEEE Photonics J."},{"issue":"10","key":"10_CR32","doi-asserted-by":"publisher","first-page":"12801","DOI":"10.1364\/OE.26.012801","volume":"26","author":"T Su","year":"2018","unstructured":"Su, T., et al.: Interferometric imaging using Si$$_3$$N$$_4$$ photonic integrated circuits for a SPIDER imager. Opt. Express 26(10), 12801\u201312812 (2018)","journal-title":"Opt. Express"},{"key":"10_CR33","doi-asserted-by":"crossref","unstructured":"Sun, Y., et al.: Hetero-mark, a benchmark suite for CPU-GPU collaborative computing. In: 2016 IEEE International Symposium on Workload Characterization (IISWC), pp. 1\u201310. IEEE (2016)","DOI":"10.1109\/IISWC.2016.7581262"},{"key":"10_CR34","doi-asserted-by":"crossref","unstructured":"Sun, Y., et al.: MGPUsim: enabling multi-GPU performance modeling and optimization. In: Proceedings of the 46th International Symposium on Computer Architecture, pp. 197\u2013209 (2019)","DOI":"10.1145\/3307650.3322230"},{"key":"10_CR35","doi-asserted-by":"crossref","unstructured":"Tian, Y., et al.: Adaptive GPU cache bypassing. In: Proceedings of the 8th Workshop on General Purpose Processing Using GPUS, pp. 25\u201335 (2015)","DOI":"10.1145\/2716282.2716283"},{"key":"10_CR36","unstructured":"TSMC: Enhancing the CoWoS platform (2020). https:\/\/pr.tsmc.com\/english\/news\/2026. Accessed 14 Dec 2020"},{"key":"10_CR37","doi-asserted-by":"crossref","unstructured":"Vijayaraghavan, T., et al.: Design and analysis of an APU for exascale computing. In: 2017 IEEE International Symposium on High Performance Computer Architecture (HPCA), pp. 85\u201396 (2017)","DOI":"10.1109\/HPCA.2017.42"},{"issue":"2","key":"10_CR38","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1109\/MM.2020.2976067","volume":"40","author":"M Wade","year":"2020","unstructured":"Wade, M., et al.: TeraPHY: a chiplet technology for low-power, high-bandwidth in-package optical I\/O. IEEE Micro 40(2), 63\u201371 (2020)","journal-title":"IEEE Micro"},{"issue":"19","key":"10_CR39","doi-asserted-by":"publisher","first-page":"1267","DOI":"10.1016\/j.scib.2018.05.038","volume":"63","author":"J Wang","year":"2018","unstructured":"Wang, J., Long, Y.: On-chip silicon photonic signaling and processing: a review. Sci. Bull. 63(19), 1267\u20131310 (2018)","journal-title":"Sci. Bull."},{"key":"10_CR40","doi-asserted-by":"crossref","unstructured":"Werner, S., et al.: Towards energy-efficient high-throughput photonic NoCs for 2.5 D integrated systems: a case for AWGRs. In: 2018 Twelfth IEEE\/ACM International Symposium on Networks-on-Chip (NOCS), pp. 1\u20138. IEEE (2018)","DOI":"10.1109\/NOCS.2018.8512157"},{"issue":"5","key":"10_CR41","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/JSTQE.2019.2910415","volume":"25","author":"Y Zhang","year":"2019","unstructured":"Zhang, Y., et al.: Foundry-enabled scalable all-to-all optical interconnects using silicon nitride arrayed waveguide router interposers and silicon photonic transceivers. IEEE J. Sel. Top. Quantum Electron. 25(5), 1\u20139 (2019)","journal-title":"IEEE J. Sel. Top. Quantum Electron."}],"container-title":["Lecture Notes in Computer Science","High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-78713-4_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,29]],"date-time":"2023-03-29T07:06:14Z","timestamp":1680073574000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-78713-4_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030787127","9783030787134"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-78713-4_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"17 June 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ISC High Performance","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on High Performance Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 June 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 July 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"36","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"supercomputing2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.isc-hpc.com\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Linklings","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"74","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"32% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.28","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.13","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"In the ISC High Performance Workshop, there were 49 submissions, out of which 35  were accepted.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}