{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T15:44:35Z","timestamp":1772725475993,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":25,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819510207","type":"print"},{"value":"9789819510214","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T00:00:00Z","timestamp":1762214400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T00:00:00Z","timestamp":1762214400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-1021-4_3","type":"book-chapter","created":{"date-parts":[[2025,11,3]],"date-time":"2025-11-03T10:28:43Z","timestamp":1762165723000},"page":"34-46","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Segmentation-Aware Optimization of\u00a0Collective for\u00a0Waferscale Chips"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-2221-4706","authenticated-orcid":false,"given":"Qize","family":"Yang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9405-8548","authenticated-orcid":false,"given":"Jiaxin","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3501-3148","authenticated-orcid":false,"given":"Taiquan","family":"Wei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0222-2717","authenticated-orcid":false,"given":"Yuxin","family":"Jin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8438-8588","authenticated-orcid":false,"given":"Shouyi","family":"Yin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6942-4395","authenticated-orcid":false,"given":"Yang","family":"Hu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,11,4]]},"reference":[{"key":"3_CR1","unstructured":"Achiam, J., et\u00a0al.: Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)"},{"issue":"2","key":"3_CR2","doi-asserted-by":"publisher","first-page":"320","DOI":"10.1145\/3140659.3080231","volume":"45","author":"A Arunkumar","year":"2017","unstructured":"Arunkumar, A., et al.: MCM-GPU: multi-chip-module gpus for continued performance scalability. ACM SIGARCH Comput. Arch. News 45(2), 320\u2013332 (2017)","journal-title":"ACM SIGARCH Comput. Arch. News"},{"key":"3_CR3","doi-asserted-by":"crossref","unstructured":"Chen, S., Pal, S., Kumar, R.: Waferscale network switches. In: 2024 ACM\/IEEE 51st Annual International Symposium on Computer Architecture (ISCA), pp. 215\u2013229. IEEE (2024)","DOI":"10.1109\/ISCA59077.2024.00025"},{"key":"3_CR4","doi-asserted-by":"crossref","unstructured":"Cho, S., Son, H., Kim, J.: Logical\/physical topology-aware collective communication in deep learning training. In: 2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA), pp. 56\u201368. IEEE (2023)","DOI":"10.1109\/HPCA56546.2023.10071117"},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Chun, S.R., et al.: Info_sow (system-on-wafer) for high performance computing. In: 2020 IEEE 70th Electronic Components and Technology Conference (ECTC), pp.\u00a01\u20136. IEEE (2020)","DOI":"10.1109\/ECTC32862.2020.00013"},{"key":"3_CR6","unstructured":"De\u00a0Sensi, D., Bonato, T., Saam, D., Hoefler, T.: Swing: short-cutting rings for higher bandwidth allreduce. In: 21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24), pp. 1445\u20131462 (2024)"},{"key":"3_CR7","doi-asserted-by":"crossref","unstructured":"Deng, J., et\u00a0al.: Efficient orchestrated ai workflows execution on scale-out spatial architecture. IEEE Trans. Circ. Syst. Artif. Intell.(2024)","DOI":"10.1109\/TCASAI.2024.3476237"},{"key":"3_CR8","unstructured":"Devlin, J.: Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"3_CR9","unstructured":"Fang, J., et al.: Palm: a efficient performance simulator for tiled accelerators with large-scale model training. arXiv preprint arXiv:2406.03868 (2024)"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Fei, J., Ho, C.Y., Sahu, A.N., Canini, M., Sapio, A.: Efficient sparse collective communication and its application to accelerate distributed deep learning. In: Proceedings of the 2021 ACM SIGCOMM 2021 Conference, pp. 676\u2013691 (2021)","DOI":"10.1145\/3452296.3472904"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"Feng, Y., Ma, K.: Switch-less dragonfly on wafers: a scalable interconnection architecture based on wafer-scale integration. arXiv preprint arXiv:2407.10290 (2024)","DOI":"10.1109\/SC41406.2024.00102"},{"issue":"1","key":"3_CR12","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1109\/MCAS.2024.3349669","volume":"24","author":"Y Hu","year":"2024","unstructured":"Hu, Y., et al.: Wafer-scale computing: advancements, challenges, and future perspectives [feature]. IEEE Circuits Syst. Mag. 24(1), 52\u201381 (2024)","journal-title":"IEEE Circuits Syst. Mag."},{"key":"3_CR13","doi-asserted-by":"crossref","unstructured":"Hu, Y., Song, M., Li, T.: Towards \u201cfull containerization\" in containerized network function virtualization. In: Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems, pp. 467\u2013481 (2017)","DOI":"10.1145\/3037697.3037713"},{"key":"3_CR14","doi-asserted-by":"crossref","unstructured":"Laskar, S., Majhi, P., Kim, S., Mahmud, F., Muzahid, A., Kim, E.J.: Enhancing collective communication in mcm accelerators for deep learning training. In: 2024 IEEE International Symposium on High-Performance Computer Architecture (HPCA), pp. 1\u201316. IEEE (2024)","DOI":"10.1109\/HPCA57654.2024.00069"},{"issue":"1","key":"3_CR15","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1109\/TPDS.2019.2928289","volume":"31","author":"A Li","year":"2019","unstructured":"Li, A., et al.: Evaluating modern gpu interconnect: Pcie, nvlink, nv-sli, nvswitch and gpudirect. IEEE Trans. Parallel Distrib. Syst. 31(1), 94\u2013110 (2019)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"3_CR16","doi-asserted-by":"crossref","unstructured":"Lim, D., Kim, J.: Tidalmesh: topology-driven allreduce collective communication for mesh topology. In: 2025 IEEE International Symposium on High Performance Computer Architecture (HPCA), pp. 1526\u20131540. IEEE Computer Society (2025)","DOI":"10.1109\/HPCA61900.2025.00114"},{"key":"3_CR17","doi-asserted-by":"crossref","unstructured":"Qin, Y., et al.: Fact: FFN-attention co-optimized transformer architecture with eager correlation prediction. In: Proceedings of the 50th Annual International Symposium on Computer Architecture, pp. 1\u201314 (2023)","DOI":"10.1145\/3579371.3589057"},{"key":"3_CR18","doi-asserted-by":"crossref","unstructured":"Song, M., Zhao, J., Hu, Y., Zhang, J., Li, T.: Prediction based execution on deep neural networks. In: 2018 ACM\/IEEE 45th Annual International Symposium on Computer Architecture (ISCA), pp. 752\u2013763. IEEE (2018)","DOI":"10.1109\/ISCA.2018.00068"},{"key":"3_CR19","doi-asserted-by":"crossref","unstructured":"Song, M., et al.: In-situ AI: towards autonomous and incremental deep learning for IoT systems. In: 2018 IEEE International Symposium on High Performance Computer Architecture (HPCA), pp. 92\u2013103. IEEE (2018)","DOI":"10.1109\/HPCA.2018.00018"},{"key":"3_CR20","doi-asserted-by":"crossref","unstructured":"Talpes, E., Williams, D., Sarma, D.D.: Dojo: the microarchitecture of tesla\u2019s exa-scale computer. In: 2022 IEEE Hot Chips 34 Symposium (HCS), pp. 1\u201328. IEEE Computer Society (2022)","DOI":"10.1109\/HCS55958.2022.9895534"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Ueno, Y., Yokota, R.: Exhaustive study of hierarchical allreduce patterns for large messages between gpus. In: 2019 19th IEEE\/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID), pp. 430\u2013439. IEEE (2019)","DOI":"10.1109\/CCGRID.2019.00057"},{"key":"3_CR22","doi-asserted-by":"crossref","unstructured":"Wang, H., et\u00a0al.: Tmac: training-targeted mapping and architecture co-exploration for wafer-scale chips. Integrat. Circ. Syst. (2024)","DOI":"10.23919\/ICS.2024.3515003"},{"key":"3_CR23","doi-asserted-by":"crossref","unstructured":"Won, W., et al.: Tacos: topology-aware collective algorithm synthesizer for distributed machine learning. arXiv preprint arXiv:2304.05301 (2023)","DOI":"10.1109\/MICRO61859.2024.00068"},{"key":"3_CR24","doi-asserted-by":"crossref","unstructured":"Yinxiao, F., Kaisheng, M.: Chiplet actuary: a quantitative cost model and multi-chiplet architecture exploration. In: The 59th ACM\/IEEE Design Automation Conference, San Francisco, USA, pp. 121\u2013126 (2022)","DOI":"10.1145\/3489517.3530428"},{"key":"3_CR25","doi-asserted-by":"crossref","unstructured":"Zhu, J., Xue, C., Chen, Y., Wang, Z., Sun, G.: Theseus: exploring efficient wafer-scale chip design for large language models. arXiv preprint arXiv:2407.02079 (2024)","DOI":"10.1109\/TCAD.2025.3566297"}],"container-title":["Lecture Notes in Computer Science","Advanced Parallel Processing Technologies"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-1021-4_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,3]],"date-time":"2025-11-03T10:28:59Z","timestamp":1762165739000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-1021-4_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,4]]},"ISBN":["9789819510207","9789819510214"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-1021-4_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,4]]},"assertion":[{"value":"4 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"APPT","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Advanced Parallel Processing Technologies","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Athens","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"appt2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.appt-conference.com\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}