{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:39:50Z","timestamp":1766219990273,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,8]]},"DOI":"10.1145\/3754598.3754642","type":"proceedings-article","created":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:34:32Z","timestamp":1766219672000},"page":"741-752","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Accelerating an Electromagnetic Simulation via Memory-Constrained Task-Based Load Balancing"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-6653-2314","authenticated-orcid":false,"given":"Jonathan","family":"Lifflander","sequence":"first","affiliation":[{"name":"Sandia National Laboratories, Livermore, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0663-5547","authenticated-orcid":false,"given":"Nicole","family":"Slattengren","sequence":"additional","affiliation":[{"name":"Sandia National Laboratories, Livermove, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2311-3775","authenticated-orcid":false,"given":"Philippe","family":"Pebay","sequence":"additional","affiliation":[{"name":"NexGen Analytics, Sheridan, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2033-4911","authenticated-orcid":false,"given":"Pierre","family":"Pebay","sequence":"additional","affiliation":[{"name":"NexGen Analytics, Sheridan, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8395-9346","authenticated-orcid":false,"given":"Caleb","family":"Schilly","sequence":"additional","affiliation":[{"name":"NexGen Analytics, Sheridan, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6075-0394","authenticated-orcid":false,"given":"Robert","family":"Pfeiffer","sequence":"additional","affiliation":[{"name":"Sandia National Laboratories, Albuquerque, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3257-4741","authenticated-orcid":false,"given":"Joseph","family":"Kotulski","sequence":"additional","affiliation":[{"name":"Sandia National Laboratories, Albuquerque, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,12,20]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"[n. d.]. NNSA Warhead Modernization. https:\/\/www.energy.gov\/nnsa\/articles\/warhead-activities-fact-sheet Accessed: 2024-03-20."},{"key":"e_1_3_3_2_3_2","volume-title":"TOP500 LIST","year":"2022","unstructured":"2022. TOP500 LIST. https:\/\/www.top500.org\/lists\/top500\/2022\/06\/"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"crossref","unstructured":"Umut\u00a0A. Acar Guy\u00a0E. Blelloch and Robert\u00a0D. Blumofe. 2000. The data locality of work stealing(SPAA \u201900). 1\u201312.","DOI":"10.1145\/341800.341801"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"crossref","unstructured":"Alok Aggarwal Ashok\u00a0K Chandra and Marc Snir. 1990. Communication complexity of PRAMs. Theoretical Computer Science 71 1 (1990) 3\u201328.","DOI":"10.1016\/0304-3975(90)90188-N"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"crossref","unstructured":"C\u00e9dric Augonnet Samuel Thibault Raymond Namyst and Pierre-Andr\u00e9 Wacrenier. 2010. StarPU: A Unified Platform for Task Scheduling on Heterogeneous Multicore Architectures. Concurrency and Computation: Practice and Experience Euro-Par 2009 best papers issue (2010). Accepted for publication to appear.","DOI":"10.1002\/cpe.1631"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2012.71"},{"key":"e_1_3_3_2_8_2","unstructured":"Samuel\u00a0Adam Blake William\u00a0L. Langston Joseph\u00a0D. Kotulski Vinh\u00a0Quang Dang Salvatore Campione Robert\u00a0Anthony Pfeiffer and Brian\u00a0Frederick Zinser. 2020. Exascale Method of Moments for Linear Electromagnetics with Gemma. Sandia National Lab. (SNL-NM) Albuquerque NM (United States). https:\/\/www.osti.gov\/biblio\/1761284"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/209936.209958"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/2503210.2503289"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"crossref","unstructured":"George Bosilca Aurelien Bouteiller Anthony Danalis Mathieu Faverge Thomas H\u00e9rault and Jack\u00a0J Dongarra. 2013. Parsec: Exploiting heterogeneity to enhance scalability. Computing in Science & Engineering 15 6 (2013) 36\u201345.","DOI":"10.1109\/MCSE.2013.98"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2007.370258"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"crossref","unstructured":"\u00dcmit\u00a0V. \u00c7ataly\u00fcrek and Cevdet Aykanat. 1999. Hypergraph-Partitioning-Based Decomposition for Parallel Sparse-Matrix Vector Multiplication. IEEE Trans. Parallel Distrib. Syst. 10 7 (1999) 673\u2013693.","DOI":"10.1109\/71.780863"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"\u00dcmit\u00a0V. \u00c7ataly\u00fcrek Erik\u00a0G. Boman Karen\u00a0D. Devine Doruk Bozdag Robert\u00a0T. Heaphy and Lee\u00a0Ann Riesen. 2009. A repartitioning hypergraph model for dynamic load balancing. J. Parallel Distrib. Comput. 69 8 (2009) 711\u2013724.","DOI":"10.1016\/j.jpdc.2009.04.011"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","unstructured":"Thomas\u00a0M. Conte Elie Track and Erik DeBenedictis. 2015. Rebooting Computing: New Strategies for Technology Scaling. Computer 48 12 (2015) 10\u201313. 10.1109\/MC.2015.363","DOI":"10.1109\/MC.2015.363"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/155332.155333"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"crossref","unstructured":"George Cybenko. 1989. Dynamic load balancing for distributed memory multiprocessors. Journal of parallel and distributed computing 7 2 (1989) 279\u2013301.","DOI":"10.1016\/0743-7315(89)90021-X"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"crossref","unstructured":"Alain Darte John\u00a0M. Mellor-Crummey Robert\u00a0J. Fowler and Daniel\u00a0G. Chavarr\u00eda-Miranda. 2003. Generalized multipartitioning of multi-dimensional arrays for parallelizing line-sweep computations. J. Parallel Distrib. Comput. 63 9 (2003) 887\u2013911.","DOI":"10.1016\/S0743-7315(03)00103-5"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/SPLC.1993.365583"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/41840.41841"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/335231.335242"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"James Dinan D.\u00a0Brian Larkins P. Sadayappan Sriram Krishnamoorthy and Jarek Nieplocha. 2009. Scalable work stealing(SC \u201909). Article 53 11\u00a0pages.","DOI":"10.1145\/1654059.1654113"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"crossref","unstructured":"Maxime Gonthier. 2023. Scheduling Under Memory Constraint in Task-based Runtime Systems. Ph.\u00a0D. Dissertation. Ecole normale sup\u00e9rieure de lyon-ENS LYON.","DOI":"10.1016\/j.future.2023.01.024"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"crossref","unstructured":"Roger\u00a0F. Harrington. 1989. Boundary Integral Formulations for Homogeneous Material Bodies. Journal of Electromagnetic Waves and Applications 3 1 (1989) 1\u201315.","DOI":"10.1163\/156939389X00016"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","unstructured":"Bruce Hendrickson and Robert Leland. 1995. An improved spectral graph partitioning algorithm for mapping parallel computations. SIAM J. Sci. Comput. 16 (March 1995) 452\u2013469. Issue 2. 10.1137\/0916028","DOI":"10.1137\/0916028"},{"key":"e_1_3_3_2_26_2","volume-title":"frugally-deep","author":"Hermann Tobias","unstructured":"Tobias Hermann. [n. d.]. frugally-deep. https:\/\/github.com\/Dobiasd\/frugally-deep"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","unstructured":"Kurt Hornik Maxwell Stinchcombe and Halbert White. 1989. Multilayer feedforward networks are universal approximators. Neural Networks 2 5 (1989) 359\u2013366. 10.1016\/0893-6080(89)90020-8","DOI":"10.1016\/0893-6080(89)90020-8"},{"key":"e_1_3_3_2_28_2","first-page":"448","volume-title":"International Conference on Machine Learning","volume":"37","author":"Ioffe Sergey","year":"2015","unstructured":"Sergey Ioffe and Christian Szegedy. 2015. Batch normalization: accelerating deep network training by reducing internal covariate shift. In International Conference on Machine Learning, Vol.\u00a037. 448\u2013456."},{"key":"e_1_3_3_2_29_2","first-page":"225","volume-title":"International Conference on Parallel Processing and Applied Mathematics","author":"John Joseph","year":"2022","unstructured":"Joseph John, Josh Milthorpe, and Peter Strazdins. 2022. Distributed Work Stealing in a Task-Based Dataflow Runtime. In International Conference on Parallel Processing and Applied Mathematics. Springer, 225\u2013236."},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"crossref","unstructured":"G. Karypis K. Schloegel and V. Kumar. 1997. Parmetis: Parallel graph partitioning and sparse matrix ordering library. Version 1.0 Dept. of Computer Science University of Minnesota (1997).","DOI":"10.1006\/jpdc.1997.1403"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607096"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/2287076.2287103"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/ExaMPI52011.2020.00009"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/Cluster48925.2021.00039"},{"key":"e_1_3_3_2_35_2","volume-title":"International Conference on Learning Representations","author":"Loshchilov Ilya","year":"2019","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. Decoupled Weight Decay Regularization. In International Conference on Learning Representations."},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/2503210.2503284"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","unstructured":"S. Rao D. Wilton and A. Glisson. 1982. Electromagnetic scattering by surfaces of arbitrary shape. IEEE Transactions on Antennas and Propagation 30 3 (May 1982) 409\u2013418. 10.1109\/TAP.1982.1142818","DOI":"10.1109\/TAP.1982.1142818"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","DOI":"10.1145\/2597652.2597673"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607079"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"publisher","DOI":"10.1109\/APS.1997.625445"},{"key":"e_1_3_3_2_41_2","unstructured":"Nitish Srivastava Geoffrey Hinton Krizhevsky Alex Ilya Sutskever and Ruslan Salakhutdinov. 2014. Dropout: A Simple Way to Prevent Neural Networks from Overfitting. Journal of Machine Learning Research 15 (2014) 1929\u20131958."},{"key":"e_1_3_3_2_42_2","unstructured":"Amin Vahdat David Becker et\u00a0al. 2000. Epidemic routing for partially connected ad hoc networks."},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","unstructured":"Roy\u00a0D. Williams. 1991. Performance of dynamic load balancing algorithms for unstructured mesh calculations. Concurrency: Pract. Exper. 3 (October 1991) 457\u2013481. Issue 5. 10.1002\/cpe.4330030502","DOI":"10.1002\/cpe.4330030502"},{"key":"e_1_3_3_2_44_2","unstructured":"Bing Xu Naiyan Wang Tianqi Chen and Mu Li. 2015. Empirical evaluation of rectified activations in convolutional network. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1505.00853 (2015)."},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-13374-9_12"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"crossref","unstructured":"Yunquan Zhang Guoliang Chen Guangzhong Sun and Qiankun Miao. 2007. Models of parallel computation: a survey and classification. Frontiers of Computer Science in China 1 (2007) 156\u2013165.","DOI":"10.1007\/s11704-007-0016-1"},{"key":"e_1_3_3_2_47_2","unstructured":"Gengbin Zheng Abhinav Bhatele Esteban Meneses and Laxmikant\u00a0V. Kale. 2010. Periodic Hierarchical Load Balancing for Large Supercomputers. International Journal of High Performance Computing Applications (IJHPCA) (2010)."}],"event":{"name":"ICPP '25: 54th International Conference on Parallel Processing","location":"San Diego CA USA","acronym":"ICPP '25"},"container-title":["Proceedings of the 54th International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3754598.3754642","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:37:04Z","timestamp":1766219824000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3754598.3754642"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,8]]},"references-count":46,"alternative-id":["10.1145\/3754598.3754642","10.1145\/3754598"],"URL":"https:\/\/doi.org\/10.1145\/3754598.3754642","relation":{},"subject":[],"published":{"date-parts":[[2025,9,8]]},"assertion":[{"value":"2025-12-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}