{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T07:24:38Z","timestamp":1768029878953,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":28,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,8,9]],"date-time":"2021-08-09T00:00:00Z","timestamp":1628467200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1942892"],"award-info":[{"award-number":["1942892"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,8,9]]},"DOI":"10.1145\/3472456.3472472","type":"proceedings-article","created":{"date-parts":[[2021,10,5]],"date-time":"2021-10-05T18:46:04Z","timestamp":1633459564000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Parallel Tucker Decomposition with Numerically Accurate SVD"],"prefix":"10.1145","author":[{"given":"Zitong","family":"Li","sequence":"first","affiliation":[{"name":"Wake Forest University, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiming","family":"Fang","sequence":"additional","affiliation":[{"name":"Wake Forest University, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Grey","family":"Ballard","sequence":"additional","affiliation":[{"name":"Wake Forest University, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,10,5]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3058103"},{"key":"e_1_3_2_1_2_1","unstructured":"H. Al\u00a0Daas G. Ballard and P. Benner. 2020. Parallel Algorithms for Tensor Train Arithmetic. Technical Report. arXiv. https:\/\/arxiv.org\/abs\/2011.06532  H. Al\u00a0Daas G. Ballard and P. Benner. 2020. Parallel Algorithms for Tensor Train Arithmetic. Technical Report. arXiv. https:\/\/arxiv.org\/abs\/2011.06532"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"M. Anderson G. Ballard J. Demmel and K. Keutzer. 2011. Communication-Avoiding QR Decomposition for GPUs. In IPDPS. IEEE Computer Society 48\u201358. https:\/\/doi.org\/10.1109\/IPDPS.2011.15  M. Anderson G. Ballard J. Demmel and K. Keutzer. 2011. Communication-Avoiding QR Decomposition for GPUs. In IPDPS. IEEE Computer Society 48\u201358. https:\/\/doi.org\/10.1109\/IPDPS.2011.15","DOI":"10.1109\/IPDPS.2011.15"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"W. Austin G. Ballard and T.\u00a0G. Kolda. 2016. Parallel Tensor Compression for Large-Scale Scientific Data. In IPDPS. 912\u2013922. https:\/\/doi.org\/10.1109\/IPDPS.2016.67  W. Austin G. Ballard and T.\u00a0G. Kolda. 2016. Parallel Tensor Compression for Large-Scale Scientific Data. In IPDPS. 912\u2013922. https:\/\/doi.org\/10.1109\/IPDPS.2016.67","DOI":"10.1109\/IPDPS.2016.67"},{"key":"e_1_3_2_1_5_1","volume-title":"Reconstructing Householder Vectors from Tall-Skinny QR. J. Parallel and Distrib. Comput. 85 (August","author":"Ballard G.","year":"2015","unstructured":"G. Ballard , J. Demmel , L. Grigori , N. Knight , M. Jacquelin , and H.\u00a0 D. Nguyen . 2015. Reconstructing Householder Vectors from Tall-Skinny QR. J. Parallel and Distrib. Comput. 85 (August 2015 ), 3\u201331. https:\/\/doi.org\/10.1016\/j.jpdc.2015.06.003 G. Ballard, J. Demmel, L. Grigori, N. Knight, M. Jacquelin, and H.\u00a0D. Nguyen. 2015. Reconstructing Householder Vectors from Tall-Skinny QR. J. Parallel and Distrib. Comput. 85 (August 2015), 3\u201331. https:\/\/doi.org\/10.1016\/j.jpdc.2015.06.003"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3378445"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00371-015-1130-y"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.combustflame.2013.12.027"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"V.\u00a0T. Chakaravarthy J.\u00a0W. Choi D.\u00a0J. Joseph X. Liu P. Murali Y. Sabharwal and D. Sreedhar. 2017. On Optimizing Distributed Tucker Decomposition for Dense Tensors. In IPDPS. 1038\u20131047. https:\/\/doi.org\/10.1109\/IPDPS.2017.86  V.\u00a0T. Chakaravarthy J.\u00a0W. Choi D.\u00a0J. Joseph X. Liu P. Murali Y. Sabharwal and D. Sreedhar. 2017. On Optimizing Distributed Tucker Decomposition for Dense Tensors. In IPDPS. 1038\u20131047. https:\/\/doi.org\/10.1109\/IPDPS.2017.86","DOI":"10.1109\/IPDPS.2017.86"},{"key":"e_1_3_2_1_10_1","unstructured":"J. Choi X. Liu and V. Chakaravarthy. 2018. High-performance Dense Tucker Decomposition on GPU Clusters. In SC. IEEE Press Article 42 11\u00a0pages. https:\/\/dl.acm.org\/doi\/10.1109\/SC.2018.00045  J. Choi X. Liu and V. Chakaravarthy. 2018. High-performance Dense Tucker Decomposition on GPU Clusters. In SC. IEEE Press Article 42 11\u00a0pages. https:\/\/dl.acm.org\/doi\/10.1109\/SC.2018.00045"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"P.\u00a0G. Constantine and D.\u00a0F. Gleich. 2011. Tall and Skinny QR Factorizations in MapReduce Architectures. In MapReduce. ACM 43\u201350. https:\/\/doi.org\/10.1145\/1996092.1996103  P.\u00a0G. Constantine and D.\u00a0F. Gleich. 2011. Tall and Skinny QR Factorizations in MapReduce Architectures. In MapReduce. ACM 43\u201350. https:\/\/doi.org\/10.1145\/1996092.1996103","DOI":"10.1145\/1996092.1996103"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"J. Demmel. 1997. Applied Numerical Linear Algebra. SIAM.  J. Demmel. 1997. Applied Numerical Linear Algebra. SIAM.","DOI":"10.1137\/1.9781611971446"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1137\/080731992"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1137\/18M1163658"},{"key":"e_1_3_2_1_15_1","volume-title":"Matrix Computations","author":"Golub G.H.","unstructured":"G.H. Golub and C.F. Van\u00a0Loan . 2013. Matrix Computations . JHU Press . G.H. Golub and C.F. Van\u00a0Loan. 2013. Matrix Computations. JHU Press."},{"key":"e_1_3_2_1_16_1","volume-title":"Extended Abstract: Shared-memory Parallelization of MTTKRP for Dense Tensors. In PPoPP. ACM, 393\u2013394","author":"Hayashi K.","year":"2018","unstructured":"K. Hayashi , G. Ballard , Y. Jiang , and M.\u00a0 J. Tobia . 2018 . Extended Abstract: Shared-memory Parallelization of MTTKRP for Dense Tensors. In PPoPP. ACM, 393\u2013394 . http:\/\/doi.acm.org\/10.1145\/3178487.3178522 K. Hayashi, G. Ballard, Y. Jiang, and M.\u00a0J. Tobia. 2018. Extended Abstract: Shared-memory Parallelization of MTTKRP for Dense Tensors. In PPoPP. ACM, 393\u2013394. http:\/\/doi.acm.org\/10.1145\/3178487.3178522"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1137\/07070111X"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1080\/00102202.2016.1197211"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479896305696"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"J. Li C. Battaglino I. Perros J. Sun and R. Vuduc. 2015. An Input-Adaptive and In-Place Approach to Dense Tensor-Times-Matrix Multiply. In SC. ACM Article 76 12\u00a0pages. https:\/\/doi.org\/10.1145\/2807591.2807671  J. Li C. Battaglino I. Perros J. Sun and R. Vuduc. 2015. An Input-Adaptive and In-Place Approach to Dense Tensor-Times-Matrix Multiply. In SC. ACM Article 76 12\u00a0pages. https:\/\/doi.org\/10.1145\/2807591.2807671","DOI":"10.1145\/2807591.2807671"},{"key":"e_1_3_2_1_21_1","unstructured":"O.\u00a0A. Malik and S. Becker. 2018. Low-Rank Tucker Decomposition of Large Tensors Using TensorSketch. In NeurIPS Vol.\u00a031. https:\/\/dl.acm.org\/doi\/10.5555\/3327546.3327674  O.\u00a0A. Malik and S. Becker. 2018. Low-Rank Tucker Decomposition of Large Tensors Using TensorSketch. In NeurIPS Vol.\u00a031. https:\/\/dl.acm.org\/doi\/10.5555\/3327546.3327674"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1137\/19M1261043"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"M. Mohiyuddin M. Hoemmen J. Demmel and K. Yelick. 2009. Minimizing communication in sparse matrix solvers. In SC. Article 36 12\u00a0pages. https:\/\/doi.org\/10.1145\/1654059.1654096  M. Mohiyuddin M. Hoemmen J. Demmel and K. Yelick. 2009. Minimizing communication in sparse matrix solvers. In SC. Article 36 12\u00a0pages. https:\/\/doi.org\/10.1145\/1654059.1654096","DOI":"10.1145\/1654059.1654096"},{"key":"e_1_3_2_1_24_1","first-page":"19","article-title":"Fast Alternating LS Algorithms for High Order CANDECOMP\/PARAFAC Tensor Factorizations","volume":"61","author":"Phan A.-H.","year":"2013","unstructured":"A.-H. Phan , P. Tichavsky , and A. Cichocki . 2013 . Fast Alternating LS Algorithms for High Order CANDECOMP\/PARAFAC Tensor Factorizations . IEEE TSP 61 , 19 (Oct 2013), 4834\u20134846. https:\/\/doi.org\/10.1109\/TSP.2013.2269903 A.-H. Phan, P. Tichavsky, and A. Cichocki. 2013. Fast Alternating LS Algorithms for High Order CANDECOMP\/PARAFAC Tensor Factorizations. IEEE TSP 61, 19 (Oct 2013), 4834\u20134846. https:\/\/doi.org\/10.1109\/TSP.2013.2269903","journal-title":"IEEE TSP"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1137\/19M1257718"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"L.\u00a0N. Trefethen and D. Bau. 1997. Numerical Linear Algebra. SIAM.  L.\u00a0N. Trefethen and D. Bau. 1997. Numerical Linear Algebra. SIAM.","DOI":"10.1137\/1.9780898719574"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF02289464"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1137\/110836067"}],"event":{"name":"ICPP 2021: 50th International Conference on Parallel Processing","location":"Lemont IL USA","acronym":"ICPP 2021"},"container-title":["50th International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3472456.3472472","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3472456.3472472","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3472456.3472472","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:48:11Z","timestamp":1750193291000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3472456.3472472"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,9]]},"references-count":28,"alternative-id":["10.1145\/3472456.3472472","10.1145\/3472456"],"URL":"https:\/\/doi.org\/10.1145\/3472456.3472472","relation":{},"subject":[],"published":{"date-parts":[[2021,8,9]]},"assertion":[{"value":"2021-10-05","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}