{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,30]],"date-time":"2025-08-30T17:06:22Z","timestamp":1756573582785,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":137,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,4]],"date-time":"2023-06-04T00:00:00Z","timestamp":1685836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,4]]},"DOI":"10.1145\/3555041.3589407","type":"proceedings-article","created":{"date-parts":[[2023,6,5]],"date-time":"2023-06-05T16:25:14Z","timestamp":1685982314000},"page":"53-59","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Optimizing Tensor Computations: From Applications to Compilation and Runtime Techniques"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1344-3663","authenticated-orcid":false,"given":"Matthias","family":"Boehm","sequence":"first","affiliation":[{"name":"TU Berlin, Berlin, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5756-8321","authenticated-orcid":false,"given":"Matteo","family":"Interlandi","sequence":"additional","affiliation":[{"name":"Microsoft GSL, Los Angeles, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5458-9370","authenticated-orcid":false,"given":"Chris","family":"Jermaine","sequence":"additional","affiliation":[{"name":"Rice University, Houston, TX, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,6,5]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Benoit Steiner, Paul A. 
Tucker, Vijay Vasudevan, Pete Warden, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng.","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, Manjunath Kudlur, Josh Levenberg, Rajat Monga, Sherry Moore, Derek Gordon Murray, Benoit Steiner, Paul A. Tucker, Vijay Vasudevan, Pete Warden, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng. 2016. TensorFlow: A System for Large-Scale Machine Learning. In OSDI. 265--283."},{"key":"e_1_3_2_1_2_1","volume-title":"Machine Learning for Precipitation Nowcasting from Radar Images. CoRR","author":"Agrawal Shreya","year":"2019","unstructured":"Shreya Agrawal, Luke Barrington, Carla Bromberg, John Burge, Cenk Gazen, and Jason Hickey. 2019. Machine Learning for Precipitation Nowcasting from Radar Images. CoRR , Vol. abs\/1912.12132 (2019)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-014-0357-y"},{"key":"e_1_3_2_1_4_1","unstructured":"Randy Allen and Ken Kennedy. 2001. Optimizing Compilers for Modern Architectures: A Dependence-based Approach. Morgan Kaufmann."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","unstructured":"Arash Ashari Naser Sedaghati John Eisenlohr and P. Sadayappan. 2014. An efficient two-dimensional blocking strategy for sparse matrix-vector multiplication on GPUs. In ICS. 273--282. https:\/\/doi.org\/10.1145\/2597652.2597678","DOI":"10.1145\/2597652.2597678"},{"key":"e_1_3_2_1_6_1","volume-title":"Chandramohan A. Thekkath, and Yonghui Wu.","author":"Barham Paul","year":"2022","unstructured":"Paul Barham, Aakanksha Chowdhery, Jeff Dean, Sanjay Ghemawat, Steven Hand, Dan Hurt, Michael Isard, Hyeontaek Lim, Ruoming Pang, Sudip Roy, Brennan Saeta, Parker Schuh, Ryan Sepassi, Laurent El Shafey, Chandramohan A. Thekkath, and Yonghui Wu. 2022. Pathways: Asynchronous Distributed Dataflow for ML. In MLSys. 
https:\/\/proceedings.mlsys.org\/paper\/2022\/hash\/"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3588682"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457549"},{"key":"e_1_3_2_1_9_1","unstructured":"Denis Baylor Eric Breck Heng-Tze Cheng Noah Fiedel Chuan Yu Foo Zakaria Haque Salem Haykal Mustafa Ispir Vihan Jain Levent Koc Chiu Yuen Koo Lukasz Lew Clemens Mewald Akshay Naresh Modi Neoklis Polyzotis Sukriti Ramesh Sudip Roy Steven Euijong Whang Martin Wicke Jarek Wilkiewicz Xin Zhang and Martin Zinkevich. 2017. TFX: A TensorFlow-Based Production-Scale Machine Learning Platform. In SIGKDD. 1387--1395."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/1654059.1654119"},{"key":"e_1_3_2_1_11_1","volume-title":"Kevin Innerebner, Florijan Klezin, Stefanie N. Lindstaedt, Arnab Phani, Benjamin Rath, Berthold Reinwald, Shafaq Siddiqui, and Sebastian Benjamin Wrede.","author":"Boehm Matthias","year":"2020","unstructured":"Matthias Boehm, Iulian Antonov, Sebastian Baunsgaard, Mark Dokter, Robert Ginth\u00f6r, Kevin Innerebner, Florijan Klezin, Stefanie N. Lindstaedt, Arnab Phani, Benjamin Rath, Berthold Reinwald, Shafaq Siddiqui, and Sebastian Benjamin Wrede. 2020. SystemDS: A Declarative Machine Learning System for the End-to-End Data Science Lifecycle. In CIDR."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.14778\/3007263.3007279"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.2200\/S00895ED1V01Y201901DTM057"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.14778\/3229863.3229865"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.14778\/2732286.2732292"},{"key":"e_1_3_2_1_16_1","first-page":"52","article-title":"SystemML's Optimizer: Plan Generation for Large-Scale Machine Learning Programs","volume":"37","author":"B\u00f6hm Matthias","year":"2014","unstructured":"Matthias B\u00f6hm, Douglas R. Burdick, Alexandre V. 
Evfimievski, Berthold Reinwald, Frederick R. Reiss, Prithviraj Sen, Shirish Tatikonda, and Yuanyuan Tian. 2014. SystemML's Optimizer: Plan Generation for Large-Scale Machine Learning Programs. IEEE Data Eng. Bull. , Vol. 37, 3 (2014), 52--62. http:\/\/sites.computer.org\/debull\/A14sept\/p52.pdf","journal-title":"IEEE Data Eng. Bull."},{"key":"e_1_3_2_1_17_1","volume-title":"David Petrou, Daniel Ramage, and Jason Roselander.","author":"Bonawitz Keith","year":"2019","unstructured":"Keith Bonawitz, Hubert Eichner, Wolfgang Grieskamp, Dzmitry Huba, Alex Ingerman, Vladimir Ivanov, Chlo\u00e9 Kiddon, Jakub Konecn\u00fd , Stefano Mazzocchi, Brendan McMahan, Timon Van Overveldt, David Petrou, Daniel Ramage, and Jason Roselander. 2019. Towards Federated Learning at Scale: System Design. In MLSys."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2463676.2465283"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.14778\/3137628.3137641"},{"key":"e_1_3_2_1_20_1","volume-title":"TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In OSDI. 578--594. https:\/\/www.usenix.org\/conference\/osdi18\/presentation\/chen","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Q. Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. 2018. TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In OSDI. 578--594. https:\/\/www.usenix.org\/conference\/osdi18\/presentation\/chen"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3276493"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1006\/jcss.1997.1534"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1009716300509"},{"key":"e_1_3_2_1_24_1","volume-title":"Cooper and Linda Torczon","author":"Keith","year":"2004","unstructured":"Keith D. Cooper and Linda Torczon. 2004. Engineering a Compiler. 
Morgan Kaufmann."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.14778\/2824032.2824045"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00020"},{"key":"e_1_3_2_1_27_1","unstructured":"William J. Dally. 2018. Hardware for Deep Learning. https:\/\/youtu.be\/zDBF0xwQW-0 MLSys Keynote."},{"key":"e_1_3_2_1_28_1","volume-title":"DAPHNE: An Open and Extensible System Infrastructure for Integrated Data Analysis Pipelines. In CIDR. https:\/\/www.cidrdb.org\/cidr2022\/papers\/p4-damme.pdf","author":"Patrick Damme","year":"2022","unstructured":"Patrick Damme et al. 2022. DAPHNE: An Open and Extensible System Infrastructure for Integrated Data Analysis Pipelines. In CIDR. https:\/\/www.cidrdb.org\/cidr2022\/papers\/p4-damme.pdf"},{"key":"e_1_3_2_1_29_1","volume-title":"ICML (Proceedings of Machine Learning Research","volume":"1537","author":"Dao Tri","year":"2019","unstructured":"Tri Dao, Albert Gu, Alexander Ratner, Virginia Smith, Chris De Sa, and Christopher R\u00e9. 2019. A Kernel Theory of Modern Data Augmentation. In ICML (Proceedings of Machine Learning Research, Vol. 97). 1528--1537. http:\/\/proceedings.mlr.press\/v97\/dao19b.html"},{"key":"e_1_3_2_1_30_1","volume-title":"Ng","author":"Dean Jeffrey","year":"2012","unstructured":"Jeffrey Dean, Greg Corrado, Rajat Monga, Kai Chen, Matthieu Devin, Quoc V. Le, Mark Z. Mao, Marc'Aurelio Ranzato, Andrew W. Senior, Paul A. Tucker, Ke Yang, and Andrew Y. Ng. 2012. Large Scale Distributed Deep Networks. In NeurIPS. 1232--1240."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","unstructured":"Xin Luna Dong and Theodoros Rekatsinas. 2018. Data Integration and Machine Learning: A Natural Synergy. In SIGMOD. 1645--1650. 
https:\/\/doi.org\/10.1145\/3183713.3197387","DOI":"10.1145\/3183713.3197387"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.14778\/3236187.3236194"},{"key":"e_1_3_2_1_33_1","volume-title":"SPOOF: Sum-Product Optimization and Operator Fusion for Large-Scale Machine Learning. In CIDR","author":"Elgamal Tarek","year":"2017","unstructured":"Tarek Elgamal, Shangyu Luo, Matthias Boehm, Alexandre V. Evfimievski, Shirish Tatikonda, Berthold Reinwald, and Prithviraj Sen. 2017. SPOOF: Sum-Product Optimization and Operator Fusion for Large-Scale Machine Learning. In CIDR. http:\/\/cidrdb.org\/cidr2017\/papers\/p3-elgamal-cidr17.pdf"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318221"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.14778\/3407790.3407857"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.14778\/3503585.3503590"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2211.02753"},{"key":"e_1_3_2_1_38_1","volume-title":"Jesus Camacho-Rodriguez, and Matteo Interlandi.","author":"Gandhi Apurva","year":"2023","unstructured":"Apurva Gandhi, Yuki Asada, Victor Fu, Advitya Gemawat, Lihao Zhang, Rathijit Sen, Carlo Curino, Jesus Camacho-Rodriguez, and Matteo Interlandi. 2023. The Tensor Data Platform: Towards an AI-centric Database System. In CIDR. https:\/\/www.cidrdb.org\/cidr2023\/papers\/p68-gandhi.pdf"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3035918.3035937"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","unstructured":"Rainer Gemulla Erik Nijkamp Peter J. Haas and Yannis Sismanis. 2011. Large-scale matrix factorization with distributed stochastic gradient descent. In SIGKDD. 69--77. https:\/\/doi.org\/10.1145\/2020408.2020426","DOI":"10.1145\/2020408.2020426"},{"volume-title":"Inside TensorFlow: tf.distribute.Strategy. https:\/\/www.youtube.com\/watch?v=jKV53r9-H14","key":"e_1_3_2_1_41_1","unstructured":"Google. 2019. 
Inside TensorFlow: tf.distribute.Strategy. https:\/\/www.youtube.com\/watch?v=jKV53r9-H14"},{"key":"e_1_3_2_1_42_1","unstructured":"Google. 2020. TensorFlow Federated: Machine Learning on Decentralized Data. https:\/\/www.tensorflow.org\/federated"},{"key":"e_1_3_2_1_43_1","unstructured":"Google. 2022. DTensor Concepts. https:\/\/www.tensorflow.org\/guide\/dtensor_overview"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.14778\/3551793.3551833"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","unstructured":"Alireza Heidari Joshua McGrath Ihab F. Ilyas and Theodoros Rekatsinas. 2019. HoloDetect: Few-Shot Learning for Error Detection. In SIGMOD. 829--846. https:\/\/doi.org\/10.1145\/3299869.3319888","DOI":"10.1145\/3299869.3319888"},{"key":"e_1_3_2_1_46_1","volume-title":"Podracer architectures for scalable Reinforcement Learning. CoRR","author":"Hessel Matteo","year":"2021","unstructured":"Matteo Hessel, Manuel Kroiss, Aidan Clark, Iurii Kemaev, John Quan, Thomas Keck, Fabio Viola, and Hado van Hasselt. 2021. Podracer architectures for scalable Reinforcement Learning. CoRR , Vol. abs\/2104.06272 (2021). https:\/\/arxiv.org\/abs\/2104.06272"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3329785.3329932"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3517869"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","unstructured":"Botong Huang Shivnath Babu and Jun Yang. 2013. Cumulon: optimizing statistical data analysis in the cloud. In SIGMOD. 1--12. 
https:\/\/doi.org\/10.1145\/2463676.2465273","DOI":"10.1145\/2463676.2465273"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330993"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/1376616.1376686"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.14778\/3317315.3317323"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.14778\/3450980.3450991"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","unstructured":"Chris Jermaine. 2021. The Tensor-Relational Algebra and Other Ideas in Machine Learning System Design. In SSDBM. 270. https:\/\/doi.org\/10.1145\/3468791.3472262","DOI":"10.1145\/3468791.3472262"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","unstructured":"Zhihao Jia Oded Padon James Thomas Todd Warszawski Matei Zaharia and Alex Aiken. 2019a. TASO: optimizing deep learning computation with automatic generation of graph substitutions. In SOSP. 47--62. https:\/\/doi.org\/10.1145\/3341301.3359630","DOI":"10.1145\/3341301.3359630"},{"key":"e_1_3_2_1_56_1","unstructured":"Zhihao Jia James Thomas Todd Warszawski Mingyu Gao Matei Zaharia and Alex Aiken. 2019b. Optimizing DNN Computation with Relaxed Graph Substitutions. In MLSys. https:\/\/proceedings.mlsys.org\/book\/276.pdf"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"Jiawei Jiang Bin Cui Ce Zhang and Lele Yu. 2017. Heterogeneity-aware Distributed Parameter Servers. In SIGMOD. 463--478.","DOI":"10.1145\/3035918.3035933"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","unstructured":"Jiawei Jiang Shaoduo Gan Yue Liu Fanlin Wang Gustavo Alonso Ana Klimovic Ankit Singla Wentao Wu and Ce Zhang. 2021. Towards Demystifying Serverless Machine Learning Training. In SIGMOD. 857--871. 
https:\/\/doi.org\/10.1145\/3448016.3459240","DOI":"10.1145\/3448016.3459240"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.14778\/3503585.3503597"},{"key":"e_1_3_2_1_60_1","volume-title":"More Dots: Syntactic Loop Fusion in Julia. https:\/\/julialang.org\/blog\/2017\/01\/moredots\/","author":"Johnson Steven G.","year":"2017","unstructured":"Steven G. Johnson. 2017. More Dots: Syntactic Loop Fusion in Julia. https:\/\/julialang.org\/blog\/2017\/01\/moredots\/"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3360307"},{"key":"e_1_3_2_1_63_1","unstructured":"Julia. 2022. Parallel Computing. https:\/\/docs.julialang.org\/en\/v1\/manual\/parallel-computing\/"},{"key":"e_1_3_2_1_64_1","unstructured":"Peter Kairouz Brendan McMahan and Virginia Smith. 2020. Federated Learning Tutorial. In NeurIPS. https:\/\/slideslive.com\/38935813\/federated-learning-tutorial"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2012.290"},{"volume-title":"CIDR","author":"Karanasos Konstantinos","key":"e_1_3_2_1_66_1","unstructured":"Konstantinos Karanasos, Matteo Interlandi, Fotis Psallidas, Rathijit Sen, Kwanghyun Park, Ivan Popivanov, Doris Xin, Supun Nakandala, Subru Krishnan, Markus Weimer, Yuan Yu, Raghu Ramakrishnan, and Carlo Curino. 2020. Extending Relational Query Processing with ML Inference. In CIDR. http:\/\/cidrdb.org\/cidr2020\/papers\/p24-karanasos-cidr20.pdf"},{"key":"e_1_3_2_1_67_1","volume-title":"Frank K\u00f6 hler, and Wolfgang Lehner","author":"Kernert David","year":"2015","unstructured":"David Kernert, Frank K\u00f6 hler, and Wolfgang Lehner. 2015. SpMacho - Optimizing Sparse Linear Algebra Expressions with Probabilistic Density Estimation. In EDBT. 289--300."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"crossref","unstructured":"David Kernert Wolfgang Lehner and Frank K\u00f6 hler. 2016. 
Topology-Aware Optimization of Big Sparse Matrices and Matrix Multiplications on Main-Memory Systems. In ICDE. 823--834.","DOI":"10.1109\/ICDE.2016.7498293"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/2902251.2902280"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3133901"},{"key":"e_1_3_2_1_71_1","unstructured":"Urs K\u00f6 ster Tristan Webb Xin Wang Marcel Nassar Arjun K. Bansal William Constable Oguz Elibol Stewart Hall Luke Hornof Amir Khosrowshahi Carey Kloss Ruby J. Pai and Naveen Rao. 2017. Flexpoint: An Adaptive Numerical Format for Efficient Training of Deep Neural Networks. In NeurIPS. 1742--1752. https:\/\/proceedings.neurips.cc\/paper\/2017\/hash\/a0160709701140704575d499c997b6ca-Abstract.html"},{"key":"e_1_3_2_1_72_1","volume-title":"Hinton","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E. Hinton. 2012. ImageNet Classification with Deep Convolutional Neural Networks. In NeurIPS. 1106--1114. https:\/\/proceedings.neurips.cc\/paper\/2012\/hash\/c399862d3b9d6b76c8436e924a68c45b-Abstract.html"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","unstructured":"Arun Kumar Matthias Boehm and Jun Yang. 2017. Data Management in Machine Learning: Challenges Techniques and Systems. In SIGMOD. 1717--1722. https:\/\/doi.org\/10.1145\/3035918.3054775","DOI":"10.1145\/3035918.3054775"},{"key":"e_1_3_2_1_74_1","unstructured":"Rasmus Munk Larsen and Tatiana Shpeisman. 2019. TensorFlow Graph Optimizations. https:\/\/web.stanford.edu\/class\/cs245\/slides\/TFGraphOptimizationsStanford.pdf Guest Lecture Stanford."},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"e_1_3_2_1_76_1","volume-title":"Patel","author":"Li Fengan","year":"2019","unstructured":"Fengan Li, Lingjiao Chen, Yijing Zeng, Arun Kumar, Xi Wu, Jeffrey F. Naughton, and Jignesh M. Patel. 2019. 
Tuple-oriented Compression for Large-scale Mini-batch Stochastic Gradient Descent. In SIGMOD. 1517--1534."},{"key":"e_1_3_2_1_77_1","volume-title":"Alexander J. Smola, Amr Ahmed, Vanja Josifovski, James Long, Eugene J. Shekita, and Bor-Yiing Su.","author":"Li Mu","year":"2014","unstructured":"Mu Li, David G. Andersen, Jun Woo Park, Alexander J. Smola, Amr Ahmed, Vanja Josifovski, James Long, Eugene J. Shekita, and Bor-Yiing Su. 2014. Scaling Distributed Machine Learning with the Parameter Server. In OSDI. 583--598."},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","unstructured":"Peng Li Xi Rao Jennifer Blase Yue Zhang Xu Chu and Ce Zhang. 2021. CleanML: A Study for Evaluating the Impact of Data Cleaning on ML Classification Tasks. In ICDE. 13--24. https:\/\/doi.org\/10.1109\/ICDE51399.2021.00009","DOI":"10.1109\/ICDE51399.2021.00009"},{"key":"e_1_3_2_1_79_1","first-page":"3059","article-title":"RLlib: Abstractions for Distributed Reinforcement Learning","volume":"80","author":"Liang Eric","year":"2018","unstructured":"Eric Liang, Richard Liaw, Robert Nishihara, Philipp Moritz, Roy Fox, Ken Goldberg, Joseph Gonzalez, Michael I. Jordan, and Ion Stoica. 2018. RLlib: Abstractions for Distributed Reinforcement Learning. In ICML, Vol. 80. 3059--3068. http:\/\/proceedings.mlr.press\/v80\/liang18b.html","journal-title":"ICML"},{"key":"e_1_3_2_1_80_1","unstructured":"Weifeng Liu and Brian Vinter. 2014. An Efficient GPU General Sparse Matrix-Matrix Multiplication for Irregular Data. In IPDPS. 370--381."},{"key":"e_1_3_2_1_81_1","volume-title":"Lundberg and Su-In Lee","author":"Scott","year":"2017","unstructured":"Scott M. Lundberg and Su-In Lee. 2017. A Unified Approach to Interpreting Model Predictions. In NeurIPS. 4765--4774. https:\/\/proceedings.neurips.cc\/paper\/2017\/hash\/8a20a8621978632d76c43dfd28b67767-Abstract.html"},{"key":"e_1_3_2_1_82_1","volume-title":"Luis Leopoldo Perez, and Christopher M. 
Jermaine","author":"Luo Shangyu","year":"2017","unstructured":"Shangyu Luo, Zekai J. Gao, Michael N. Gubanov, Luis Leopoldo Perez, and Christopher M. Jermaine. 2017. Scalable Linear Algebra on a Relational Database System. In ICDE. 523--534."},{"key":"e_1_3_2_1_83_1","first-page":"2430","article-title":"Device Placement Optimization with Reinforcement Learning","volume":"70","author":"Mirhoseini Azalia","year":"2017","unstructured":"Azalia Mirhoseini, Hieu Pham, Quoc V. Le, Benoit Steiner, Rasmus Larsen, Yuefeng Zhou, Naveen Kumar, Mohammad Norouzi, Samy Bengio, and Jeff Dean. 2017. Device Placement Optimization with Reinforcement Learning. In ICML, Vol. 70. 2430--2439.","journal-title":"ICML"},{"key":"e_1_3_2_1_84_1","volume-title":"Ray: A Distributed Framework for Emerging AI Applications. In OSDI. 561--577. https:\/\/www.usenix.org\/conference\/osdi18\/presentation\/nishihara","author":"Moritz Philipp","year":"2018","unstructured":"Philipp Moritz, Robert Nishihara, Stephanie Wang, Alexey Tumanov, Richard Liaw, Eric Liang, Melih Elibol, Zongheng Yang, William Paul, Michael I. Jordan, and Ion Stoica. 2018. Ray: A Distributed Framework for Emerging AI Applications. In OSDI. 561--577. https:\/\/www.usenix.org\/conference\/osdi18\/presentation\/nishihara"},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","unstructured":"Supun Nakandala Arun Kumar and Yannis Papakonstantinou. 2019a. Incremental and Approximate Inference for Faster Occlusion-based Deep CNN Explanations. In SIGMOD. 1589--1606. https:\/\/doi.org\/10.1145\/3299869.3319874","DOI":"10.1145\/3299869.3319874"},{"key":"e_1_3_2_1_86_1","volume-title":"Markus Weimer, and Matteo Interlandi.","author":"Nakandala Supun","year":"2020","unstructured":"Supun Nakandala, Karla Saur, Gyeong-In Yu, Konstantinos Karanasos, Carlo Curino, Markus Weimer, and Matteo Interlandi. 2020a. A Tensor Compiler for Unified Machine Learning Prediction Serving. In OSDI. 899--917. 
https:\/\/www.usenix.org\/conference\/osdi20\/presentation\/nakandala"},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.1145\/3329486.3329496"},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.14778\/3407790.3407816"},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"publisher","unstructured":"Felix Neutatz Felix Biessmann and Ziawasch Abedjan. 2021. Enforcing Constraints for Machine Learning Systems via Declarative Feature Selection: An Experimental Study. In SIGMOD. 1345--1358. https:\/\/doi.org\/10.1145\/3448016.3457295","DOI":"10.1145\/3448016.3457295"},{"key":"e_1_3_2_1_90_1","doi-asserted-by":"crossref","unstructured":"Milos Nikolic Mohammed Elseidy and Christoph Koch. 2014. LINVIEW: incremental view maintenance for complex analytical queries. In SIGMOD. 253--264.","DOI":"10.1145\/2588555.2610519"},{"key":"e_1_3_2_1_91_1","unstructured":"NVIDIA. 2020. A100 Tensor Core GPU Architecture."},{"key":"e_1_3_2_1_92_1","unstructured":"NVIDIA. 2022. TensorRT Developer Guide. https:\/\/docs.nvidia.com\/deeplearning\/tensorrt\/pdf\/TensorRT-Developer-Guide.pdf"},{"key":"e_1_3_2_1_93_1","unstructured":"Kunle Olukotun. 2021. \"Let the Data Flow!\". In CIDR."},{"key":"e_1_3_2_1_94_1","doi-asserted-by":"publisher","DOI":"10.14778\/3213880.3213890"},{"volume-title":"CIDR","author":"Palkar Shoumik","key":"e_1_3_2_1_95_1","unstructured":"Shoumik Palkar, James Thomas, Anil Shanbhag, Malte Schwarzkopf, Saman P. Amarasinghe, and Matei Zaharia. 2017. A Common Runtime for High Performance Data Analysis. In CIDR. http:\/\/cidrdb.org\/cidr2017\/papers\/p127-palkar-cidr17.pdf"},{"key":"e_1_3_2_1_96_1","doi-asserted-by":"publisher","DOI":"10.14778\/3025111.3025117"},{"key":"e_1_3_2_1_97_1","unstructured":"Adam Paszke et al. 2019. PyTorch: An Imperative Style High- Performance Deep Learning Library. 
In NeurIPS."},{"key":"e_1_3_2_1_98_1","doi-asserted-by":"publisher","DOI":"10.5555\/1953048.2078195"},{"key":"e_1_3_2_1_99_1","volume-title":"Battaglia","author":"Pfaff Tobias","year":"2021","unstructured":"Tobias Pfaff, Meire Fortunato, Alvaro Sanchez-Gonzalez, and Peter W. Battaglia. 2021. Learning Mesh-Based Simulation with Graph Networks. ICLR (2021)."},{"key":"e_1_3_2_1_100_1","doi-asserted-by":"publisher","DOI":"10.14778\/3551793.3551842"},{"key":"e_1_3_2_1_101_1","doi-asserted-by":"publisher","unstructured":"Alexander Renz-Wieland Rainer Gemulla Zoi Kaoudi and Volker Markl. 2022. NuPS: A Parameter Server for Machine Learning with Non-Uniform Parameter Access. In SIGMOD. 481--495. https:\/\/doi.org\/10.1145\/3514221.3517860","DOI":"10.1145\/3514221.3517860"},{"key":"e_1_3_2_1_102_1","doi-asserted-by":"publisher","DOI":"10.14778\/3407790.3407796"},{"key":"e_1_3_2_1_103_1","doi-asserted-by":"publisher","unstructured":"Marco T\u00fa lio Ribeiro Sameer Singh and Carlos Guestrin. 2016. \"Why Should I Trust You?\": Explaining the Predictions of Any Classifier. In SIGKDD. 1135--1144. https:\/\/doi.org\/10.1145\/2939672.2939778","DOI":"10.1145\/2939672.2939778"},{"key":"e_1_3_2_1_104_1","volume-title":"Dask: Parallel Computation with Blocked algorithms and Task Scheduling. In SCIPY.","author":"Rocklin Matthew","year":"2015","unstructured":"Matthew Rocklin. 2015. Dask: Parallel Computation with Blocked algorithms and Task Scheduling. In SCIPY."},{"key":"e_1_3_2_1_105_1","doi-asserted-by":"publisher","DOI":"10.1145\/2967938.2967943"},{"key":"e_1_3_2_1_106_1","doi-asserted-by":"publisher","unstructured":"Svetlana Sagadeeva and Matthias Boehm. 2021. SliceLine: Fast Linear-Algebra-based Slice Finding for ML Model Debugging. In SIGMOD. 2290--2299. 
https:\/\/doi.org\/10.1145\/3448016.3457323","DOI":"10.1145\/3448016.3457323"},{"key":"e_1_3_2_1_107_1","doi-asserted-by":"publisher","DOI":"10.14778\/3461535.3463474"},{"volume-title":"CIDR","author":"Schelter Sebastian","key":"e_1_3_2_1_108_1","unstructured":"Sebastian Schelter. 2020. \"Amnesia\" - Machine Learning Models That Can Forget User Data Very Fast. In CIDR. http:\/\/cidrdb.org\/cidr2020\/papers\/p32-schelter-cidr20.pdf"},{"key":"e_1_3_2_1_109_1","doi-asserted-by":"publisher","unstructured":"Sebastian Schelter Stefan Grafberger and Ted Dunning. 2021. HedgeCut: Maintaining Randomised Trees for Low-Latency Machine Unlearning. In SIGMOD. 1545--1557. https:\/\/doi.org\/10.1145\/3448016.3457239","DOI":"10.1145\/3448016.3457239"},{"key":"e_1_3_2_1_110_1","doi-asserted-by":"publisher","DOI":"10.14778\/3229863.3229867"},{"key":"e_1_3_2_1_111_1","volume-title":"Samsara: Declarative Machine Learning on Distributed Dataflow Systems.","author":"Schelter Sebastian","year":"2016","unstructured":"Sebastian Schelter, Andrew Palumbo, Shannon Quinn, Suneel Marthi, and Andrew Musselman. 2016. Samsara: Declarative Machine Learning on Distributed Dataflow Systems."},{"key":"e_1_3_2_1_112_1","doi-asserted-by":"publisher","DOI":"10.5441\/002"},{"key":"e_1_3_2_1_113_1","doi-asserted-by":"publisher","unstructured":"Vraj Shah Jonathan Lacanlale Premanand Kumar Kevin Yang and Arun Kumar. 2021. Towards Benchmarking Feature Type Inference for AutoML Platforms. In SIGMOD. 1584--1596. 
https:\/\/doi.org\/10.1145\/3448016.3457274","DOI":"10.1145\/3448016.3457274"},{"key":"e_1_3_2_1_114_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10766-008-0082-5"},{"key":"e_1_3_2_1_115_1","doi-asserted-by":"publisher","DOI":"10.14778\/1920841.1920931"},{"key":"e_1_3_2_1_116_1","doi-asserted-by":"publisher","DOI":"10.1145\/3299869.3319854"},{"key":"e_1_3_2_1_117_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457244"},{"key":"e_1_3_2_1_118_1","doi-asserted-by":"crossref","unstructured":"Michael Stonebraker Paul Brown Alex Poliakov and Suchi Raman. 2011. The Architecture of SciDB. In SSDBM. 1--16.","DOI":"10.1007\/978-3-642-22351-8_1"},{"key":"e_1_3_2_1_119_1","unstructured":"Arvind K. Sujeeth HyoukJoong Lee Kevin J. Brown Tiark Rompf Hassan Chafi Michael Wu Anand R. Atreya Martin Odersky and Kunle Olukotun. 2011. OptiML: An Implicitly Parallel Domain-Specific Language for Machine Learning. In ICML. 609--616. https:\/\/icml.cc\/2011\/papers\/373_icmlpaper.pdf"},{"key":"e_1_3_2_1_120_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3452792"},{"key":"e_1_3_2_1_121_1","first-page":"1","article-title":"mice: Multivariate Imputation by Chained Equations in R","volume":"45","author":"van Buuren Stef","year":"2011","unstructured":"Stef van Buuren and Karin Groothuis-Oudshoorn. 2011. mice: Multivariate Imputation by Chained Equations in R. Journal of Statistical Software, Articles , Vol. 45, 3 (2011), 1--67.","journal-title":"Journal of Statistical Software, Articles"},{"key":"e_1_3_2_1_122_1","volume-title":"Tensor Comprehensions: Framework-Agnostic High-Performance Machine Learning Abstractions. CoRR","author":"Vasilache Nicolas","year":"2018","unstructured":"Nicolas Vasilache, Oleksandr Zinenko, Theodoros Theodoridis, Priya Goyal, Zachary DeVito, William S. Moses, Sven Verdoolaege, Andrew Adams, and Albert Cohen. 2018. Tensor Comprehensions: Framework-Agnostic High-Performance Machine Learning Abstractions. CoRR , Vol. abs\/1802.04730 (2018). 
http:\/\/arxiv.org\/abs\/1802.04730"},{"key":"e_1_3_2_1_123_1","volume-title":"Ripley","author":"Venables William N.","year":"2002","unstructured":"William N. Venables and Brian D. Ripley. 2002. Modern Applied Statistics with S, 4th Ed. Springer.","edition":"4"},{"key":"e_1_3_2_1_124_1","unstructured":"Naigang Wang Jungwook Choi Daniel Brand Chia-Yu Chen and Kailash Gopalakrishnan. 2018. Training Deep Neural Networks with 8-bit Floating Point Numbers. In NeurIPS. 7686--7695. https:\/\/proceedings.neurips.cc\/paper\/2018\/hash\/335d3d1cd7ef05ec77714a215134914c-Abstract.html"},{"key":"e_1_3_2_1_125_1","doi-asserted-by":"publisher","DOI":"10.14778\/3407790.3407799"},{"key":"e_1_3_2_1_126_1","doi-asserted-by":"publisher","unstructured":"Da Yan Yingyi Bu Yuanyuan Tian Amol Deshpande and James Cheng. 2016. Big Graph Analytics Systems. In SIGMOD. 2241--2243. https:\/\/doi.org\/10.1145\/2882903.2912566","DOI":"10.1145\/2882903.2912566"},{"key":"e_1_3_2_1_127_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2016.0179"},{"key":"e_1_3_2_1_128_1","unstructured":"Yongyang Yu MingJie Tang Walid G. Aref Qutaibah M. Malluhi Mostafa M. Abbas and Mourad Ouzzani. 2017. In-Memory Distributed Matrix Computation Processing and Optimization. In ICDE."},{"key":"e_1_3_2_1_129_1","doi-asserted-by":"publisher","DOI":"10.14778\/3457390.3457399"},{"key":"e_1_3_2_1_130_1","doi-asserted-by":"publisher","DOI":"10.14778\/3529337.3529343"},{"key":"e_1_3_2_1_131_1","unstructured":"Matei Zaharia Mosharaf Chowdhury Tathagata Das Ankur Dave Justin Ma Murphy McCauly Michael J. Franklin Scott Shenker and Ion Stoica. 2012. Resilient Distributed Datasets: A Fault-Tolerant Abstraction for In-Memory Cluster Computing. In NSDI. 
15--28."},{"key":"e_1_3_2_1_132_1","doi-asserted-by":"publisher","DOI":"10.14778\/3407790.3407793"},{"key":"e_1_3_2_1_133_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3452787"},{"key":"e_1_3_2_1_134_1","first-page":"4035","article-title":"ZipML: Training Linear Models with End-to-End Low Precision, and a Little Bit of Deep Learning","volume":"70","author":"Zhang Hantian","year":"2017","unstructured":"Hantian Zhang, Jerry Li, Kaan Kara, Dan Alistarh, Ji Liu, and Ce Zhang. 2017. ZipML: Training Linear Models with End-to-End Low Precision, and a Little Bit of Deep Learning. In ICML, Vol. 70. 4035--4043. http:\/\/proceedings.mlr.press\/v70\/zhang17e.html","journal-title":"ICML"},{"key":"e_1_3_2_1_135_1","doi-asserted-by":"publisher","DOI":"10.14778\/2994509.2994511"},{"key":"e_1_3_2_1_136_1","doi-asserted-by":"publisher","DOI":"10.14778\/3402707.3402743"},{"key":"e_1_3_2_1_137_1","doi-asserted-by":"publisher","unstructured":"Jia Zou R. Matthew Barnett Tania Lorido-Botran Shangyu Luo Carlos Monroy Sourav Sikdar Kia Teymourian Binhang Yuan and Chris Jermaine. 2018. PlinyCompute: A Platform for High-Performance Distributed Data-Intensive Tool Development. In SIGMOD. 
https:\/\/doi.org\/10.1145\/3183713.3196933","DOI":"10.1145\/3183713.3196933"}],"event":{"name":"SIGMOD\/PODS '23: International Conference on Management of Data","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"],"location":"Seattle WA USA","acronym":"SIGMOD\/PODS '23"},"container-title":["Companion of the 2023 International Conference on Management of Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3555041.3589407","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3555041.3589407","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T18:43:58Z","timestamp":1750272238000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3555041.3589407"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,4]]},"references-count":137,"alternative-id":["10.1145\/3555041.3589407","10.1145\/3555041"],"URL":"https:\/\/doi.org\/10.1145\/3555041.3589407","relation":{},"subject":[],"published":{"date-parts":[[2023,6,4]]},"assertion":[{"value":"2023-06-05","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}