{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T18:26:30Z","timestamp":1750703190424,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,10,17]],"date-time":"2022-10-17T00:00:00Z","timestamp":1665964800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"\u00d6sterreichische Forschungsf\u00f6rderungsgesellschaft (FFG)","award":["873838"],"award-info":[{"award-number":["873838"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,10,17]]},"DOI":"10.1145\/3511808.3557162","type":"proceedings-article","created":{"date-parts":[[2022,10,16]],"date-time":"2022-10-16T01:29:57Z","timestamp":1665883797000},"page":"4813-4817","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Federated Data Preparation, Learning, and Debugging in Apache SystemDS"],"prefix":"10.1145","author":[{"given":"Sebastian","family":"Baunsgaard","sequence":"first","affiliation":[{"name":"Graz University of Technology, Graz, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Matthias","family":"Boehm","sequence":"additional","affiliation":[{"name":"Graz University of Technology, Graz, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kevin","family":"Innerebner","sequence":"additional","affiliation":[{"name":"Graz University of Technology, Graz, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mito","family":"Kehayov","sequence":"additional","affiliation":[{"name":"Graz University of Technology, Graz, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Florian","family":"Lackner","sequence":"additional","affiliation":[{"name":"Graz University of Technology, Graz, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Olga","family":"Ovcharenko","sequence":"additional","affiliation":[{"name":"Graz University of Technology, Graz, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Arnab","family":"Phani","sequence":"additional","affiliation":[{"name":"Graz University of Technology, Graz, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tobias","family":"Rieger","sequence":"additional","affiliation":[{"name":"Graz University of Technology, Graz, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David","family":"Weissteiner","sequence":"additional","affiliation":[{"name":"Graz University of Technology, Graz, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sebastian Benjamin","family":"Wrede","sequence":"additional","affiliation":[{"name":"Graz University of Technology, Graz, Austria"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,10,17]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Martin Abadi et al. 2016. TensorFlow: A System for Large-Scale Machine Learning. In OSDI. 265--283. https:\/\/www.usenix.org\/conference\/osdi16\/technical-sessions\/presentation\/abadi  Martin Abadi et al. 2016. TensorFlow: A System for Large-Scale Machine Learning. In OSDI. 265--283. https:\/\/www.usenix.org\/conference\/osdi16\/technical-sessions\/presentation\/abadi"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3214303"},{"key":"#cr-split#-e_1_3_2_1_3_1.1","doi-asserted-by":"crossref","unstructured":"Sebastian Baunsgaard et al. 2021. ExDRa: Exploratory Data Science on Federated Raw Data. In SIGMOD. 2450--2463. https:\/\/doi.org\/10.1145\/3448016.3457549 10.1145\/3448016.3457549","DOI":"10.1145\/3448016.3457549"},{"key":"#cr-split#-e_1_3_2_1_3_1.2","doi-asserted-by":"crossref","unstructured":"Sebastian Baunsgaard et al. 2021. ExDRa: Exploratory Data Science on Federated Raw Data. In SIGMOD. 2450--2463. https:\/\/doi.org\/10.1145\/3448016.3457549","DOI":"10.1145\/3448016.3457549"},{"key":"e_1_3_2_1_4_1","unstructured":"Matthias Boehm et al. 2020. SystemDS: A Declarative Machine Learning System for the End-to-End Data Science Lifecycle. In CIDR. http:\/\/cidrdb.org\/cidr2020\/papers\/p22-boehm-cidr20.pdf  Matthias Boehm et al. 2020. SystemDS: A Declarative Machine Learning System for the End-to-End Data Science Lifecycle. In CIDR. http:\/\/cidrdb.org\/cidr2020\/papers\/p22-boehm-cidr20.pdf"},{"key":"#cr-split#-e_1_3_2_1_5_1.1","unstructured":"Matthias Boehm Alexandre V. Evfimievski and Berthold Reinwald. 2019. Efficient Data-Parallel Cumulative Aggregates for Large-Scale Machine Learning. In BTW. 267--286. https:\/\/doi.org\/10.18420\/btw2019--17 10.18420\/btw2019--17"},{"key":"#cr-split#-e_1_3_2_1_5_1.2","unstructured":"Matthias Boehm Alexandre V. Evfimievski and Berthold Reinwald. 2019. Efficient Data-Parallel Cumulative Aggregates for Large-Scale Machine Learning. In BTW. 267--286. https:\/\/doi.org\/10.18420\/btw2019--17"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.14778\/3229863.3229865"},{"key":"e_1_3_2_1_7_1","volume-title":"Scale: System Design. In MLSys. https:\/\/proceedings.mlsys.org\/book\/271.pdf","author":"Keith Bonawitz","year":"2019","unstructured":"Keith Bonawitz et al. 2019 . Towards Federated Learning at Scale: System Design. In MLSys. https:\/\/proceedings.mlsys.org\/book\/271.pdf Keith Bonawitz et al. 2019. Towards Federated Learning at Scale: System Design. In MLSys. https:\/\/proceedings.mlsys.org\/book\/271.pdf"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-70278-0_1"},{"key":"#cr-split#-e_1_3_2_1_9_1.1","doi-asserted-by":"crossref","unstructured":"Graham Cormode and Divesh Srivastava. 2009. Anonymized data: generation models usage. In SIGMOD. 1015--1018. https:\/\/doi.org\/10.1145\/1559845.1559968 10.1145\/1559845.1559968","DOI":"10.1145\/1559845.1559968"},{"key":"#cr-split#-e_1_3_2_1_9_1.2","doi-asserted-by":"crossref","unstructured":"Graham Cormode and Divesh Srivastava. 2009. Anonymized data: generation models usage. In SIGMOD. 1015--1018. https:\/\/doi.org\/10.1145\/1559845.1559968","DOI":"10.1145\/1559845.1559968"},{"key":"e_1_3_2_1_10_1","unstructured":"Data.Nashville.gov. 2020. Nashville Traffic Accidents Dataset. https:\/\/data.nashville.gov\/Police\/Traffic-Accidents\/6v6w-hpcw  Data.Nashville.gov. 2020. Nashville Traffic Accidents Dataset. https:\/\/data.nashville.gov\/Police\/Traffic-Accidents\/6v6w-hpcw"},{"key":"e_1_3_2_1_11_1","volume-title":"Ng","author":"Dean Jeffrey","year":"2012","unstructured":"Jeffrey Dean , Greg Corrado , Rajat Monga , Kai Chen , Matthieu Devin , Quoc V. Le , Mark Z. Mao , Marc'Aurelio Ranzato , Andrew W. Senior , Paul A. Tucker , Ke Yang , and Andrew Y . Ng . 2012 . Large Scale Distributed Deep Networks. In NeurIPS. 1232--1240. https:\/\/proceedings.neurips.cc\/paper\/2012\/hash\/6aca97005c68f1206823815f66102863-Abstract.html Jeffrey Dean, Greg Corrado, Rajat Monga, Kai Chen, Matthieu Devin, Quoc V. Le, Mark Z. Mao, Marc'Aurelio Ranzato, Andrew W. Senior, Paul A. Tucker, Ke Yang, and Andrew Y. Ng. 2012. Large Scale Distributed Deep Networks. In NeurIPS. 1232--1240. https:\/\/proceedings.neurips.cc\/paper\/2012\/hash\/6aca97005c68f1206823815f66102863-Abstract.html"},{"key":"#cr-split#-e_1_3_2_1_12_1.1","doi-asserted-by":"crossref","unstructured":"Fangcheng Fu Yingxia Shao Lele Yu Jiawei Jiang Huanran Xue Yangyu Tao and Bin Cui. 2021. VF(^2 )Boost: Very Fast Vertical Federated Gradient Boosting for Cross-Enterprise Learning. In SIGMOD. 563--576. https:\/\/doi.org\/10.1145\/3448016.3457241 10.1145\/3448016.3457241","DOI":"10.1145\/3448016.3457241"},{"key":"#cr-split#-e_1_3_2_1_12_1.2","doi-asserted-by":"crossref","unstructured":"Fangcheng Fu Yingxia Shao Lele Yu Jiawei Jiang Huanran Xue Yangyu Tao and Bin Cui. 2021. VF(^2 )Boost: Very Fast Vertical Federated Gradient Boosting for Cross-Enterprise Learning. In SIGMOD. 563--576. https:\/\/doi.org\/10.1145\/3448016.3457241","DOI":"10.1145\/3448016.3457241"},{"key":"#cr-split#-e_1_3_2_1_13_1.1","doi-asserted-by":"crossref","unstructured":"Fangcheng Fu Huanran Xue Yong Cheng Yangyu Tao and Bin Cui. 2022. BlindFL: Vertical Federated Machine Learning without Peeking into Your Data. In SIGMOD. 1316--1330. https:\/\/doi.org\/10.1145\/3514221.3526127 10.1145\/3514221.3526127","DOI":"10.1145\/3514221.3526127"},{"key":"#cr-split#-e_1_3_2_1_13_1.2","doi-asserted-by":"crossref","unstructured":"Fangcheng Fu Huanran Xue Yong Cheng Yangyu Tao and Bin Cui. 2022. BlindFL: Vertical Federated Machine Learning without Peeking into Your Data. In SIGMOD. 1316--1330. https:\/\/doi.org\/10.1145\/3514221.3526127","DOI":"10.1145\/3514221.3526127"},{"key":"e_1_3_2_1_14_1","unstructured":"Google. 2020. TensorFlow Federated: Machine Learning on Decentralized Data. https:\/\/www.tensorflow.org\/federated  Google. 2020. TensorFlow Federated: Machine Learning on Decentralized Data. https:\/\/www.tensorflow.org\/federated"},{"key":"e_1_3_2_1_15_1","volume-title":"Zachary Chase Lipton, and Charles Elkan","author":"Ji Zhanglong","year":"2014","unstructured":"Zhanglong Ji , Zachary Chase Lipton, and Charles Elkan . 2014 . Differential Privacy and Machine Learning: a Survey and Review. CoRR , Vol. abs\/ 1412 .7584 (2014). http:\/\/arxiv.org\/abs\/1412.7584 Zhanglong Ji, Zachary Chase Lipton, and Charles Elkan. 2014. Differential Privacy and Machine Learning: a Survey and Review. CoRR, Vol. abs\/1412.7584 (2014). http:\/\/arxiv.org\/abs\/1412.7584"},{"key":"e_1_3_2_1_16_1","unstructured":"Peter Kairouz Brendan McMahan and Virginia Smith. 2020. Federated Learning Tutorial. In NeurIPS. https:\/\/slideslive.com\/38935813\/federated-learning-tutorial  Peter Kairouz Brendan McMahan and Virginia Smith. 2020. Federated Learning Tutorial. In NeurIPS. https:\/\/slideslive.com\/38935813\/federated-learning-tutorial"},{"key":"e_1_3_2_1_17_1","unstructured":"Criteo AI Lab. 2020. Criteo 1TB Click Logs Dataset. https:\/\/ailab.criteo.com\/download-criteo-1tb-click-logs-dataset\/  Criteo AI Lab. 2020. Criteo 1TB Click Logs Dataset. https:\/\/ailab.criteo.com\/download-criteo-1tb-click-logs-dataset\/"},{"key":"e_1_3_2_1_18_1","volume-title":"Alexander J. Smola, Amr Ahmed, Vanja Josifovski, James Long, Eugene J. Shekita, and Bor-Yiing Su.","author":"Li Mu","year":"2014","unstructured":"Mu Li , David G. Andersen , Jun Woo Park , Alexander J. Smola, Amr Ahmed, Vanja Josifovski, James Long, Eugene J. Shekita, and Bor-Yiing Su. 2014 . Scaling Distributed Machine Learning with the Parameter Server. In OSDI. 583--598. https:\/\/www.usenix.org\/conference\/osdi14\/technical-sessions\/presentation\/li_mu Mu Li, David G. Andersen, Jun Woo Park, Alexander J. Smola, Amr Ahmed, Vanja Josifovski, James Long, Eugene J. Shekita, and Bor-Yiing Su. 2014. Scaling Distributed Machine Learning with the Parameter Server. In OSDI. 583--598. https:\/\/www.usenix.org\/conference\/osdi14\/technical-sessions\/presentation\/li_mu"},{"key":"e_1_3_2_1_19_1","first-page":"3005","article-title":"PyTorch Distributed","volume":"13","author":"Li Shen","year":"2020","unstructured":"Shen Li , Yanli Zhao , Rohan Varma , Omkar Salpekar , Pieter Noordhuis , Teng Li , Adam Paszke , Jeff Smith , Brian Vaughan , Pritam Damania , and Soumith Chintala . 2020 . PyTorch Distributed : Experiences on Accelerating Data Parallel Training. PVLDB , Vol. 13 , 12 (2020), 3005 -- 3018 . https:\/\/doi.org\/10.14778\/3415478.3415530 10.14778\/3415478.3415530 Shen Li, Yanli Zhao, Rohan Varma, Omkar Salpekar, Pieter Noordhuis, Teng Li, Adam Paszke, Jeff Smith, Brian Vaughan, Pritam Damania, and Soumith Chintala. 2020. PyTorch Distributed: Experiences on Accelerating Data Parallel Training. PVLDB, Vol. 13, 12 (2020), 3005--3018. https:\/\/doi.org\/10.14778\/3415478.3415530","journal-title":"Experiences on Accelerating Data Parallel Training. PVLDB"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1002\/int.22818"},{"key":"e_1_3_2_1_21_1","volume-title":"SecureML: A System for Scalable Privacy-Preserving Machine Learning. In IEEE Symp. on Security and Privacy. 19--38","author":"Mohassel Payman","year":"2017","unstructured":"Payman Mohassel and Yupeng Zhang . 2017 . SecureML: A System for Scalable Privacy-Preserving Machine Learning. In IEEE Symp. on Security and Privacy. 19--38 . https:\/\/doi.org\/10.1109\/SP.2017.12 10.1109\/SP.2017.12 Payman Mohassel and Yupeng Zhang. 2017. SecureML: A System for Scalable Privacy-Preserving Machine Learning. In IEEE Symp. on Security and Privacy. 19--38. https:\/\/doi.org\/10.1109\/SP.2017.12"},{"key":"e_1_3_2_1_22_1","volume-title":"LIMA: Fine-grained Lineage Tracing and Reuse in Machine Learning Systems. In SIGMOD. 1426--1439. https:\/\/doi.org\/10.1145\/3448016.3452788","author":"Phani Arnab","year":"2021","unstructured":"Arnab Phani , Benjamin Rath , and Matthias Boehm . 2021 . LIMA: Fine-grained Lineage Tracing and Reuse in Machine Learning Systems. In SIGMOD. 1426--1439. https:\/\/doi.org\/10.1145\/3448016.3452788 10.1145\/3448016.3452788 Arnab Phani, Benjamin Rath, and Matthias Boehm. 2021. LIMA: Fine-grained Lineage Tracing and Reuse in Machine Learning Systems. In SIGMOD. 1426--1439. https:\/\/doi.org\/10.1145\/3448016.3452788"},{"key":"e_1_3_2_1_23_1","volume-title":"Sanjiv Kumar, and Hugh Brendan McMahan.","author":"Reddi Sashank J.","year":"2021","unstructured":"Sashank J. Reddi , Zachary Charles , Manzil Zaheer , Zachary Garrett , Keith Rush , Jakub Konevc n\u00fd , Sanjiv Kumar, and Hugh Brendan McMahan. 2021 . Adaptive Federated Optimization. In ICLR. https:\/\/openreview.net\/forum?id=LkFG3lB13U5 Sashank J. Reddi, Zachary Charles, Manzil Zaheer, Zachary Garrett, Keith Rush, Jakub Konevc n\u00fd, Sanjiv Kumar, and Hugh Brendan McMahan. 2021. Adaptive Federated Optimization. In ICLR. https:\/\/openreview.net\/forum?id=LkFG3lB13U5"},{"key":"e_1_3_2_1_24_1","volume-title":"Dask: Parallel Computation with Blocked algorithms and Task Scheduling. In SciPy.","author":"Rocklin Matthew","year":"2015","unstructured":"Matthew Rocklin . 2015 . Dask: Parallel Computation with Blocked algorithms and Task Scheduling. In SciPy. Matthew Rocklin. 2015. Dask: Parallel Computation with Blocked algorithms and Task Scheduling. In SciPy."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3015958"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.14778\/1920841.1920931"},{"key":"e_1_3_2_1_27_1","unstructured":"UCI. 2020. Adult Data Set. https:\/\/archive.ics.uci.edu\/ml\/datasets\/adult  UCI. 2020. Adult Data Set. https:\/\/archive.ics.uci.edu\/ml\/datasets\/adult"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.14778\/3407790.3407811"},{"key":"e_1_3_2_1_29_1","unstructured":"Matei Zaharia Mosharaf Chowdhury Tathagata Das Ankur Dave Justin Ma Murphy McCauly Michael J. Franklin Scott Shenker and Ion Stoica. 2012. Resilient Distributed Datasets: A Fault-Tolerant Abstraction for In-Memory Cluster Computing. In NSDI. 15--28. https:\/\/www.usenix.org\/conference\/nsdi12\/technical-sessions\/presentation\/zaharia  Matei Zaharia Mosharaf Chowdhury Tathagata Das Ankur Dave Justin Ma Murphy McCauly Michael J. Franklin Scott Shenker and Ion Stoica. 2012. Resilient Distributed Datasets: A Fault-Tolerant Abstraction for In-Memory Cluster Computing. In NSDI. 15--28. https:\/\/www.usenix.org\/conference\/nsdi12\/technical-sessions\/presentation\/zaharia"},{"key":"e_1_3_2_1_30_1","unstructured":"Chengliang Zhang Suyi Li Junzhe Xia Wei Wang Feng Yan and Yang Liu. 2020. BatchCrypt: Efficient Homomorphic Encryption for Cross-Silo Federated Learning. In ATC. 493--506. https:\/\/www.usenix.org\/conference\/atc20\/presentation\/zhang-chengliang  Chengliang Zhang Suyi Li Junzhe Xia Wei Wang Feng Yan and Yang Liu. 2020. BatchCrypt: Efficient Homomorphic Encryption for Cross-Silo Federated Learning. In ATC. 493--506. https:\/\/www.usenix.org\/conference\/atc20\/presentation\/zhang-chengliang"}],"event":{"name":"CIKM '22: The 31st ACM International Conference on Information and Knowledge Management","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Atlanta GA USA","acronym":"CIKM '22"},"container-title":["Proceedings of the 31st ACM International Conference on Information &amp; Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3511808.3557162","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3511808.3557162","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:49:06Z","timestamp":1750182546000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3511808.3557162"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,17]]},"references-count":35,"alternative-id":["10.1145\/3511808.3557162","10.1145\/3511808"],"URL":"https:\/\/doi.org\/10.1145\/3511808.3557162","relation":{},"subject":[],"published":{"date-parts":[[2022,10,17]]},"assertion":[{"value":"2022-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}