{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T11:34:23Z","timestamp":1778758463830,"version":"3.51.4"},"publisher-location":"New York, New York, USA","reference-count":16,"publisher":"ACM Press","license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100007688","name":"American University of Beirut","doi-asserted-by":"publisher","award":["URB-AUB-2018\/2019"],"award-info":[{"award-number":["URB-AUB-2018\/2019"]}],"id":[{"id":"10.13039\/100007688","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1145\/3331076.3331110","type":"proceedings-article","created":{"date-parts":[[2019,7,19]],"date-time":"2019-07-19T17:40:26Z","timestamp":1563558026000},"page":"1-5","source":"Crossref","is-referenced-by-count":7,"title":["Chi squared feature selection over Apache Spark"],"prefix":"10.1145","author":[{"given":"Mohamed","family":"Nassar","sequence":"first","affiliation":[{"name":"American University of Beirut (AUB)"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haidar","family":"Safa","sequence":"additional","affiliation":[{"name":"American University of Beirut (AUB)"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alaa Al","family":"Mutawa","sequence":"additional","affiliation":[{"name":"American University of Beirut (AUB)"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ahmed","family":"Helal","sequence":"additional","affiliation":[{"name":"American University of Beirut (AUB)"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Iskander","family":"Gaba","sequence":"additional","affiliation":[{"name":"American University of Beirut (AUB)"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","reference":[{"key":"key-10.1145\/3331076.3331110-1","unstructured":"Libsvm data: Classification, regression, and multi-label. https:\/\/www.csie.ntu.edu.tw\/~cjlin\/libsvmtools\/datasets\/."},{"key":"key-10.1145\/3331076.3331110-2","unstructured":"Partitioning in apache spark. https:\/\/medium.com\/parrot-prediction\/partitioning-in-apache-spark-8134ad840b0."},{"key":"key-10.1145\/3331076.3331110-3","unstructured":"sklearn.feature_selection.chi2. https:\/\/scikit-learn.org\/stable\/modules\/generated\/sklearn.feature_selection.chi2.html. Accessed: 2019-03-11."},{"key":"key-10.1145\/3331076.3331110-4","unstructured":"Spark docs - basic statistics - hypothesis testing. https:\/\/spark.apache.org\/docs\/2.2.0\/ml-statistics.html. Accessed: 2019-03-11."},{"key":"key-10.1145\/3331076.3331110-5","unstructured":"A tale of three apache spark apis: Rdds vs dataframes and datasets. https:\/\/databricks.com\/blog\/2016\/07\/14\/a-tale-of-three-apache-spark-apis-rdds-dataframes-and-datasets.html."},{"key":"key-10.1145\/3331076.3331110-6","doi-asserted-by":"crossref","unstructured":"M. Armbrust, R. S. Xin, C. Lian, Y. Huai, D. Liu, J. K. Bradley, X. Meng, T. Kaftan, M. J. Franklin, A. Ghodsi, et al. Spark sql: Relational data processing in spark. In Proceedings of the 2015 ACM SIGMOD international conference on management of data, pages 1383--1394. ACM, 2015.","DOI":"10.1145\/2723372.2742797"},{"key":"key-10.1145\/3331076.3331110-7","doi-asserted-by":"crossref","unstructured":"R. Bosagh Zadeh, X. Meng, A. Ulanov, B. Yavuz, L. Pu, S. Venkataraman, E. Sparks, A. Staple, and M. Zaharia. Matrix computations and optimization in apache spark. In Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pages 31--38. ACM, 2016.","DOI":"10.1145\/2939672.2939675"},{"key":"key-10.1145\/3331076.3331110-8","unstructured":"L. Dali. Lessons learned while implementing a sparse logistic regression algorithm in apache spark. https:\/\/databricks.com\/session\/lessons-learned-while-implementing-a-sparse-logistic-regression-algorithm-in-apache-spark."},{"key":"key-10.1145\/3331076.3331110-9","doi-asserted-by":"crossref","unstructured":"M. Interlandi, S. D. Tetali, M. A. Gulzar, J. Noor, T. Condie, M. Kim, and T. Millstein. Optimizing interactive development of data-intensive applications. In Proceedings of the Seventh ACM Symposium on Cloud Computing, pages 510--522. ACM, 2016.","DOI":"10.1145\/2987550.2987565"},{"key":"key-10.1145\/3331076.3331110-10","doi-asserted-by":"crossref","unstructured":"M. Jaber, M. Nassar, W. A. R. Al Orabi, B. A. Farraj, M. O. Kayali, and C. Helwe. Reconfigurable and adaptive spark applications. In CLOSER, pages 84--91, 2017.","DOI":"10.5220\/0006289901120119"},{"key":"key-10.1145\/3331076.3331110-11","doi-asserted-by":"crossref","unstructured":"Z. A. Kocsis, J. H. Drake, D. Carson, and J. Swan. Automatic improvement of apache spark queries using semantics-preserving program reduction. In Proceedings of the 2016 on Genetic and Evolutionary Computation Conference Companion, pages 1141--1146. ACM, 2016.","DOI":"10.1145\/2908961.2931692"},{"key":"key-10.1145\/3331076.3331110-12","doi-asserted-by":"crossref","unstructured":"Z. Shmeis and M. Jaber. Fine and coarse grained composition and adaptation of spark applications. Future Generation Computer Systems, 86:629--640, 2018.","DOI":"10.1016\/j.future.2018.04.048"},{"key":"key-10.1145\/3331076.3331110-13","doi-asserted-by":"crossref","unstructured":"M. Wang, S. B. Handurukande, and M. Nassar. Rpig: A scalable framework for machine learning and advanced statistical functionalities. In 4th IEEE International Conference on Cloud Computing Technology and Science Proceedings, pages 293--300. IEEE, 2012.","DOI":"10.1109\/CloudCom.2012.6427480"},{"key":"key-10.1145\/3331076.3331110-14","unstructured":"Y. Yang and J. O. Pedersen. A comparative study on feature selection in text categorization. In Icml, volume 97, page 35, 1997."},{"key":"key-10.1145\/3331076.3331110-15","unstructured":"M. Zaharia, R. S. Xin, P. Wendell, T. Das, M. Armbrust, A. Dave, X. Meng, J. Rosen, S. Venkataraman, M. J. Franklin, et al. Apache spark: a unified engine for big data processing. Communications of the ACM, 59(11):56--65, 2016."},{"key":"key-10.1145\/3331076.3331110-16","doi-asserted-by":"crossref","unstructured":"W. Zhu, H. Chen, and F. Hu. Asc: Improving spark driver performance with automatic spark checkpoint. In 2016 18th International Conference on Advanced Communication Technology (ICACT), pages 607--611. IEEE, 2016.","DOI":"10.1109\/ICACT.2016.7423489"}],"event":{"name":"the 23rd International Database Applications & Engineering Symposium","location":"Athens, Greece","acronym":"IDEAS '19","number":"23","start":{"date-parts":[[2019,6,10]]},"end":{"date-parts":[[2019,6,12]]}},"container-title":["Proceedings of the 23rd International Database Applications &amp; Engineering Symposium on   - IDEAS '19"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3331076.3331110","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/dl.acm.org\/ft_gateway.cfm?id=3331110&ftid=2073424&dwn=1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T00:26:06Z","timestamp":1750206366000},"score":1,"resource":{"primary":{"URL":"http:\/\/dl.acm.org\/citation.cfm?doid=3331076.3331110"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"references-count":16,"URL":"https:\/\/doi.org\/10.1145\/3331076.3331110","relation":{},"subject":[],"published":{"date-parts":[[2019]]}}}