{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T06:29:07Z","timestamp":1778048947480,"version":"3.51.4"},"reference-count":109,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,4,12]],"date-time":"2024-04-12T00:00:00Z","timestamp":1712880000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,4,12]],"date-time":"2024-04-12T00:00:00Z","timestamp":1712880000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1741022"],"award-info":[{"award-number":["1741022"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2107290"],"award-info":[{"award-number":["2107290"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1934565"],"award-info":[{"award-number":["1934565"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2107050"],"award-info":[{"award-number":["2107050"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"University of Rochester","award":["Schwarz Discover Grant (2022)"],"award-info":[{"award-number":["Schwarz Discover Grant (2022)"]}]},{"DOI":"10.13039\/100006785","name":"Google","doi-asserted-by":"publisher","award":["Google Research Scholar Award (2021)"],"award-info":[{"award-number":["Google Research Scholar Award (2021)"]}],"id":[{"id":"10.13039\/100006785","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["The VLDB Journal"],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1007\/s00778-024-00849-w","type":"journal-article","created":{"date-parts":[[2024,4,12]],"date-time":"2024-04-12T13:01:45Z","timestamp":1712926905000},"page":"1283-1306","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Data distribution tailoring revisited: cost-efficient integration of representative data"],"prefix":"10.1007","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3945-925X","authenticated-orcid":false,"given":"Jiwon","family":"Chang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bohan","family":"Cui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4710-8719","authenticated-orcid":false,"given":"Fatemeh","family":"Nargesian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5251-6186","authenticated-orcid":false,"given":"Abolfazl","family":"Asudeh","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0724-5214","authenticated-orcid":false,"given":"H. V.","family":"Jagadish","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,4,12]]},"reference":[{"key":"849_CR1","doi-asserted-by":"publisher","unstructured":"Nargesian, F., Asudeh, A., Jagadish, H.V.: Tailoring data source distributions for fairness-aware data integration. Proceed. VLDB Endow. 14(11), 2519\u20132532 (2021). https:\/\/doi.org\/10.14778\/3476249.3476299","DOI":"10.14778\/3476249.3476299"},{"key":"849_CR2","unstructured":"Rose, A.: Are face-detection cameras racist? Time Business (2010)"},{"key":"849_CR3","unstructured":"Mulshine, M.: A major flaw in google\u2019s algorithm allegedly tagged two black people\u2019s faces with the word \u2019gorillas\u2019. Business Insider (2015)"},{"key":"849_CR4","unstructured":"Townsend, T.: Most engineers are white and so are the faces they use to train software. Recode (2017)"},{"key":"849_CR5","unstructured":"Dastin, J.: Amazon scraps secret ai recruiting tool that showed bias against women. Reuters (2018)"},{"issue":"3","key":"849_CR6","first-page":"333","volume":"40","author":"D Holt","year":"1991","unstructured":"Holt, D., Elliot, D.: Methods of weighting for unit non-response. J. R. Stat. Soc. Series D (The Statistician) 40(3), 333\u2013342 (1991)","journal-title":"J. R. Stat. Soc. Series D (The Statistician)"},{"key":"849_CR7","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla, N.V., Bowyer, K.W., Hall, L.O., Kegelmeyer, W.P.: Smote: synthetic minority over-sampling technique. J. Artif. Intell. Res. 16, 321\u2013357 (2002)","journal-title":"J. Artif. Intell. Res."},{"issue":"1","key":"849_CR8","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1145\/1007730.1007735","volume":"6","author":"GE Batista","year":"2004","unstructured":"Batista, G.E., Prati, R.C., Monard, M.C.: A study of the behavior of several methods for balancing machine learning training data. ACM SIGKDD Explorations Newsl 6(1), 20\u201329 (2004)","journal-title":"ACM SIGKDD Explorations Newsl"},{"key":"849_CR9","doi-asserted-by":"publisher","first-page":"202","DOI":"10.1016\/j.aap.2019.05.014","volume":"129","author":"AB Parsa","year":"2019","unstructured":"Parsa, A.B., Taghipour, H., Derrible, S., Mohammadian, A.K.: Real-time accident detection: coping with imbalanced data. Accident Anal. Prevent. 129, 202\u2013210 (2019)","journal-title":"Accident Anal. Prevent."},{"key":"849_CR10","doi-asserted-by":"crossref","unstructured":"Chung, Y., Kraska, T., Polyzotis, N., Tae, K.H., Whang, S.E.: Slice finder: Automated data slicing for model validation. In: 2019 IEEE 35th International Conference on Data Engineering (ICDE), pp. 1550\u20131553. IEEE (2019)","DOI":"10.1109\/ICDE.2019.00139"},{"key":"849_CR11","doi-asserted-by":"crossref","unstructured":"Sagadeeva, S., Boehm, M.: Sliceline: Fast, linear-algebra-based slice finding for ml model debugging. In: Proceedings of the 2021 International Conference on Management of Data, pp. 2290\u20132299 (2021)","DOI":"10.1145\/3448016.3457323"},{"key":"849_CR12","doi-asserted-by":"crossref","unstructured":"Tae, K.H., Whang, S.E.: Slice tuner: A selective data acquisition framework for accurate and fair machine learning models. In: Proceedings of the 2021 International Conference on Management of Data, pp. 1771\u20131783 (2021)","DOI":"10.1145\/3448016.3452792"},{"key":"849_CR13","doi-asserted-by":"crossref","unstructured":"Bartlett, R., Morse, A., Stanton, R., Wallace, N.: Consumer-lending discrimination in the fintech era. Tech. rep, National Bureau of Economic Research (2019)","DOI":"10.3386\/w25943"},{"key":"849_CR14","unstructured":"Dawex: Dawex: Sell, buy and share data. https:\/\/www.dawex.com\/en"},{"key":"849_CR15","unstructured":"Xignite: Market data solutions. https:\/\/www.xignite.com\/"},{"key":"849_CR16","unstructured":"WorldQuant: Worldquant. https:\/\/www.worldquant.com"},{"key":"849_CR17","unstructured":"Singer, N.: A data broker offers a peek behind the curtain. The New York Times (2013)"},{"key":"849_CR18","unstructured":"of\u00a0California, S.: Data broker registry. https:\/\/oag.ca.gov\/data-brokers (2020)"},{"key":"849_CR19","unstructured":"Turk, A.M.: Amazon mechanical turk. Retrieved August 17, 2012 (2012)"},{"issue":"7","key":"849_CR20","first-page":"813","volume":"11","author":"F Nargesian","year":"2018","unstructured":"Nargesian, F., Zhu, E., Pu, K.Q., Miller, R.J.: Table union search on open data. PVLDB 11(7), 813\u2013825 (2018)","journal-title":"PVLDB"},{"key":"849_CR21","unstructured":"Rapid: Google flights api: Incorporate travel data into your app. The Rapid API Blog (2020)"},{"key":"849_CR22","doi-asserted-by":"crossref","unstructured":"Chai, C., Fan, J., Li, G.: Incentive-based entity collection using crowdsourcing. In: ICDE, pp. 341\u2013352 (2018)","DOI":"10.1109\/ICDE.2018.00039"},{"issue":"7","key":"849_CR23","doi-asserted-by":"publisher","first-page":"1312","DOI":"10.1109\/TKDE.2016.2611509","volume":"31","author":"J Fan","year":"2019","unstructured":"Fan, J., Wei, Z., Zhang, D., Yang, J., Du, X.: Distribution-aware crowdsourced entity collection. IEEE Trans. Knowl. Data Eng. 31(7), 1312\u20131326 (2019)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"849_CR24","doi-asserted-by":"crossref","unstructured":"Chai, C., Li, G., Li, J., Deng, D., Feng, J.: Cost-effective crowdsourced entity resolution: a partial-order approach. In: SIGMOD, pp. 969\u2013984 (2016)","DOI":"10.1145\/2882903.2915252"},{"issue":"11","key":"849_CR25","doi-asserted-by":"publisher","first-page":"3137","DOI":"10.14778\/3551793.3551858","volume":"15","author":"A Asudeh","year":"2022","unstructured":"Asudeh, A., Nargesian, F.: Towards distribution-aware query answering in data markets. Proc. VLDB Endow. 15(11), 3137\u20133144 (2022)","journal-title":"Proc. VLDB Endow."},{"key":"849_CR26","unstructured":"The texas tribune data set. https:\/\/salaries.texastribune.org (2021)"},{"key":"849_CR27","doi-asserted-by":"crossref","unstructured":"Luo, G., Ellmann, C.J., Haas, P.J., Naughton, J.F.: A scalable hash ripple join algorithm. In: SIGMOD, pp. 252\u2013262 (2002)","DOI":"10.1145\/564691.564721"},{"key":"849_CR28","doi-asserted-by":"crossref","unstructured":"Li, F., Wu, B., Yi, K., Zhao, Z.: Wander join: online aggregation via random walks. In: SIGMOD, pp. 615\u2013629 (2016)","DOI":"10.1145\/2882903.2915235"},{"key":"849_CR29","doi-asserted-by":"crossref","unstructured":"Zhao, Z., Christensen, R., Li, F., Hu, X., Yi, K.: Random sampling over joins revisited. In: SIGMOD, pp. 1525\u20131539 (2018)","DOI":"10.1145\/3183713.3183739"},{"key":"849_CR30","unstructured":"The socrata open data api. https:\/\/developer.twitter.com\/en\/products\/twitter-api\/enterprise"},{"issue":"10","key":"849_CR31","doi-asserted-by":"publisher","first-page":"1832","DOI":"10.14778\/3467861.3467872","volume":"14","author":"Y Li","year":"2021","unstructured":"Li, Y., Yu, X., Koudas, N.: Data acquisition for improving machine learning models. Proc. VLDB Endow. 14(10), 1832\u20131844 (2021)","journal-title":"Proc. VLDB Endow."},{"key":"849_CR32","doi-asserted-by":"crossref","unstructured":"Sheng, C., Zhang, N., Tao, Y., Jin, X.: Optimal algorithms for crawling a hidden database in the web. arXiv preprint arXiv:1208.0075 (2012)","DOI":"10.14778\/2350229.2350232"},{"issue":"2","key":"849_CR33","doi-asserted-by":"publisher","first-page":"1241","DOI":"10.14778\/1454159.1454163","volume":"1","author":"J Madhavan","year":"2008","unstructured":"Madhavan, J., Ko, D., Kot, \u0141, Ganapathy, V., Rasmussen, A., Halevy, A.: Google\u2019s deep web crawl. Proceed. VLDB Endow. 1(2), 1241\u20131252 (2008)","journal-title":"Proceed. VLDB Endow."},{"issue":"7","key":"849_CR34","first-page":"600","volume":"9","author":"A Asudeh","year":"2016","unstructured":"Asudeh, A., Thirumuruganathan, S., Zhang, N., Das, G.: Discovering the skyline of web databases. PVLDB 9(7), 600\u2013611 (2016)","journal-title":"PVLDB"},{"issue":"11","key":"849_CR35","first-page":"888","volume":"9","author":"A Asudeh","year":"2016","unstructured":"Asudeh, A., Zhang, N., Das, G.: Query reranking as a service. PVLDB 9(11), 888\u2013899 (2016)","journal-title":"Query reranking as a service. PVLDB"},{"key":"849_CR36","doi-asserted-by":"publisher","first-page":"368","DOI":"10.1016\/j.engappai.2014.09.019","volume":"37","author":"GG Sundarkumar","year":"2015","unstructured":"Sundarkumar, G.G., Ravi, V.: A novel hybrid undersampling method for mining unbalanced datasets in banking and insurance. Eng. Appl. Artif. Intell. 37, 368\u2013377 (2015)","journal-title":"Eng. Appl. Artif. Intell."},{"key":"849_CR37","unstructured":"Select Issues: Assessing Adverse Impact in Software, Algorithms, and Artificial Intelligence Used in Employment Selection Procedures Under Title VII of the Civil Rights Act of 1964 (2023)"},{"key":"849_CR38","doi-asserted-by":"crossref","unstructured":"Feldman, M., Friedler, S.A., Moeller, J., Scheidegger, C., Venkatasubramanian, S.: Certifying and removing disparate impact. In: proceedings of the 21th ACM SIGKDD international conference on knowledge discovery and data mining, pp. 259\u2013268 (2015)","DOI":"10.1145\/2783258.2783311"},{"key":"849_CR39","unstructured":"Kearns, M., Neel, S., Roth, A., Wu, Z.S.: Preventing fairness gerrymandering: Auditing and learning for subgroup fairness. In: International conference on machine learning, pp. 2564\u20132572. PMLR (2018)"},{"key":"849_CR40","doi-asserted-by":"crossref","unstructured":"Kearns, M., Neel, S., Roth, A., Wu, Z.S.: An empirical study of rich subgroup fairness for machine learning. In: Proceedings of the Conference on Fairness, Accountability, and Transparency, pp. 100\u2013109 (2019)","DOI":"10.1145\/3287560.3287592"},{"key":"849_CR41","doi-asserted-by":"crossref","unstructured":"Foulds, J.R., Islam, R., Keya, K.N., Pan, S.: An intersectional definition of fairness. In: 2020 IEEE 36th International Conference on Data Engineering (ICDE), pp. 1918\u20131921. IEEE (2020)","DOI":"10.1109\/ICDE48307.2020.00203"},{"key":"849_CR42","doi-asserted-by":"crossref","unstructured":"Asudeh, A., Jin, Z., Jagadish, H.V.: Assessing and remedying coverage for a given dataset. In: ICDE, pp. 554\u2013565 (2019)","DOI":"10.1109\/ICDE.2019.00056"},{"key":"849_CR43","doi-asserted-by":"crossref","unstructured":"Deng, S., Lu, S., Tao, Y.: On join sampling and the hardness of combinatorial output-sensitive join algorithms. In: PODS, pp. 99\u2013111. ACM (2023)","DOI":"10.1145\/3584372.3588666"},{"issue":"11","key":"849_CR44","doi-asserted-by":"publisher","first-page":"3137","DOI":"10.14778\/3551793.3551858","volume":"15","author":"A Asudeh","year":"2022","unstructured":"Asudeh, A., Nargesian, F.: Towards distribution-aware query answering in data markets. Proc. VLDB Endow. 15(11), 3137\u20133144 (2022)","journal-title":"Proc. VLDB Endow."},{"issue":"4","key":"849_CR45","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1145\/356827.356831","volume":"12","author":"RS Bird","year":"1980","unstructured":"Bird, R.S.: Tabulation techniques for recursive programs. ACM Comput. Surveys 12(4), 403\u2013417 (1980). https:\/\/doi.org\/10.1145\/356827.356831","journal-title":"ACM Comput. Surveys"},{"key":"849_CR46","doi-asserted-by":"crossref","unstructured":"Aggarwal, A., Klawe, M.M., Moran, S., Shor, P., WIlber, R.: Geometric applications of a matrix searching algorithm. In: Proceedings of the Second Annual Symposium on Computational Geometry (1986). https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/10515.10546","DOI":"10.1145\/10515.10546"},{"key":"849_CR47","doi-asserted-by":"publisher","unstructured":"Galil, Z., Park, K.: Dynamic programming with convexity, concavity and sparsity. Theor. Comput. Sci. 92(1), 49\u201376 (1992). https:\/\/doi.org\/10.1016\/0304-3975(92)90135-3","DOI":"10.1016\/0304-3975(92)90135-3"},{"key":"849_CR48","doi-asserted-by":"crossref","unstructured":"Motwani, R., Raghavan, P.: Randomized algorithms. Cambridge university press (1995)","DOI":"10.1017\/CBO9780511814075"},{"issue":"3","key":"849_CR49","doi-asserted-by":"publisher","first-page":"930","DOI":"10.1017\/jpr.2016.51","volume":"53","author":"M Brown","year":"2016","unstructured":"Brown, M., Ross, S.M.: Optimality results for coupon collection. J. Appl. Probab. 53(3), 930\u2013937 (2016)","journal-title":"J. Appl. Probab."},{"key":"849_CR50","doi-asserted-by":"crossref","unstructured":"Katehakis, M.N., Jr., A.F.V.: The multi-armed bandit problem: Decomposition and computation. Math. Oper. Res. 12(2), 262\u2013268 (1987)","DOI":"10.1287\/moor.12.2.262"},{"issue":"1","key":"849_CR51","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/2200000024","volume":"5","author":"S Bubeck","year":"2012","unstructured":"Bubeck, S., Cesa-Bianchi, N.: Regret analysis of stochastic and nonstochastic multi-armed bandit problems. Found. Trends Mach. Learn. 5(1), 1\u2013122 (2012)","journal-title":"Found. Trends Mach. Learn."},{"key":"849_CR52","doi-asserted-by":"publisher","unstructured":"Slivkins, A.: Introduction to Multi-Armed Bandits. Foundations and Trends\u00ae in Machine Learning 12(1-2), 1\u2013286 (2019). https:\/\/doi.org\/10.1561\/2200000068","DOI":"10.1561\/2200000068"},{"key":"849_CR53","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Fischer, P.: Finite-time analysis of the multiarmed bandit problem. Mach. Learn. 47, 235\u2013256 (2002)","journal-title":"Mach. Learn."},{"key":"849_CR54","unstructured":"of\u00a0Transportation\u00a0Statistics, B.: Airborne flights database. U.S. Department of Transportation, https:\/\/www.transtats.bts.gov (2021)"},{"key":"849_CR55","unstructured":"ProPublica: Compas-analysis. ProPublica (2023). https:\/\/github.com\/propublica\/compas-analysis"},{"key":"849_CR56","unstructured":"Mattu, J., Angwin, L., Kirchner, S., Larson, J.: How We Analyzed the COMPAS Recidivism Algorithm (2016). https:\/\/www.propublica.org\/article\/how-we-analyzed-the-compas-recidivism-algorithm?token=TiqCeZIj4uLbXl91e3wM2PnmnWbCVOvS"},{"key":"849_CR57","doi-asserted-by":"crossref","unstructured":"Lagioia, F., Rovatti, R., Sartor, G.: Algorithmic fairness through group parities? the case of compas-sapmoc. AI & SOCIETY pp. 1\u201320 (2022)","DOI":"10.1007\/s00146-022-01441-y"},{"issue":"6","key":"849_CR58","doi-asserted-by":"publisher","first-page":"2074","DOI":"10.1007\/s10618-022-00854-z","volume":"36","author":"A Fabris","year":"2022","unstructured":"Fabris, A., Messina, S., Silvello, G., Susto, G.A.: Algorithmic fairness datasets: the story so far. Data Min. Knowl. Disc. 36(6), 2074\u20132152 (2022)","journal-title":"Data Min. Knowl. Disc."},{"key":"849_CR59","unstructured":"Barocas, S., Hardt, M., Narayanan, A.: Fairness and machine learning: Limitations and opportunities. URL: fairmlbook.org (2019)"},{"key":"849_CR60","doi-asserted-by":"crossref","unstructured":"Friedler, S.A., Scheidegger, C., Venkatasubramanian, S., Choudhary, S., Hamilton, E.P., Roth, D.: A comparative study of fairness-enhancing interventions in machine learning. In: Proceedings of the conference on fairness, accountability, and transparency, pp. 329\u2013338 (2019)","DOI":"10.1145\/3287560.3287589"},{"issue":"1","key":"849_CR61","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10115-011-0463-8","volume":"33","author":"F Kamiran","year":"2012","unstructured":"Kamiran, F., Calders, T.: Data preprocessing techniques for classification without discrimination. Knowl. Inf. Syst. 33(1), 1\u201333 (2012)","journal-title":"Knowl. Inf. Syst."},{"key":"849_CR62","unstructured":"Calmon, F., Wei, D., Vinzamuri, B., Ramamurthy, K.N., Varshney, K.R.: Optimized pre-processing for discrimination prevention. In: Advances in Neural Information Processing Systems, pp. 3992\u20134001 (2017)"},{"key":"849_CR63","doi-asserted-by":"crossref","unstructured":"Salimi, B., Rodriguez, L., Howe, B., Suciu, D.: Interventional fairness: Causal database repair for algorithmic fairness. In: SIGMOD, pp. 793\u2013810 (2019)","DOI":"10.1145\/3299869.3319901"},{"key":"849_CR64","doi-asserted-by":"crossref","unstructured":"Kamishima, T., Akaho, S., Asoh, H., Sakuma, J.: Fairness-aware classifier with prejudice remover regularizer. In: Joint European Conference on Machine Learning and Knowledge Discovery in Databases, pp. 35\u201350. Springer (2012)","DOI":"10.1007\/978-3-642-33486-3_3"},{"key":"849_CR65","unstructured":"Zemel, R., Wu, Y., Swersky, K., Pitassi, T., Dwork, C.: Learning fair representations. In: ICML (2013)"},{"key":"849_CR66","unstructured":"Zafar, M.B., Valera, I., Rodriguez, M.G., Gummadi, K.P.: Fairness constraints: Mechanisms for fair classification. CoRR, abs\/1507.05259 (2015)"},{"key":"849_CR67","doi-asserted-by":"crossref","unstructured":"Zhang, H., Chu, X., Asudeh, A., Navathe, S.: Omnifair: A declarative system for model-agnostic group fairness in machine learning. SIGMOD (2021)","DOI":"10.1145\/3448016.3452787"},{"key":"849_CR68","doi-asserted-by":"crossref","unstructured":"Kamiran, F., Calders, T., Pechenizkiy, M.: Discrimination aware decision tree learning. In: 2010 IEEE International Conference on Data Mining, pp. 869\u2013874. IEEE (2010)","DOI":"10.1109\/ICDM.2010.50"},{"key":"849_CR69","unstructured":"Hardt, M., Price, E., Srebro, N.: Equality of opportunity in supervised learning. arXiv preprint arXiv:1610.02413 (2016)"},{"key":"849_CR70","unstructured":"Woodworth, B., Gunasekar, S., Ohannessian, M.I., Srebro, N.: Learning non-discriminatory predictors. In: Conference on Learning Theory, pp. 1920\u20131953. PMLR (2017)"},{"issue":"1","key":"849_CR71","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1145\/3422648.3422657","volume":"49","author":"B Salimi","year":"2020","unstructured":"Salimi, B., Howe, B., Suciu, D.: Database repair meets algorithmic fairness. ACM SIGMOD Rec. 49(1), 34\u201341 (2020)","journal-title":"ACM SIGMOD Rec."},{"key":"849_CR72","doi-asserted-by":"crossref","unstructured":"Asudeh, A., Jagadish, H., Stoyanovich, J., Das, G.: Designing fair ranking schemes. In: SIGMOD, pp. 1259\u20131276 (2019)","DOI":"10.1145\/3299869.3300079"},{"issue":"12","key":"849_CR73","first-page":"2706","volume":"13","author":"C Kuhlman","year":"2020","unstructured":"Kuhlman, C., Rundensteiner, E.: Rank aggregation algorithms for fair consensus. PVLDB 13(12), 2706\u20132719 (2020)","journal-title":"PVLDB"},{"key":"849_CR74","doi-asserted-by":"crossref","unstructured":"Asudeh, A., Jagadish, H., Miklau, G., Stoyanovich, J.: On obtaining stable rankings. PVLDB 12(3) (2019)","DOI":"10.14778\/3291264.3291269"},{"key":"849_CR75","doi-asserted-by":"crossref","unstructured":"Guan, Y., Asudeh, A., Mayuram, P., Jagadish, H., Stoyanovich, J., Miklau, G., Das, G.: Mithraranking: A system for responsible ranking design. In: SIGMOD, pp. 1913\u20131916 (2019)","DOI":"10.1145\/3299869.3320244"},{"key":"849_CR76","doi-asserted-by":"crossref","unstructured":"Sun, C., Asudeh, A., Jagadish, H., Howe, B., Stoyanovich, J.: Mithralabel: Flexible dataset nutritional labels for responsible data science. In: Proceedings of the 28th ACM International Conference on Information and Knowledge Management, pp. 2893\u20132896 (2019)","DOI":"10.1145\/3357384.3357853"},{"key":"849_CR77","doi-asserted-by":"crossref","unstructured":"Yang, K., Stoyanovich, J., Asudeh, A., Howe, B., Jagadish, H., Miklau, G.: A nutritional label for rankings. In: SIGMOD, pp. 1773\u20131776 (2018)","DOI":"10.1145\/3183713.3193568"},{"key":"849_CR78","doi-asserted-by":"crossref","unstructured":"Getoor, L.: Responsible data science. In: SIGMOD (2019)","DOI":"10.1109\/BigData47090.2019.9006129"},{"issue":"12","key":"849_CR79","first-page":"3474","volume":"13","author":"J Stoyanovich","year":"2020","unstructured":"Stoyanovich, J., Howe, B., Jagadish, H.: Responsible data management. PVLDB 13(12), 3474\u20133488 (2020)","journal-title":"Responsible data management. PVLDB"},{"key":"849_CR80","doi-asserted-by":"crossref","unstructured":"Shah, N.B., Lipton, Z.: Sigmod 2020 tutorial on fairness and bias in peer review and other sociotechnical intelligent systems. In: SIGMOD, pp. 2637\u20132640 (2020)","DOI":"10.1145\/3318464.3383129"},{"key":"849_CR81","doi-asserted-by":"crossref","unstructured":"Venkatasubramanian, S.: Algorithmic fairness: measures, methods and representations. In: PODS, pp. 481\u2013481 (2019)","DOI":"10.1145\/3294052.3322192"},{"issue":"12","key":"849_CR82","first-page":"3445","volume":"13","author":"A Asudeh","year":"2020","unstructured":"Asudeh, A., Jagadish, H.V.: Fairly evaluating and scoring items in a data set. PVLDB 13(12), 3445\u20133448 (2020)","journal-title":"PVLDB"},{"key":"849_CR83","unstructured":"Neyman, J., Pearson, E.S.: Contributions to the theory of testing statistical hypotheses. Stat. Res. Memoirs (1936)"},{"key":"849_CR84","doi-asserted-by":"publisher","first-page":"13","DOI":"10.3389\/fdata.2019.00013","volume":"2","author":"A Olteanu","year":"2019","unstructured":"Olteanu, A., Castillo, C., Diaz, F., Kiciman, E.: Social data: Biases, methodological pitfalls, and ethical boundaries. Front. Big Data 2, 13 (2019)","journal-title":"Front. Big Data"},{"key":"849_CR85","first-page":"671","volume":"104","author":"S Barocas","year":"2016","unstructured":"Barocas, S., Selbst, A.D.: Big data\u2019s disparate impact. Calif. L. Rev. 104, 671 (2016)","journal-title":"Calif. L. Rev."},{"key":"849_CR86","unstructured":"Chen, I., Johansson, F.D., Sontag, D.: Why is my classifier discriminatory? In: S.\u00a0Bengio, H.\u00a0Wallach, H.\u00a0Larochelle, K.\u00a0Grauman, N.\u00a0Cesa-Bianchi, R.\u00a0Garnett (eds.) Advances in Neural Information Processing Systems, vol.\u00a031, pp. 3539\u20133550 (2018)"},{"key":"849_CR87","doi-asserted-by":"crossref","unstructured":"Holstein, K., Wortman\u00a0Vaughan, J., Daum\u00e9\u00a0III, H., Dudik, M., Wallach, H.: Improving fairness in machine learning systems: What do industry practitioners need? In: Proceedings of the 2019 CHI conference on human factors in computing systems, pp. 1\u201316 (2019)","DOI":"10.1145\/3290605.3300830"},{"key":"849_CR88","doi-asserted-by":"crossref","unstructured":"Drosou, M., Jagadish, H., Pitoura, E., Stoyanovich, J.: Diversity in big data: A review. Big data 5(2) (2017)","DOI":"10.1089\/big.2016.0054"},{"key":"849_CR89","doi-asserted-by":"crossref","unstructured":"Lin, Y., Guan, Y., Asudeh, A., V., J.H.: Identifying insufficient data coverage in databases with multiple relations. PVLDB 13(11), 2229\u20132242 (2020)","DOI":"10.14778\/3407790.3407821"},{"key":"849_CR90","doi-asserted-by":"crossref","unstructured":"Jin, Z., Xu, M., Sun, C., Asudeh, A., Jagadish, H.: Mithracoverage: A system for investigating population bias for intersectional fairness. In: Proceedings of the 2020 ACM SIGMOD International Conference on Management of Data, pp. 2721\u20132724 (2020)","DOI":"10.1145\/3318464.3384689"},{"key":"849_CR91","unstructured":"Accinelli, C., Minisi, S., Catania, B.: Coverage-based rewriting for data preparation. In: EDBT\/ICDT Workshops (2020)"},{"key":"849_CR92","doi-asserted-by":"crossref","unstructured":"Asudeh, A., Shahbazi, N., Jin, Z., Jagadish, H.: Identifying insufficient data coverage for ordinal continuous-valued attributes. SIGMOD (2021)","DOI":"10.1145\/3448016.3457315"},{"key":"849_CR93","doi-asserted-by":"crossref","unstructured":"Orr, L.J., Balazinska, M., Suciu, D.: Sample debiasing in the themis open world database system. In: SIGMOD, pp. 257\u2013268 (2020)","DOI":"10.1145\/3318464.3380606"},{"issue":"12","key":"849_CR94","first-page":"1185","volume":"9","author":"E Zhu","year":"2016","unstructured":"Zhu, E., Nargesian, F., Pu, K.Q., Miller, R.J.: LSH ensemble: internet-scale domain search. PVLDB 9(12), 1185\u20131196 (2016)","journal-title":"PVLDB"},{"issue":"4","key":"849_CR95","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1145\/3186549.3186559","volume":"46","author":"SW Sadiq","year":"2017","unstructured":"Sadiq, S.W., Dasu, T., Dong, X.L., Freire, J., Ilyas, I.F., Link, S., Miller, R.J., Naumann, F., Zhou, X., Srivastava, D.: Data quality: The role of empiricism. SIGMOD Rec. 46(4), 35\u201343 (2017)","journal-title":"SIGMOD Rec."},{"key":"849_CR96","doi-asserted-by":"crossref","unstructured":"Rekatsinas, T., Deshpande, A., Dong, X.L., Getoor, L., Srivastava, D.: Sourcesight: Enabling effective source selection. In: SIGMOD, pp. 2157\u20132160 (2016)","DOI":"10.1145\/2882903.2899403"},{"key":"849_CR97","doi-asserted-by":"crossref","unstructured":"Shen, Y., Chakrabarti, K., Chaudhuri, S., Ding, B., Novik, L.: Discovering queries based on example tuples. In: SIGMOD, pp. 493\u2013504 (2014)","DOI":"10.1145\/2588555.2593664"},{"key":"849_CR98","doi-asserted-by":"crossref","unstructured":"Qian, L., Cafarella, M.J., Jagadish, H.V.: Sample-driven schema mapping. In: SIGMOD, pp. 73\u201384 (2012)","DOI":"10.1145\/2213836.2213846"},{"key":"849_CR99","doi-asserted-by":"crossref","unstructured":"Lehmberg, O., Bizer, C.: Synthesizing n-ary relations from web tables. In: WIMS, pp. 17:1\u201317:12 (2019)","DOI":"10.1145\/3326467.3326480"},{"issue":"10","key":"849_CR100","first-page":"908","volume":"5","author":"R Pimplikar","year":"2012","unstructured":"Pimplikar, R., Sarawagi, S.: Answering table queries on the web using column keywords. PVLDB 5(10), 908\u2013919 (2012)","journal-title":"PVLDB"},{"key":"849_CR101","doi-asserted-by":"crossref","unstructured":"Brickley, D., Burgess, M., Noy, N.F.: Google dataset search: Building a search engine for datasets in an open web ecosystem. In: WWW, pp. 1365\u20131375 (2019)","DOI":"10.1145\/3308558.3313685"},{"key":"849_CR102","doi-asserted-by":"crossref","unstructured":"Koutris, P., Upadhyaya, P., Balazinska, M., Howe, B., Suciu, D.: Query-based data pricing. J. ACM 62(5), 43:1\u201343:44 (2015)","DOI":"10.1145\/2770870"},{"issue":"9","key":"849_CR103","first-page":"1373","volume":"13","author":"N Chepurko","year":"2020","unstructured":"Chepurko, N., Marcus, R., Zgraggen, E., Fernandez, R.C., Kraska, T., Karger, D.: ARDA: automatic relational data augmentation for machine learning. PVLDB 13(9), 1373\u20131387 (2020)","journal-title":"PVLDB"},{"key":"849_CR104","doi-asserted-by":"crossref","unstructured":"Radosavovic, I., Doll\u00e1r, P., Girshick, R.B., Gkioxari, G., He, K.: Data distillation: Towards omni-supervised learning. In: CVPR, pp. 4119\u20134128 (2018)","DOI":"10.1109\/CVPR.2018.00433"},{"key":"849_CR105","doi-asserted-by":"crossref","unstructured":"Brucato, M., Beltran, J.F., Abouzied, A., Meliou, A.: Scalable package queries in relational database systems. arXiv preprint arXiv:1512.03564 (2015)","DOI":"10.14778\/2904483.2904489"},{"key":"849_CR106","doi-asserted-by":"crossref","unstructured":"Brucato, M., Mannino, M., Abouzied, A., Haas, P.J., Meliou, A.: spaqltools: a stochastic package query interface for scalable constrained optimization. Proceedings of the VLDB Endowment 13(12) (2020)","DOI":"10.14778\/3415478.3415499"},{"key":"849_CR107","doi-asserted-by":"crossref","unstructured":"Erkut, E.: The discrete p-dispersion problem. Eur. J. Oper. Res. 46(1), 48\u201360 (1990)","DOI":"10.1016\/0377-2217(90)90297-O"},{"key":"849_CR108","doi-asserted-by":"crossref","unstructured":"Wang, Y., Fabbri, F., Mathioudakis, M.: Streaming algorithms for diversity maximization with fairness constraints. In: 2022 IEEE 38th International Conference on Data Engineering (ICDE), pp. 41\u201353. IEEE (2022)","DOI":"10.1109\/ICDE53745.2022.00008"},{"key":"849_CR109","doi-asserted-by":"crossref","unstructured":"Wang, Y., Mathioudakis, M., Li, J., Fabbri, F.: Max-min diversification with fairness constraints: Exact and approximation algorithms. In: SIAM nternational Conference on Data Mining (SDM23) (2023)","DOI":"10.1137\/1.9781611977653.ch11"}],"container-title":["The VLDB Journal"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-024-00849-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00778-024-00849-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-024-00849-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,17]],"date-time":"2024-08-17T12:02:22Z","timestamp":1723896142000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00778-024-00849-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,12]]},"references-count":109,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2024,9]]}},"alternative-id":["849"],"URL":"https:\/\/doi.org\/10.1007\/s00778-024-00849-w","relation":{},"ISSN":["1066-8888","0949-877X"],"issn-type":[{"value":"1066-8888","type":"print"},{"value":"0949-877X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,4,12]]},"assertion":[{"value":"15 July 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 January 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 March 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 April 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}