{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T17:49:36Z","timestamp":1765388976498,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":67,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,12]]},"DOI":"10.1145\/3765612.3767256","type":"proceedings-article","created":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T17:45:59Z","timestamp":1765388759000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Augmenting Randomized Controlled Trials with Foundation Models as Synthetic Units"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1631-2570","authenticated-orcid":false,"given":"Shiv","family":"Shankar","sequence":"first","affiliation":[{"name":"University of Massachusetts, Amherst, Massachusetts, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4179-7274","authenticated-orcid":false,"given":"Madalina","family":"Fiterau","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Massachusetts, Amherst, Amherst, MA, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,12,10]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Guideline on adjustment for baseline covariates in clinical trials","author":"European Medicine Agency","year":"2015","unstructured":"European Medicine Agency. Guideline on adjustment for baseline covariates in clinical trials. London: European Medicines Agency, 2015."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308560.3316500"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.adi6000"},{"key":"e_1_3_2_1_4_1","volume-title":"Efficient prediction-powered inference. arXiv preprint arXiv:2311.01453","author":"Angelopoulos Anastasios","year":"2023","unstructured":"Anastasios Angelopoulos, John C Duchi, and Tijana Zrnic. PPI++: Efficient prediction-powered inference. arXiv preprint arXiv:2311.01453, 2023."},{"key":"e_1_3_2_1_5_1","volume-title":"A note on statistical efficiency in prediction-powered inference","author":"Angelopoulos Anastasios N","year":"2023","unstructured":"Anastasios N Angelopoulos, John C Duchi, and Tijana Zrnic. A note on statistical efficiency in prediction-powered inference, 2023."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1001\/jama.1992.03490020088036"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1017\/pan.2023.2"},{"key":"e_1_3_2_1_8_1","volume-title":"Matrix analysis","author":"Bhatia Rajendra","year":"2013","unstructured":"Rajendra Bhatia. Matrix analysis, volume 169. Springer Science & Business Media, 2013."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41698-025-00900-1"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1002\/pst.2376"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1111\/1467-9868.00243"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Victor Chernozhukov Denis Chetverikov Mert Demirer Esther Duflo Christian Hansen Whitney Newey and James Robins. Double\/debiased machine learning for treatment and structural parameters 2018.","DOI":"10.3386\/w23564"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.2023.2191817"},{"key":"e_1_3_2_1_14_1","volume-title":"Efficient randomized experiments using foundation models. arXiv preprint arXiv:2502.04262","author":"Bartolomeis Piersilvio De","year":"2025","unstructured":"Piersilvio De Bartolomeis, Javier Abad, Guanbo Wang, Konstantin Donhauser, Raymond M Duch, Fanny Yang, and Issa J Dahabreh. Efficient randomized experiments using foundation models. arXiv preprint arXiv:2502.04262, 2025."},{"key":"e_1_3_2_1_15_1","volume-title":"Prediction-powered generalization of causal inferences. arXiv preprint arXiv:2406.02873","author":"Demirel Ilker","year":"2024","unstructured":"Ilker Demirel, Ahmed Alaa, Anthony Philippakis, and David Sontag. Prediction-powered generalization of causal inferences. arXiv preprint arXiv:2406.02873, 2024."},{"key":"e_1_3_2_1_16_1","article-title":"A dense transformer foundation model with mixture of experts for multi-task brain image analysis","author":"Ding Rizhi","year":"2025","unstructured":"Rizhi Ding, Hui Lu, and Manhua Liu. Denseformer-moe: A dense transformer foundation model with mixture of experts for multi-task brain image analysis. IEEE Transactions on Medical Imaging, 2025.","journal-title":"IEEE Transactions on Medical Imaging"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jalz.2014.10.003"},{"key":"e_1_3_2_1_18_1","volume-title":"The Thirty-eighth Annual Conference on Neural Information Processing Systems","author":"Fisch Adam","year":"2024","unstructured":"Adam Fisch, Joshua Maynez, R Alex Hofer, Bhuwan Dhingra, Amir Globerson, and William W Cohen. Stratified prediction-powered inference for effective hybrid evaluation of language models. In The Thirty-eighth Annual Conference on Neural Information Processing Systems, 2024."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuroimage.2012.01.021"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1080\/00031305.1984.10483175"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1287\/mksc.2023.0306"},{"key":"e_1_3_2_1_22_1","volume-title":"Another look at inference after prediction. arXiv preprint arXiv:2411.19908","author":"Gronsbell Jessica","year":"2024","unstructured":"Jessica Gronsbell, Jianhui Gao, Yaqi Shi, Zachary R McCaw, and David Cheng. Another look at inference after prediction. arXiv preprint arXiv:2411.19908, 2024."},{"key":"e_1_3_2_1_23_1","volume-title":"Causal discovery with endogenous context variables. arXiv preprint arXiv:2412.04981","author":"G\u00fcnther Wiebke","year":"2024","unstructured":"Wiebke G\u00fcnther, Oana-Iuliana Popescu, Martin Rabel, Urmi Ninad, Andreas Gerhardus, and Jakob Runge. Causal discovery with endogenous context variables. arXiv preprint arXiv:2412.04981, 2024."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/ass087"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1002\/jmri.21049"},{"key":"e_1_3_2_1_26_1","volume-title":"Bayesian dynamic borrowing of historical information with applications to the analysis of large-scale assessments. psychometrika, 88(1):1\u201330","author":"Kaplan David","year":"2023","unstructured":"David Kaplan, Jianshen Chen, Sinan Yavuz, and Weicong Lyu. Bayesian dynamic borrowing of historical information with applications to the analysis of large-scale assessments. psychometrika, 88(1):1\u201330, 2023."},{"key":"e_1_3_2_1_27_1","volume-title":"Robust integration of external control data in randomized trials. arXiv preprint arXiv:2406.17971","author":"Karlsson Rickard","year":"2024","unstructured":"Rickard Karlsson, Guanbo Wang, Jesse Krijthe, and Issa Dahabreh. Robust integration of external control data in randomized trials. arXiv preprint arXiv:2406.17971, 2024."},{"key":"e_1_3_2_1_28_1","first-page":"10","volume-title":"Conference on uncertainty in artificial intelligence","author":"K\u00fcgelgen Julius","unstructured":"Julius K\u00fcgelgen, Alexander Mey, Marco Loog, and Bernhard Sch\u00f6lkopf. Semi-supervised learning, causality, and the conditional cluster assumption. In Conference on uncertainty in artificial intelligence, pages 1\u201310. PMLR, 2020."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772758"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1287\/mksc.2023.0454"},{"key":"e_1_3_2_1_31_1","volume-title":"Prognostic adjustment with efficient estimators to unbiasedly leverage historical data in randomized trials. arXiv preprint arXiv:2305.19180","author":"Liao Lauren","year":"2023","unstructured":"Lauren Liao, Emilie H\u00f8jbjerre-Frandsen, Alan Hubbard, and Alejandro Schuler. Prognostic adjustment with efficient estimators to unbiasedly leverage historical data in randomized trials. arXiv preprint arXiv:2305.19180, 2023."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/asr085"},{"key":"e_1_3_2_1_33_1","first-page":"10","volume-title":"Predictive Intelligence in Medicine: Second International Workshop, PRIME 2019, Held in Conjunction with MICCAI 2019, Shenzhen, China, October 13, 2019, Proceedings 2","author":"Marinescu R\u0103zvan V","unstructured":"R\u0103zvan V Marinescu, Neil P Oxtoby, Alexandra L Young, Esther E Bron, Arthur W Toga, Michael W Weiner, Frederik Barkhof, Nick C Fox, Polina Golland, Stefan Klein, et al. Tadpole challenge: Accurate alzheimer's disease prediction through crowdsourced forecasting of future data. In Predictive Intelligence in Medicine: Second International Workshop, PRIME 2019, Held in Conjunction with MICCAI 2019, Shenzhen, China, October 13, 2019, Proceedings 2, pages 1\u201310. Springer, 2019."},{"key":"e_1_3_2_1_34_1","volume-title":"Assumption-lean and data-adaptive post-prediction inference. arXiv preprint arXiv:2311.14220","author":"Miao Jiacheng","year":"2023","unstructured":"Jiacheng Miao, Xinran Miao, Yixuan Wu, Jiwei Zhao, and Qiongshi Lu. Assumption-lean and data-adaptive post-prediction inference. arXiv preprint arXiv:2311.14220, 2023."},{"issue":"394","key":"e_1_3_2_1_35_1","first-page":"1","article-title":"Revisiting inference after prediction","volume":"24","author":"Motwani Keshav","year":"2023","unstructured":"Keshav Motwani and Daniela Witten. Revisiting inference after prediction. Journal of Machine Learning Research, 24(394):1\u201318, 2023.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_36_1","volume-title":"Cross-fitting and fast remainder rates for semiparametric estimation. arXiv preprint arXiv:1801.09138","author":"Newey Whitney K","year":"2018","unstructured":"Whitney K Newey and James R Robins. Cross-fitting and fast remainder rates for semiparametric estimation. arXiv preprint arXiv:1801.09138, 2018."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1214\/ss\/1177012031"},{"key":"e_1_3_2_1_38_1","volume-title":"New embedding models and api updates","author":"AI.","year":"2024","unstructured":"OpenAI. New embedding models and api updates, 2024."},{"key":"e_1_3_2_1_39_1","first-page":"10904","article-title":"Causal discovery in heterogeneous environments under the sparse mechanism shift hypothesis","volume":"35","author":"Perry Ronan","year":"2022","unstructured":"Ronan Perry, Julius Von K\u00fcgelgen, and Bernhard Sch\u00f6lkopf. Causal discovery in heterogeneous environments under the sparse mechanism shift hypothesis. Advances in Neural Information Processing Systems, 35:10904\u201310917, 2022.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1986.10478361"},{"key":"e_1_3_2_1_41_1","volume-title":"Bruno Dubois, Stanley Durrleman, and Bruno Jedynak. Prediction-powered inference for clinical trials. medRxiv","author":"Poulet Pierre-Emmanuel","year":"2025","unstructured":"Pierre-Emmanuel Poulet, Maylis Tran, Sophie Tezenas du Montcel, Bruno Dubois, Stanley Durrleman, and Bruno Jedynak. Prediction-powered inference for clinical trials. medRxiv, 2025."},{"key":"e_1_3_2_1_42_1","volume-title":"Tool learning with large language models: A survey","author":"Qu Changle","year":"2024","unstructured":"Changle Qu, Sunhao Dai, Xiaochi Wei, Hengyi Cai, Shuaiqiang Wang, Dawei Yin, Jun Xu, and Ji-Rong Wen. Tool learning with large language models: A survey, 2024."},{"key":"e_1_3_2_1_43_1","volume-title":"Estimation of regression coefficients when some regressors are not always observed. Journal of the American statistical Association, 89(427):846\u2013866","author":"Robins James M","year":"1994","unstructured":"James M Robins, Andrea Rotnitzky, and Lue Ping Zhao. Estimation of regression coefficients when some regressors are not always observed. Journal of the American statistical Association, 89(427):846\u2013866, 1994."},{"key":"e_1_3_2_1_44_1","volume-title":"Combining patient-level and summary-level data for alzheimer's disease modeling and simulation: a beta regression meta-analysis. Journal of pharmacokinetics and pharmacodynamics, 39:479\u2013498","author":"Rogers James A","year":"2012","unstructured":"James A Rogers, Daniel Polhamus, William R Gillespie, Kaori Ito, Klaus Romero, Ruolun Qiu, Diane Stephenson, Marc R Gastonguay, and Brian Corrigan. Combining patient-level and summary-level data for alzheimer's disease modeling and simulation: a beta regression meta-analysis. Journal of pharmacokinetics and pharmacodynamics, 39:479\u2013498, 2012."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1214\/11-PS182"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1037\/h0037350"},{"key":"e_1_3_2_1_47_1","volume-title":"The Eleventh International Conference on Learning Representations","author":"Schmutz Hugo","year":"2022","unstructured":"Hugo Schmutz, Olivier Humbert, and Pierre-Alexandre Mattei. Don't fear the unlabelled: safe semi-supervised learning via debiasing. In The Eleventh International Conference on Learning Representations, 2022."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1511656113"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1515\/ijb-2021-0072"},{"key":"e_1_3_2_1_50_1","volume-title":"Causality and Large Models@ NeurIPS","author":"Shankar Shiv","year":"2024","unstructured":"Shiv Shankar, Ritwik Sinha, and Madalina Fiterau. On llm augmented ab experimentation. In Causality and Large Models@ NeurIPS 2024."},{"key":"e_1_3_2_1_51_1","volume-title":"Three-quarter sibling regression for denoising observational data. arXiv preprint arXiv:2101.00074","author":"Shankar Shiv","year":"2020","unstructured":"Shiv Shankar, Daniel Sheldon, Tao Sun, John Pickering, and Thomas G Dietterich. Three-quarter sibling regression for denoising observational data. arXiv preprint arXiv:2101.00074, 2020."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jval.2012.05.004"},{"key":"e_1_3_2_1_53_1","volume-title":"Nathan Scales, Ajay Tanwani, Heather Cole-Lewis, Stephen Pfohl, et al. Large language models encode clinical knowledge. arXiv preprint arXiv:2212.13138","author":"Singhal Karan","year":"2022","unstructured":"Karan Singhal, Shekoofeh Azizi, Tao Tu, S Sara Mahdavi, Jason Wei, Hyung Won Chung, Nathan Scales, Ajay Tanwani, Heather Cole-Lewis, Stephen Pfohl, et al. Large language models encode clinical knowledge. arXiv preprint arXiv:2212.13138, 2022."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.2023.2169699"},{"key":"e_1_3_2_1_55_1","volume-title":"Brainiac: A foundation model for generalized brain mri analysis. medRxiv","author":"Divyanshu Tak","year":"2024","unstructured":"Divyanshu Tak et al. Brainiac: A foundation model for generalized brain mri analysis. medRxiv, 2024."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/ass045"},{"key":"e_1_3_2_1_57_1","volume-title":"An introduction to proximal causal learning. arXiv preprint arXiv:2009.10982","author":"Tchetgen Tchetgen Eric J","year":"2020","unstructured":"Eric J Tchetgen Tchetgen, Andrew Ying, Yifan Cui, Xu Shi, and Wang Miao. An introduction to proximal causal learning. arXiv preprint arXiv:2009.10982, 2020."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2001238117"},{"key":"e_1_3_2_1_59_1","volume-title":"Large language models for market research: A data-augmentation approach","author":"Wang Mengxin","year":"2024","unstructured":"Mengxin Wang, Dennis J. Zhang, and Heng Zhang. Large language models for market research: A data-augmentation approach, 2024."},{"key":"e_1_3_2_1_60_1","volume-title":"Yunxin Joy Jiao, Spencer Papay, Amelia Glaese, John Schulman, and William Fedus. Measuring short-form factuality in large language models. arXiv preprint arXiv:2411.04368","author":"Wei Jason","year":"2024","unstructured":"Jason Wei, Nguyen Karina, Hyung Won Chung, Yunxin Joy Jiao, Spencer Papay, Amelia Glaese, John Schulman, and William Fedus. Measuring short-form factuality in large language models. arXiv preprint arXiv:2411.04368, 2024."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1214\/aos\/1176347021"},{"key":"e_1_3_2_1_62_1","volume-title":"Can llms express their uncertainty? an empirical evaluation of confidence elicitation in llms. arXiv preprint arXiv:2306.13063","author":"Xiong Miao","year":"2023","unstructured":"Miao Xiong, Zhiyuan Hu, Xinyang Lu, Yifei Li, Jie Fu, Junxian He, and Bryan Hooi. Can llms express their uncertainty? an empirical evaluation of confidence elicitation in llms. arXiv preprint arXiv:2306.13063, 2023."},{"key":"e_1_3_2_1_63_1","volume-title":"Journal of the American Statistical Association","author":"Yang Shu","year":"2019","unstructured":"Shu Yang and Peng Ding. Combining multiple observational data sources to estimate causal effects. Journal of the American Statistical Association, 2019."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-025-57587-y"},{"key":"e_1_3_2_1_65_1","volume-title":"LOLA: LLM-assisted online learning algorithm for content experiments. arXiv preprint arXiv:2406.02611","author":"Ye Zikun","year":"2024","unstructured":"Zikun Ye, Hema Yoganarasimhan, and Yufeng Zheng. LOLA: LLM-assisted online learning algorithm for content experiments. arXiv preprint arXiv:2406.02611, 2024."},{"key":"e_1_3_2_1_66_1","volume-title":"Lola: Llm-assisted online learning algorithm for content experiments","author":"Ye Zikun","year":"2024","unstructured":"Zikun Ye, Hema Yoganarasimhan, and Yufeng Zheng. Lola: Llm-assisted online learning algorithm for content experiments, 2024."},{"key":"e_1_3_2_1_67_1","volume-title":"An efficient method of estimating seemingly unrelated regressions and tests for aggregation bias. Journal of the American statistical Association, 57(298):348\u2013368","author":"Zellner Arnold","year":"1962","unstructured":"Arnold Zellner. An efficient method of estimating seemingly unrelated regressions and tests for aggregation bias. Journal of the American statistical Association, 57(298):348\u2013368, 1962."}],"event":{"name":"BCB '25: 16th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","location":"Element Philadelphia Downtown Philadelphia PA USA","acronym":"BCB '25","sponsor":["SIGBio ACM Special Interest Group on Bioinformatics"]},"container-title":["Proceedings of the 16th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3765612.3767256","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T17:46:29Z","timestamp":1765388789000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3765612.3767256"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,12]]},"references-count":67,"alternative-id":["10.1145\/3765612.3767256","10.1145\/3765612"],"URL":"https:\/\/doi.org\/10.1145\/3765612.3767256","relation":{},"subject":[],"published":{"date-parts":[[2025,10,12]]},"assertion":[{"value":"2025-12-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}