{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T11:22:35Z","timestamp":1777548155213,"version":"3.51.4"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2025,7,9]],"date-time":"2025-07-09T00:00:00Z","timestamp":1752019200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,7,9]],"date-time":"2025-07-09T00:00:00Z","timestamp":1752019200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s10489-025-06738-1","type":"journal-article","created":{"date-parts":[[2025,7,10]],"date-time":"2025-07-10T10:11:24Z","timestamp":1752142284000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Transformer-variational autoencoder for estimating individual treatment effect using causal inference framework"],"prefix":"10.1007","volume":"55","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9697-8905","authenticated-orcid":false,"given":"Sohail","family":"Ahmad","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6938-9507","authenticated-orcid":false,"given":"Hong","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,9]]},"reference":[{"key":"6738_CR1","doi-asserted-by":"crossref","unstructured":"Pearl J (2009) Causality. Cambridge University Press","DOI":"10.1017\/CBO9780511803161"},{"key":"6738_CR2","volume-title":"Causal inference: what if","author":"JM Robins","year":"2024","unstructured":"Robins JM, Hernan MA (2024) Causal inference: what if. Press, CRC"},{"issue":"427","key":"6738_CR3","doi-asserted-by":"publisher","first-page":"846","DOI":"10.1080\/01621459.1994.10476818","volume":"89","author":"JM Robins","year":"1994","unstructured":"Robins JM, Rotnitzky A, Zhao LP (1994) Estimation of regression coefficients when some regressors are not always observed. J Am Stat Assoc 89(427):846\u2013866","journal-title":"J Am Stat Assoc"},{"issue":"4","key":"6738_CR4","doi-asserted-by":"publisher","first-page":"962","DOI":"10.1111\/j.1541-0420.2005.00377.x","volume":"61","author":"H Bang","year":"2005","unstructured":"Bang H, Robins JM (2005) Doubly robust estimation in missing data and causal inference models. Biometrics 61(4):962\u2013973","journal-title":"Biometrics"},{"issue":"4","key":"6738_CR5","first-page":"523","volume":"22","author":"JDY Kang","year":"2007","unstructured":"Kang JDY, Schafer JL (2007) Demystifying double robustness: a comparison of alternative strategies for estimating a population mean from incomplete data. Stat Sci 22(4):523\u2013539","journal-title":"Stat Sci"},{"issue":"7","key":"6738_CR6","doi-asserted-by":"publisher","first-page":"761","DOI":"10.1093\/aje\/kwq439","volume":"173","author":"MJ Funk","year":"2011","unstructured":"Funk MJ, Westreich D, Wiesen C, Sturmer T, Brookhart MA, Davidian M (2011) Doubly robust estimation of causal effects. Am J Epidem 173(7):761\u2013767","journal-title":"Am J Epidem"},{"issue":"27","key":"6738_CR7","doi-asserted-by":"publisher","first-page":"7353","DOI":"10.1073\/pnas.1510489113","volume":"113","author":"S Athey","year":"2016","unstructured":"Athey S, Imbens G (2016) Recursive partitioning for heterogeneous causal effects. Proc Natl Acad Sci 113(27):7353\u20137360","journal-title":"Proc Natl Acad Sci"},{"issue":"523","key":"6738_CR8","doi-asserted-by":"publisher","first-page":"1228","DOI":"10.1080\/01621459.2017.1319839","volume":"113","author":"S Wager","year":"2018","unstructured":"Wager S, Athey S (2018) Estimation and inference of heterogeneous treatment effects using random forests. J Am Stat Assoc 113(523):1228\u20131242","journal-title":"J Am Stat Assoc"},{"key":"6738_CR9","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman L (2001) Random forests. Mach Learn 45:5\u201332","journal-title":"Mach Learn"},{"issue":"1","key":"6738_CR10","doi-asserted-by":"publisher","first-page":"e326","DOI":"10.1002\/sta4.326","volume":"10","author":"T Hillis","year":"2021","unstructured":"Hillis T, Guarcello MA, Levine RA, Fan J (2021) Causal inference in the presence of missing data using a random forest-based matching algorithm. Stat 10(1):e326","journal-title":"Stat"},{"issue":"1","key":"6738_CR11","first-page":"C1","volume":"21","author":"V Chernozhukov","year":"2018","unstructured":"Chernozhukov V, Chetverikov D, Demirer M, Duflo E, Hansen C, Newey W, Robins J (2018) Double\/debiased machine learning for treatment and structural parameters. Econ J 21(1):C1\u2013C68","journal-title":"Econ J"},{"key":"6738_CR12","unstructured":"Shalit U, Johansson FD, Sontag D (2017a) Estimating individual treatment effect: generalization bounds and algorithms. In: International conference on machine learning, pp 3076\u20133085. PMLR"},{"key":"6738_CR13","unstructured":"Ma Y, Tresp V (2021) Causal inference under networked interference and intervention policy enhancement. In: International conference on artificial intelligence and statistics. PMLR, pp 3700\u20133708"},{"key":"6738_CR14","unstructured":"Melnychuk V, Frauen D, Feuerriegel S (2022) Causal transformer for estimating counterfactual outcomes. In: International conference on machine learning. PMLR, pp 15293\u201315329"},{"key":"6738_CR15","unstructured":"Zhang Y-F, Zhang H, Lipton ZC, Li LE, Xing EP (2023b) Exploring transformer backbones for heterogeneous treatment effect estimation. Trans Mach Learn Res. ISSN 2835-8856"},{"key":"6738_CR16","unstructured":"Zhang J, Jennings J, Hilmkil A, Pawlowski N, Zhang C, Ma C (2023a) Towards causal foundation model: on duality between causal inference and attention. arXiv:2310.00809"},{"key":"6738_CR17","unstructured":"Louizos C, Shalit U, Mooij JM, Sontag D, Zemel R, Welling M (2017) Causal effect inference with deep latent-variable models. Adv Neural Inf Process Syst 30"},{"key":"6738_CR18","doi-asserted-by":"crossref","unstructured":"Dukes O, Shpitser I, Tchetgen\u00a0Tchetgen EJ (2023) Proximal mediation analysis. Biometrika 110(4):973\u2013987","DOI":"10.1093\/biomet\/asad015"},{"issue":"4","key":"6738_CR19","doi-asserted-by":"publisher","first-page":"987","DOI":"10.1093\/biomet\/asy038","volume":"105","author":"W Miao","year":"2018","unstructured":"Miao W, Geng Z, Tchetgen Tchetgen EJ (2018) Identifying causal effects with proxy variables of an unmeasured confounder. Biometrika 105(4):987\u2013993","journal-title":"Biometrika"},{"issue":"476","key":"6738_CR20","doi-asserted-by":"publisher","first-page":"1607","DOI":"10.1198\/016214505000001366","volume":"101","author":"Z Tan","year":"2006","unstructured":"Tan Z (2006) Regression and weighting methods for causal inference using instrumental variables. J Am Stat Assoc 101(476):1607\u20131618","journal-title":"J Am Stat Assoc"},{"key":"6738_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12874-021-01383-x","volume":"21","author":"C Belthangady","year":"2021","unstructured":"Belthangady C, Stedden W, Norgeot B (2021) Minimizing bias in massive multi-arm observational studies with bcaus: balancing covariates automatically using supervision. BMC Med Res Methodol 21:1\u201310","journal-title":"BMC Med Res Methodol"},{"key":"6738_CR22","doi-asserted-by":"publisher","first-page":"104339","DOI":"10.1016\/j.jbi.2023.104339","volume":"140","author":"G Tesei","year":"2023","unstructured":"Tesei G, Giampanis S, Shi J, Norgeot B (2023) Learning end-to-end patient representations through self-supervised covariate balancing for causal treatment effect estimation. J Biomed Inf 140:104339","journal-title":"J Biomed Inf"},{"key":"6738_CR23","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. In: Guyon I, Luxburg UV, Bengio S, Wallach H, Fergus R, Vishwanathan S, Garnett R (eds) Advances in neural information processing systems. Long Beach, CA, USA. Curran Associates, Inc, vol 30, pp 5998\u20136008"},{"issue":"1","key":"6738_CR24","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1198\/jcgs.2010.08162","volume":"20","author":"JL Hill","year":"2011","unstructured":"Hill JL (2011) Bayesian nonparametric modeling for causal inference. J Comput Graph Stat 20(1):217\u2013240","journal-title":"J Comput Graph Stat"},{"key":"6738_CR25","unstructured":"Johansson F, Shalit U, Sontag D (2016) Learning representations for counterfactual inference. In: International conference on machine learning. PMLR, pp 3020\u20133029"},{"key":"6738_CR26","unstructured":"Yoon J, Jordon J, Van Der\u00a0Schaar M (2018) Ganite: estimation of individualized treatment effects using generative adversarial nets. In: International conference on learning representations"},{"key":"6738_CR27","doi-asserted-by":"crossref","unstructured":"Chipman HA, George EI, McCulloch RE (2010) Bart: bayesian additive regression trees. Ann Appl Stat 266-298","DOI":"10.1214\/09-AOAS285"},{"key":"6738_CR28","unstructured":"Shalit U, Johansson FD, Sontag D (2017b) Estimating individual treatment effect: generalization bounds and algorithms. In: International conference on machine learning. PMLR, pp 3076\u20133085"},{"key":"6738_CR29","doi-asserted-by":"publisher","first-page":"103303","DOI":"10.1016\/j.jbi.2019.103303","volume":"100","author":"P Chen","year":"2019","unstructured":"Chen P, Dong W, Xudong L, Kaymak U, He K, Huang Z (2019) Deep representation learning for individualized treatment effect estimation using electronic health records. J Biomed Inf 100:103303","journal-title":"J Biomed Inf"},{"key":"6738_CR30","unstructured":"Shi C, Blei D, Veitch V (2019) Adapting neural networks for the estimation of treatment effects. Adv Neural Inf Process Syst 32"},{"key":"6738_CR31","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1023\/A:1007665907178","volume":"37","author":"MI Jordan","year":"1999","unstructured":"Jordan MI, Ghahramani Z, Jaakkola TS, Saul LK (1999) An introduction to variational methods for graphical models. Mach Learn 37:183\u2013233","journal-title":"Mach Learn"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-06738-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-025-06738-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-06738-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T15:56:21Z","timestamp":1758297381000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-025-06738-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,9]]},"references-count":31,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["6738"],"URL":"https:\/\/doi.org\/10.1007\/s10489-025-06738-1","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7,9]]},"assertion":[{"value":"17 June 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 July 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"855"}}