{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,2]],"date-time":"2026-03-02T22:09:02Z","timestamp":1772489342220,"version":"3.50.1"},"reference-count":100,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,1,24]],"date-time":"2024-01-24T00:00:00Z","timestamp":1706054400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,24]],"date-time":"2024-01-24T00:00:00Z","timestamp":1706054400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat Mach Intell"],"DOI":"10.1038\/s42256-023-00781-8","type":"journal-article","created":{"date-parts":[[2024,1,24]],"date-time":"2024-01-24T17:02:39Z","timestamp":1706115759000},"page":"15-24","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Improving generalization of machine learning-identified biomarkers using causal modelling with examples from immune receptor diagnostics"],"prefix":"10.1038","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2484-3868","authenticated-orcid":false,"given":"Milena","family":"Pavlovi\u0107","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1639-1424","authenticated-orcid":false,"given":"Ghadi S.","family":"Al Hajj","sequence":"additional","affiliation":[]},{"given":"Chakravarthi","family":"Kanduri","sequence":"additional","affiliation":[]},{"given":"Johan","family":"Pensar","sequence":"additional","affiliation":[]},{"given":"Mollie E.","family":"Wood","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8860-704X","authenticated-orcid":false,"given":"Ludvig M.","family":"Sollid","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2622-5032","authenticated-orcid":false,"given":"Victor","family":"Greiff","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4959-1409","authenticated-orcid":false,"given":"Geir K.","family":"Sandve","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,1,24]]},"reference":[{"key":"781_CR1","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1038\/nrg2554","volume":"10","author":"KA Frazer","year":"2009","unstructured":"Frazer, K. A., Murray, S. S., Schork, N. J. & Topol, E. J. Human genetic variation and its contribution to complex traits. Nat. Rev. Genet. 10, 241\u2013251 (2009).","journal-title":"Nat. Rev. Genet."},{"key":"781_CR2","doi-asserted-by":"publisher","first-page":"1150","DOI":"10.3389\/fgene.2019.01150","volume":"10","author":"WJ Locke","year":"2019","unstructured":"Locke, W. J. et al. DNA methylation cancer biomarkers: translation to the clinic. Front. Genet. 10, 1150 (2019).","journal-title":"Front. Genet."},{"key":"781_CR3","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1038\/nrg.2016.10","volume":"17","author":"SA Byron","year":"2016","unstructured":"Byron, S. A., Van Keuren-Jensen, K. R., Engelthaler, D. M., Carpten, J. D. & Craig, D. W. Translating RNA sequencing into clinical diagnostics: opportunities and challenges. Nat. Rev. Genet. 17, 257\u2013271 (2016).","journal-title":"Nat. Rev. Genet."},{"key":"781_CR4","doi-asserted-by":"publisher","first-page":"973","DOI":"10.1002\/jgh3.12659","volume":"5","author":"K Huang","year":"2021","unstructured":"Huang, K., Wu, L. & Yang, Y. Gut microbiota: an emerging biological diagnostic and treatment approach for gastrointestinal diseases. JGH Open 5, 973\u2013975 (2021).","journal-title":"JGH Open"},{"key":"781_CR5","doi-asserted-by":"publisher","first-page":"626793","DOI":"10.3389\/fimmu.2021.626793","volume":"12","author":"RA Arnaout","year":"2021","unstructured":"Arnaout, R. A. et al. The future of blood testing is the immunome. Front. Immunol 12, 626793 (2021).","journal-title":"Front. Immunol"},{"key":"781_CR6","doi-asserted-by":"publisher","first-page":"463","DOI":"10.1097\/COH.0b013e32833ed177","volume":"5","author":"K Strimbu","year":"2010","unstructured":"Strimbu, K. & Tavel, J. A. What are biomarkers? Curr. Opin. HIV AIDS 5, 463\u2013466 (2010).","journal-title":"Curr. Opin. HIV AIDS"},{"key":"781_CR7","first-page":"345","volume":"21","author":"A Subbaswamy","year":"2020","unstructured":"Subbaswamy, A. & Saria, S. From development to deployment: dataset shift, causality and shift-stable models in health AI. Biostatistics 21, 345\u2013352 (2020).","journal-title":"Biostatistics"},{"key":"781_CR8","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-020-17478-w","volume":"11","author":"DC Castro","year":"2020","unstructured":"Castro, D. C., Walker, I. & Glocker, B. Causality matters in medical imaging. Nat. Commun. 11, 3673 (2020).","journal-title":"Nat. Commun."},{"key":"781_CR9","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1038\/s41576-021-00434-9","volume":"23","author":"S Whalen","year":"2021","unstructured":"Whalen, S., Schreiber, J., Noble, W. S. & Pollard, K. S. Navigating the pitfalls of applying machine learning in genomics. Nat. Rev. Genet. 23, 169\u2013181 (2021).","journal-title":"Nat. Rev. Genet."},{"key":"781_CR10","doi-asserted-by":"publisher","DOI":"10.1093\/gigascience\/giab055","volume":"10","author":"J Dock\u00e8s","year":"2021","unstructured":"Dock\u00e8s, J., Varoquaux, G. & Poline, J.-B. Preventing dataset shift from breaking machine-learning biomarkers. GigaScience. 10, giab055 (2021).","journal-title":"GigaScience."},{"key":"781_CR11","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1613\/jair.1872","volume":"26","author":"H Daum\u00e9","year":"2006","unstructured":"Daum\u00e9, H. & Marcu, D. Domain adaptation for statistical classifiers. J. Artif. Intell. Res. 26, 101\u2013126 (2006).","journal-title":"J. Artif. Intell. Res."},{"key":"781_CR12","doi-asserted-by":"publisher","first-page":"766","DOI":"10.1109\/TPAMI.2019.2945942","volume":"43","author":"WM Kouw","year":"2021","unstructured":"Kouw, W. M. & Loog, M. A review of domain adaptation without target labels. IEEE Trans. Pattern Anal. Mach. Intell. 43, 766\u2013785 (2021).","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"781_CR13","first-page":"8052","volume":"35","author":"J Wang","year":"2023","unstructured":"Wang, J. et al. Generalizing to unseen domains: a survey on domain generalization. IEEE Trans. Knowl. Data Eng. 35, 8052\u20138072 (2023).","journal-title":"IEEE Trans. Knowl. Data Eng"},{"key":"781_CR14","unstructured":"Gulrajani, I. & Lopez-Paz, D. In search of lost domain generalization. Preprint at https:\/\/arxiv.org\/abs\/2007.01434 (2020)."},{"key":"781_CR15","doi-asserted-by":"publisher","unstructured":"Liu, J. et al. Towards out-of-distribution generalization: a survey. Preprint at https:\/\/doi.org\/10.48550\/arXiv.2108.13624 (2023).","DOI":"10.48550\/arXiv.2108.13624"},{"key":"781_CR16","doi-asserted-by":"publisher","unstructured":"Pearl, J. Causality (Cambridge Univ. Press, 2009); https:\/\/doi.org\/10.1017\/CBO9780511803161","DOI":"10.1017\/CBO9780511803161"},{"key":"781_CR17","unstructured":"Peters, J., Janzing, D. & Sch\u00f6lkopf, B. Elements of Causal Inference: Foundations and Learning Algorithms (MIT Press, 2017)."},{"key":"781_CR18","volume-title":"Causal Inference","author":"M Hern\u00e1n","year":"2020","unstructured":"Hern\u00e1n, M. & Robins, J. Causal Inference: What If (Chapman & Hall\/CRC, 2020)."},{"key":"781_CR19","doi-asserted-by":"publisher","first-page":"527","DOI":"10.1214\/23-STS902","volume":"38","author":"D Rothenh\u00e4usler","year":"2023","unstructured":"Rothenh\u00e4usler, D. & B\u00fchlmann, P. Distributionally robust and generalizable inference. Statist. Sci. 38, 527\u2013542 (2023).","journal-title":"Statist. Sci."},{"key":"781_CR20","doi-asserted-by":"publisher","unstructured":"Kaddour, J., Lynch, A., Liu, Q., Kusner, M. J. & Silva, R. Causal machine learning: a survey and open problems. Preprint at https:\/\/doi.org\/10.48550\/arXiv.2206.15475 (2022).","DOI":"10.48550\/arXiv.2206.15475"},{"key":"781_CR21","doi-asserted-by":"publisher","first-page":"371","DOI":"10.1146\/annurev-statistics-031017-100630","volume":"5","author":"C Heinze-Deml","year":"2018","unstructured":"Heinze-Deml, C., Maathuis, M. H. & Meinshausen, N. Causal structure learning. Annu. Rev. Stat. Appl. 5, 371\u2013391 (2018).","journal-title":"Annu. Rev. Stat. Appl."},{"key":"781_CR22","doi-asserted-by":"publisher","DOI":"10.1007\/s10208-022-09581-9","author":"C Squires","year":"2022","unstructured":"Squires, C. & Uhler, C. Causal structure learning: a combinatorial perspective. Found. Comput. Math. https:\/\/doi.org\/10.1007\/s10208-022-09581-9 (2022).","journal-title":"Found. Comput. Math."},{"key":"781_CR23","doi-asserted-by":"publisher","first-page":"947","DOI":"10.1111\/rssb.12167","volume":"78","author":"J Peters","year":"2016","unstructured":"Peters, J., B\u00fchlmann, P. & Meinshausen, N. Causal inference by using invariant prediction: identification and confidence intervals. J. R. Stat. Soc. B Stat. Methodol. 78, 947\u20131012 (2016).","journal-title":"J. R. Stat. Soc. B Stat. Methodol."},{"key":"781_CR24","doi-asserted-by":"publisher","unstructured":"Arjovsky, M., Bottou, L., Gulrajani, I. & Lopez-Paz, D. Invariant risk minimization. Preprint at https:\/\/doi.org\/10.48550\/arXiv.1907.02893 (2020).","DOI":"10.48550\/arXiv.1907.02893"},{"key":"781_CR25","first-page":"20782","volume":"35","author":"Y Jiang","year":"2022","unstructured":"Jiang, Y. & Veitch, V. Invariant and transportable representations for anti-causal domain shifts. Adv. Neural Inf. Process Syst. 35, 20782\u201320794 (2022).","journal-title":"Adv. Neural Inf. Process Syst."},{"key":"781_CR26","first-page":"10846","volume":"31","author":"S Magliacane","year":"2018","unstructured":"Magliacane, S. et al. Domain adaptation by using causal inference to predict invariant conditional distributions. Adv. Neural Inf. Process Syst. 31, 10846\u201310856 (2018).","journal-title":"Adv. Neural Inf. Process Syst."},{"key":"781_CR27","doi-asserted-by":"publisher","first-page":"612","DOI":"10.1109\/JPROC.2021.3058954","volume":"109","author":"B Sch\u00f6lkopf","year":"2021","unstructured":"Sch\u00f6lkopf, B. et al. Toward causal representation learning. Proc. IEEE 109, 612\u2013634 (2021).","journal-title":"Proc. IEEE"},{"key":"781_CR28","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1038\/s42256-022-00445-z","volume":"4","author":"P Cui","year":"2022","unstructured":"Cui, P. & Athey, S. Stable learning establishes some common ground between causal inference and machine learning. Nat. Mach. Intell. 4, 110\u2013115 (2022).","journal-title":"Nat. Mach. Intell."},{"key":"781_CR29","doi-asserted-by":"publisher","first-page":"7345","DOI":"10.1073\/pnas.1510507113","volume":"113","author":"E Bareinboim","year":"2016","unstructured":"Bareinboim, E. & Pearl, J. Causal inference and the data-fusion problem. Proc. Natl Acad. Sci. USA 113, 7345\u20137352 (2016).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"781_CR30","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-020-17419-7","volume":"11","author":"JG Richens","year":"2020","unstructured":"Richens, J. G., Lee, C. M. & Johri, S. Improving the accuracy of medical diagnosis with causal machine learning. Nat. Commun. 11, 3923 (2020).","journal-title":"Nat. Commun."},{"key":"781_CR31","doi-asserted-by":"publisher","first-page":"369","DOI":"10.1038\/s42256-020-0197-y","volume":"2","author":"M Prosperi","year":"2020","unstructured":"Prosperi, M. et al. Causal inference and counterfactual prediction in machine learning for actionable healthcare. Nat. Mach. Intell. 2, 369\u2013375 (2020).","journal-title":"Nat. Mach. Intell."},{"key":"781_CR32","doi-asserted-by":"publisher","first-page":"678047","DOI":"10.3389\/fmed.2021.678047","volume":"8","author":"Y Raita","year":"2021","unstructured":"Raita, Y., Camargo, C. A., Liang, L. & Hasegawa, K. Big data, data science and causal inference: a primer for clinicians. Front. Med. 8, 678047 (2021).","journal-title":"Front. Med."},{"key":"781_CR33","unstructured":"Sch\u00f6lkopf, B. et al. On causal and anticausal learning. In Proc. 29th International Conference on Machine Learning 459\u2013466 (Omnipress, 2012)."},{"key":"781_CR34","doi-asserted-by":"publisher","DOI":"10.1016\/j.coisb.2020.10.010","author":"V Greiff","year":"2020","unstructured":"Greiff, V., Yaari, G. & Cowell, L. Mining adaptive immune receptor repertoires for biological and clinical information using machine learning. Curr. Opin. Syst. Biol. https:\/\/doi.org\/10.1016\/j.coisb.2020.10.010 (2020).","journal-title":"Curr. Opin. Syst. Biol."},{"key":"781_CR35","doi-asserted-by":"publisher","first-page":"659","DOI":"10.1038\/ng.3822","volume":"49","author":"RO Emerson","year":"2017","unstructured":"Emerson, R. O. et al. Immunosequencing identifies signatures of cytomegalovirus exposure history and HLA-mediated effects on the T cell repertoire. Nat. Genet. 49, 659\u2013665 (2017).","journal-title":"Nat. Genet."},{"key":"781_CR36","doi-asserted-by":"publisher","DOI":"10.1186\/s12916-019-1426-2","volume":"17","author":"CJ Kelly","year":"2019","unstructured":"Kelly, C. J., Karthikesalingam, A., Suleyman, M., Corrado, G. & King, D. Key challenges for delivering clinical impact with artificial intelligence. BMC Med. 17, 195 (2019).","journal-title":"BMC Med."},{"key":"781_CR37","doi-asserted-by":"publisher","first-page":"2689","DOI":"10.4049\/jimmunol.1302064","volume":"192","author":"OV Britanova","year":"2014","unstructured":"Britanova, O. V. et al. Age-related decrease in TCR repertoire diversity measured with deep and normalized sequence profiling. J. Immunol. 192, 2689\u20132698 (2014).","journal-title":"J. Immunol."},{"key":"781_CR38","doi-asserted-by":"publisher","first-page":"2168","DOI":"10.1073\/pnas.1716146115","volume":"115","author":"T Schneider-Hohendorf","year":"2018","unstructured":"Schneider-Hohendorf, T. et al. Sex bias in MHC I-associated shaping of the adaptive immune system. Proc. Natl Acad. Sci. USA 115, 2168\u20132173 (2018).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"781_CR39","doi-asserted-by":"publisher","first-page":"2209","DOI":"10.1101\/gr.275373.121","volume":"31","author":"A Slabodkin","year":"2021","unstructured":"Slabodkin, A. et al. Individualized VDJ recombination predisposes the available Ig sequence space. Genome Res. 31, 2209\u20132224 (2021).","journal-title":"Genome Res."},{"key":"781_CR40","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1038\/nri.2017.143","volume":"18","author":"CA Dendrou","year":"2018","unstructured":"Dendrou, C. A., Petersen, J., Rossjohn, J. & Fugger, L. HLA variation and disease. Nat. Rev. Immunol. 18, 325\u2013339 (2018).","journal-title":"Nat. Rev. Immunol."},{"key":"781_CR41","doi-asserted-by":"publisher","first-page":"393","DOI":"10.1038\/s41588-022-01032-z","volume":"54","author":"K Ishigaki","year":"2022","unstructured":"Ishigaki, K. et al. HLA autoimmune risk alleles restrict the hypervariable region of T cell receptors. Nat. Genet. 54, 393\u2013402 (2022).","journal-title":"Nat. Genet."},{"key":"781_CR42","doi-asserted-by":"publisher","first-page":"236","DOI":"10.1038\/s41587-020-0656-3","volume":"39","author":"P Barennes","year":"2021","unstructured":"Barennes, P. et al. Benchmarking of T cell receptor repertoire profiling methods reveals large systematic biases. Nat. Biotechnol. 39, 236\u2013245 (2021).","journal-title":"Nat. Biotechnol."},{"key":"781_CR43","doi-asserted-by":"publisher","first-page":"e66274","DOI":"10.7554\/eLife.66274","volume":"10","author":"J Tr\u00fcck","year":"2021","unstructured":"Tr\u00fcck, J. et al. Biological controls for standardization and interpretation of adaptive immune receptor repertoire profiling. eLife 10, e66274 (2021).","journal-title":"eLife"},{"key":"781_CR44","doi-asserted-by":"publisher","first-page":"e69157","DOI":"10.7554\/eLife.69157","volume":"12","author":"AO Smirnova","year":"2023","unstructured":"Smirnova, A. O. et al. The use of non-functional clonotypes as a natural calibrator for quantitative bias correction in adaptive immune receptor repertoire profiling. eLife 12, e69157 (2023).","journal-title":"eLife"},{"key":"781_CR45","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1186\/s12979-020-00195-9","volume":"17","author":"C Krishna","year":"2020","unstructured":"Krishna, C., Chowell, D., G\u00f6nen, M., Elhanati, Y. & Chan, T. A. Genetic and environmental determinants of human TCR repertoire diversity. Immun. Ageing 17, 26 (2020).","journal-title":"Immun. Ageing"},{"key":"781_CR46","doi-asserted-by":"publisher","first-page":"626","DOI":"10.1038\/nri.2016.90","volume":"16","author":"SL Klein","year":"2016","unstructured":"Klein, S. L. & Flanagan, K. L. Sex differences in immune responses. Nat. Rev. Immunol. 16, 626\u2013638 (2016).","journal-title":"Nat. Rev. Immunol."},{"key":"781_CR47","doi-asserted-by":"publisher","first-page":"16","DOI":"10.3109\/09513590.2013.852531","volume":"30","author":"C Castelo-Branco","year":"2014","unstructured":"Castelo-Branco, C. & Soveral, I. The immune system and aging: a review. Gynecol. Endocrinol. 30, 16\u201322 (2014).","journal-title":"Gynecol. Endocrinol."},{"key":"781_CR48","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1080\/09332480.2019.1579578","volume":"32","author":"MA Hern\u00e1n","year":"2019","unstructured":"Hern\u00e1n, M. A., Hsu, J. & Healy, B. A second chance to get causal inference right: a classification of data science tasks. Chance 32, 42\u201349 (2019).","journal-title":"Chance"},{"key":"781_CR49","unstructured":"Blaas, A., Miller, A., Zappella, L., Jacobsen, J.-H. & Heinze-Deml, C. Considerations for distribution shift robustness in health. In Proc. Machine Learning for Healthcare Workshop (ICLR, 2023)."},{"key":"781_CR50","doi-asserted-by":"publisher","first-page":"733","DOI":"10.1038\/nrg2825","volume":"11","author":"JT Leek","year":"2010","unstructured":"Leek, J. T. et al. Tackling the widespread and critical impact of batch effects in high-throughput data. Nat. Rev. Genet. 11, 733\u2013739 (2010).","journal-title":"Nat. Rev. Genet."},{"key":"781_CR51","doi-asserted-by":"publisher","first-page":"1412","DOI":"10.1038\/s41590-022-01309-9","volume":"23","author":"L Bonaguro","year":"2022","unstructured":"Bonaguro, L. et al. A guide to systems-level immunomics. Nat. Immunol. 23, 1412\u20131423 (2022).","journal-title":"Nat. Immunol."},{"key":"781_CR52","unstructured":"Bareinboim, E. & Pearl, J. Controlling selection bias in causal inference. In Proc. 15th International Conference on Artificial Intelligence and Statistics Vol. 22 (eds Lawrence, N. et al.), 100\u2013108 (PMLR, 2012)."},{"key":"781_CR53","doi-asserted-by":"crossref","unstructured":"Correa, J., Tian, J. & Bareinboim, E. Generalized adjustment under confounding and selection biases. In Proc. 32nd AAAI Conference on Artificial Intelligence Vol. 32, 6335\u20136342 (AAAI, 2018).","DOI":"10.1609\/aaai.v32i1.12125"},{"key":"781_CR54","doi-asserted-by":"publisher","first-page":"20202815","DOI":"10.1098\/rspb.2020.2815","volume":"288","author":"ZM Laubach","year":"2021","unstructured":"Laubach, Z. M., Murray, E. J., Hoke, K. L., Safran, R. J. & Perng, W. A biologist\u2019s guide to model selection and causal inference. Proc. R. Soc. B Biol. Sci. 288, 20202815 (2021).","journal-title":"Proc. R. Soc. B Biol. Sci."},{"key":"781_CR55","doi-asserted-by":"publisher","first-page":"615","DOI":"10.1097\/01.ede.0000135174.63482.43","volume":"15","author":"MA Hern\u00e1n","year":"2004","unstructured":"Hern\u00e1n, M. A., Hern\u00e1ndez-D\u00edaz, S. & Robins, J. M. A structural approach to selection bias. Epidemiology 15, 615\u2013625 (2004).","journal-title":"Epidemiology"},{"key":"781_CR56","unstructured":"Zhang, K., Sch\u00f6lkopf, B., Muandet, K. & Wang, Z. Domain adaptation under target and conditional shift. In Proc. International Conference on Machine Learning 28 (eds Dasgupta, S. et al.) 819\u2013827 (PMLR, 2013)."},{"key":"781_CR57","first-page":"3290","volume":"33","author":"S Garg","year":"2020","unstructured":"Garg, S., Wu, Y., Balakrishnan, S. & Lipton, Z. C. A unified view of label shift estimation. Adv. Neural Inf. Proc. Syst. 33, 3290\u20133300 (2020).","journal-title":"Adv. Neural Inf. Proc. Syst."},{"key":"781_CR58","doi-asserted-by":"publisher","first-page":"579","DOI":"10.1214\/14-STS486","volume":"29","author":"J Pearl","year":"2014","unstructured":"Pearl, J. & Bareinboim, E. External validity: from Do-calculus to transportability across populations. Stat. Sci. 29, 579\u2013595 (2014).","journal-title":"Stat. Sci."},{"key":"781_CR59","doi-asserted-by":"publisher","first-page":"501","DOI":"10.1146\/annurev-statistics-042522-103837","volume":"10","author":"I Degtiar","year":"2023","unstructured":"Degtiar, I. & Rose, S. A review of generalizability and transportability. Annu. Rev. Stat. Appl. 10, 501\u2013524 (2023).","journal-title":"Annu. Rev. Stat. Appl."},{"key":"781_CR60","doi-asserted-by":"publisher","first-page":"995","DOI":"10.1038\/ng.3625","volume":"48","author":"E Sharon","year":"2016","unstructured":"Sharon, E. et al. Genetic variation in MHC proteins is associated with T cell receptor expression biases. Nat. Genet. 48, 995\u20131002 (2016).","journal-title":"Nat. Genet."},{"key":"781_CR61","doi-asserted-by":"publisher","first-page":"3005","DOI":"10.4049\/jimmunol.1601693","volume":"198","author":"B Jabri","year":"2017","unstructured":"Jabri, B. & Sollid, L. M. T cells in Celiac disease. J. Immunol. 198, 3005\u20133014 (2017).","journal-title":"J. Immunol."},{"key":"781_CR62","doi-asserted-by":"publisher","first-page":"422","DOI":"10.1038\/s41416-021-01400-2","volume":"125","author":"E Schaafsma","year":"2021","unstructured":"Schaafsma, E., Fugle, C. M., Wang, X. & Cheng, C. Pan-cancer association of HLA gene expression with cancer prognosis and immunotherapy efficacy. Br. J. Cancer 125, 422\u2013432 (2021).","journal-title":"Br. J. Cancer"},{"key":"781_CR63","doi-asserted-by":"publisher","first-page":"311","DOI":"10.4049\/jimmunol.2300136","volume":"211","author":"CG Rappazzo","year":"2023","unstructured":"Rappazzo, C. G. et al. Defining and studying B cell receptor and TCR interactions. J. Immunol. 211, 311\u2013322 (2023).","journal-title":"J. Immunol."},{"key":"781_CR64","unstructured":"Hendrycks, D., Lee, K. & Mazeika, M. Using pre-training can improve model robustness and uncertainty. In Proc. 36th International Conference on Machine Learning (eds Chaudhuri, K. et al.) 2712\u20132721 (PMLR, 2019)."},{"key":"781_CR65","doi-asserted-by":"publisher","unstructured":"Pradier, M. F. et al. AIRIVA: a deep generative model of adaptive immune repertoires. Preprint at https:\/\/doi.org\/10.48550\/arXiv.2304.13737 (2023).","DOI":"10.48550\/arXiv.2304.13737"},{"key":"781_CR66","doi-asserted-by":"publisher","first-page":"236","DOI":"10.1038\/s42256-023-00619-3","volume":"5","author":"Y Gao","year":"2023","unstructured":"Gao, Y. et al. Pan-Peptide meta learning for T-cell receptor\u2013antigen binding recognition. Nat. Mach. Intell. 5, 236\u2013249 (2023).","journal-title":"Nat. Mach. Intell."},{"key":"781_CR67","doi-asserted-by":"publisher","first-page":"680687","DOI":"10.3389\/fimmu.2021.680687","volume":"12","author":"M Ostrovsky-Berman","year":"2021","unstructured":"Ostrovsky-Berman, M., Frankel, B., Polak, P. & Yaari, G. Immune2vec: embedding B\/T cell receptor sequences in \u211dN using natural language processing. Front. Immunol. 12, 680687 (2021).","journal-title":"Front. Immunol."},{"key":"781_CR68","doi-asserted-by":"publisher","first-page":"bbac378","DOI":"10.1093\/bib\/bbac378","volume":"23","author":"Y Fang","year":"2022","unstructured":"Fang, Y., Liu, X. & Liu, H. Attention-aware contrastive learning for predicting T cell receptor\u2013antigen binding specificity. Brief. Bioinform. 23, bbac378 (2022).","journal-title":"Brief. Bioinform."},{"key":"781_CR69","doi-asserted-by":"publisher","unstructured":"Gupta, G., Kapila, R., Gupta, K. & Raskar, R. Domain generalization in robust invariant representation. Preprint at https:\/\/doi.org\/10.48550\/arXiv.2304.03431 (2023).","DOI":"10.48550\/arXiv.2304.03431"},{"key":"781_CR70","unstructured":"Zhang, J. & Bottou, L. Learning useful representations for shifting tasks and distributions. In Proc. 40th International Conference on Machine Learning (eds Krause, A et al.), 40830\u201340850 (PMLR, 2023)."},{"key":"781_CR71","doi-asserted-by":"publisher","first-page":"1122","DOI":"10.1038\/s41592-021-01205-4","volume":"18","author":"I Walsh","year":"2021","unstructured":"Walsh, I. et al. DOME: recommendations for supervised machine learning validation in biology. Nat. Methods 18, 1122\u20131127 (2021).","journal-title":"Nat. Methods"},{"key":"781_CR72","unstructured":"Wiles, O. et al. A fine-grained analysis on distribution shift. Preprint at https:\/\/arxiv.org\/abs\/2110.11328 (2021)."},{"key":"781_CR73","unstructured":"Byrd, J. & Lipton, Z. What is the effect of importance weighting in deep learning? In Proc. 36th International Conference on Machine Learning (eds Chaudhuri, K. et al.) 872\u2013881 (PMLR, 2019)."},{"key":"781_CR74","doi-asserted-by":"publisher","first-page":"1274","DOI":"10.1038\/ni.3873","volume":"18","author":"F Rubelt","year":"2017","unstructured":"Rubelt, F. et al. Adaptive Immune Receptor Repertoire Community recommendations for sharing immune-repertoire sequencing data. Nat. Immunol. 18, 1274\u20131278 (2017).","journal-title":"Nat. Immunol."},{"key":"781_CR75","doi-asserted-by":"publisher","first-page":"2206","DOI":"10.3389\/fimmu.2018.02206","volume":"9","author":"JA Vander Heiden","year":"2018","unstructured":"Vander Heiden, J. A. et al. AIRR community standardized representations for annotated immune repertoires. Front. Immunol. 9, 2206 (2018).","journal-title":"Front. Immunol."},{"key":"781_CR76","doi-asserted-by":"publisher","first-page":"588","DOI":"10.1038\/s41592-021-01169-5","volume":"18","author":"K Peng","year":"2021","unstructured":"Peng, K. et al. Diversity in immunogenomics: the value and the challenge. Nat. Methods 18, 588\u2013591 (2021).","journal-title":"Nat. Methods"},{"key":"781_CR77","doi-asserted-by":"publisher","first-page":"2177","DOI":"10.1016\/j.immuni.2021.09.015","volume":"54","author":"Y-N Huang","year":"2021","unstructured":"Huang, Y.-N. et al. Ancestral diversity is limited in published T cell receptor sequencing studies. Immunity 54, 2177\u20132179 (2021).","journal-title":"Immunity"},{"key":"781_CR78","unstructured":"Registered Reports (Center for Open Science); https:\/\/www.cos.io\/initiatives\/registered-reports"},{"key":"781_CR79","doi-asserted-by":"publisher","first-page":"e38358","DOI":"10.7554\/eLife.38358","volume":"7","author":"WS DeWitt III","year":"2018","unstructured":"DeWitt, W. S. III et al. Human T cell receptor occurrence patterns encode immune history, genetic background and receptor specificity. eLife 7, e38358 (2018).","journal-title":"eLife"},{"key":"781_CR80","doi-asserted-by":"publisher","unstructured":"Zaslavsky, M. E. et al. Disease diagnostics using machine learning of immune receptors. Preprint at bioRxiv https:\/\/doi.org\/10.1101\/2022.04.26.489314 (2023).","DOI":"10.1101\/2022.04.26.489314"},{"key":"781_CR81","doi-asserted-by":"publisher","first-page":"1649","DOI":"10.1038\/s41591-023-02420-6","volume":"29","author":"C Langenberg","year":"2023","unstructured":"Langenberg, C., Hingorani, A. D. & Whitty, C. J. M. Biological and functional multimorbidity\u2014from mechanisms to management. Nat. Med. 29, 1649\u20131657 (2023).","journal-title":"Nat. Med."},{"key":"781_CR82","doi-asserted-by":"publisher","first-page":"2885","DOI":"10.1214\/21-AOS2064","volume":"49","author":"S Bongers","year":"2021","unstructured":"Bongers, S., Forr\u00e9, P., Peters, J. & Mooij, J. M. Foundations of structural causal models with cycles and latent variables. Ann. Stat. 49, 2885\u20132915 (2021).","journal-title":"Ann. Stat."},{"key":"781_CR83","doi-asserted-by":"publisher","first-page":"447","DOI":"10.1146\/annurev-statistics-022513-115553","volume":"1","author":"B Chakraborty","year":"2014","unstructured":"Chakraborty, B. & Murphy, S. A. Dynamic treatment regimes. Annu. Rev. Stat. Appl. 1, 447\u2013464 (2014).","journal-title":"Annu. Rev. Stat. Appl."},{"key":"781_CR84","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1038\/s41580-019-0127-1","volume":"20","author":"M Bizzarri","year":"2019","unstructured":"Bizzarri, M. et al. A call for a better understanding of causation in cell biology. Nat. Rev. Mol. Cell Biol. 20, 261\u2013262 (2019).","journal-title":"Nat. Rev. Mol. Cell Biol."},{"key":"781_CR85","doi-asserted-by":"publisher","first-page":"1173","DOI":"10.1037\/0022-3514.51.6.1173","volume":"51","author":"RM Baron","year":"1986","unstructured":"Baron, R. M. & Kenny, D. A. The moderator\u2013mediator variable distinction in social psychological research: conceptual, strategic and statistical considerations. J. Pers. Soc. Psychol. 51, 1173\u20131182 (1986).","journal-title":"J. Pers. Soc. Psychol."},{"key":"781_CR86","doi-asserted-by":"publisher","first-page":"738","DOI":"10.1016\/j.it.2015.09.006","volume":"36","author":"V Greiff","year":"2015","unstructured":"Greiff, V., Miho, E., Menzel, U. & Reddy, S. T. Bioinformatic and statistical analysis of adaptive immune repertoires. Trends Immunol. 36, 738\u2013749 (2015).","journal-title":"Trends Immunol."},{"key":"781_CR87","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1038\/nri1292","volume":"4","author":"J Nikolich-\u017dugich","year":"2004","unstructured":"Nikolich-\u017dugich, J., Slifka, M. K. & Messaoudi, I. The many important facets of T-cell repertoire diversity. Nat. Rev. Immunol. 4, 123\u2013132 (2004).","journal-title":"Nat. Rev. Immunol."},{"key":"781_CR88","doi-asserted-by":"publisher","first-page":"485","DOI":"10.3389\/fimmu.2013.00485","volume":"4","author":"V Zarnitsyna","year":"2013","unstructured":"Zarnitsyna, V., Evavold, B., Schoettle, L., Blattman, J. & Antia, R. Estimating the diversity, completeness, and cross-reactivity of the T cell repertoire. Front. Immunol. 4, 485 (2013).","journal-title":"Front. Immunol."},{"key":"781_CR89","doi-asserted-by":"publisher","first-page":"16161","DOI":"10.1073\/pnas.1212755109","volume":"109","author":"A Murugan","year":"2012","unstructured":"Murugan, A., Mora, T., Walczak, A. M. & Callan, C. G. Statistical inference of the generation probability of T-cell receptors from sequence repertoires. Proc. Natl Acad. Sci. USA 109, 16161\u201316166 (2012).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"781_CR90","doi-asserted-by":"publisher","first-page":"575","DOI":"10.1038\/302575a0","volume":"302","author":"S Tonegawa","year":"1983","unstructured":"Tonegawa, S. Somatic generation of antibody diversity. Nature 302, 575\u2013581 (1983).","journal-title":"Nature"},{"key":"781_CR91","doi-asserted-by":"publisher","first-page":"807","DOI":"10.1126\/science.1170020","volume":"324","author":"JA Weinstein","year":"2009","unstructured":"Weinstein, J. A., Jiang, N., White, R. A., Fisher, D. S. & Quake, S. R. High-throughput sequencing of the zebrafish antibody repertoire. Science 324, 807\u2013810 (2009).","journal-title":"Science"},{"key":"781_CR92","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/S1074-7613(00)00006-6","volume":"13","author":"JL Xu","year":"2000","unstructured":"Xu, J. L. & Davis, M. M. Diversity in the CDR3 region of VH is sufficient for most antibody specificities. Immunity 13, 37\u201345 (2000).","journal-title":"Immunity"},{"key":"781_CR93","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1038\/334395a0","volume":"334","author":"MM Davis","year":"1988","unstructured":"Davis, M. M. & Bjorkman, P. J. T-cell antigen receptor genes and T-cell recognition. Nature 334, 395\u2013402 (1988).","journal-title":"Nature"},{"key":"781_CR94","doi-asserted-by":"publisher","first-page":"701","DOI":"10.1039\/C9ME00071B","volume":"4","author":"AJ Brown","year":"2019","unstructured":"Brown, A. J. et al. Augmenting adaptive immunity: progress and challenges in the quantitative engineering and analysis of adaptive immune receptor repertoires. Mol. Syst. Des. Eng. 4, 701\u2013736 (2019).","journal-title":"Mol. Syst. Des. Eng."},{"key":"781_CR95","doi-asserted-by":"publisher","first-page":"13139","DOI":"10.1073\/pnas.1409155111","volume":"111","author":"Q Qi","year":"2014","unstructured":"Qi, Q. et al. Diversity and clonal selection in the human T-cell repertoire. Proc. Natl Acad. Sci. USA 111, 13139\u201313144 (2014).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"781_CR96","doi-asserted-by":"publisher","first-page":"20140243","DOI":"10.1098\/rstb.2014.0243","volume":"370","author":"Y Elhanati","year":"2015","unstructured":"Elhanati, Y. et al. Inferring processes underlying B-cell repertoire diversity. Philos. Trans. R. Soc. Lond. B. Biol. Sci. 370, 20140243 (2015).","journal-title":"Philos. Trans. R. Soc. Lond. B. Biol. Sci."},{"key":"781_CR97","doi-asserted-by":"publisher","DOI":"10.1186\/s13073-015-0169-8","volume":"7","author":"V Greiff","year":"2015","unstructured":"Greiff, V. et al. A bioinformatic framework for immune repertoire diversity profiling enables detection of immunological status. Genome Med. 7, 49 (2015).","journal-title":"Genome Med."},{"key":"781_CR98","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1111\/imr.12665","volume":"284","author":"Y Elhanati","year":"2018","unstructured":"Elhanati, Y., Sethna, Z., Callan, C. G. Jr, Mora, T. & Walczak, A. M. Predicting the spectrum of TCR repertoire sharing with a data-driven model of recombination. Immunol. Rev. 284, 167\u2013179 (2018).","journal-title":"Immunol. Rev."},{"key":"781_CR99","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1038\/s41746-022-00592-y","volume":"5","author":"G Varoquaux","year":"2022","unstructured":"Varoquaux, G. & Cheplygina, V. Machine learning for medical imaging: methodological failures and recommendations for the future. Npj Digit. Med. 5, 48 (2022).","journal-title":"Npj Digit. Med."},{"key":"781_CR100","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1007\/s10994-009-5152-4","volume":"79","author":"S Ben-David","year":"2010","unstructured":"Ben-David, S. et al. A theory of learning from different domains. Mach. Learn. 79, 151\u2013175 (2010).","journal-title":"Mach. Learn."}],"container-title":["Nature Machine Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s42256-023-00781-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-023-00781-8","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-023-00781-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,24]],"date-time":"2024-01-24T17:28:37Z","timestamp":1706117317000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s42256-023-00781-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,24]]},"references-count":100,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2024,1]]}},"alternative-id":["781"],"URL":"https:\/\/doi.org\/10.1038\/s42256-023-00781-8","relation":{},"ISSN":["2522-5839"],"issn-type":[{"value":"2522-5839","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,1,24]]},"assertion":[{"value":"19 April 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 December 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 January 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"V.G. declares advisory board positions in aiNET GmbH, Enpicom BV, Absci, Omniscope and Diagonal Therapeutics. V.G. is a consultant for Adaptyv Biosystems, Specifica Inc., Roche\/Genentech, immunai and LabGenius. The remaining authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}