{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T16:33:07Z","timestamp":1763224387556,"version":"3.45.0"},"reference-count":130,"publisher":"Oxford University Press (OUP)","issue":"11","license":[{"start":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T00:00:00Z","timestamp":1761782400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Chan Zuckerberg Initiative DAF","award":["DAF2023-329596"],"award-info":[{"award-number":["DAF2023-329596"]}]},{"DOI":"10.13039\/100000923","name":"Silicon Valley Community Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000923","id-type":"DOI","asserted-by":"publisher"}]},{"name":"NSF DMS","award":["2436233"],"award-info":[{"award-number":["2436233"]}]},{"name":"NSF CMMI","award":["2440490"],"award-info":[{"award-number":["2440490"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,11,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>Single-cell sequencing technology maps cells to a high-dimensional space encoding their internal activity. Recently-proposed virtual cell models extend this concept, enriching cells\u2019 representations based on patterns learned from pretraining on vast cell atlases.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>This review explores how advances in understanding the structure of natural language embeddings informs ongoing efforts to analyze single-cell datasets. Both fields process unstructured data by partitioning datasets into tokens embedded within a high-dimensional vector space. We discuss how the context of tokens influences the geometry of embedding space, and how low-dimensional manifolds shape this space\u2019s robustness and interpretation. We highlight how new developments in foundation models for language, such as interpretability probes and in-context reasoning, can inform efforts to construct cell atlases and train virtual cell models.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and Implementation<\/jats:title>\n                    <jats:p>Code is available at https:\/\/github.com\/williamgilpin\/celltoken.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btaf595","type":"journal-article","created":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T12:37:35Z","timestamp":1761568655000},"source":"Crossref","is-referenced-by-count":0,"title":["The cell as a token: high-dimensional geometry in language models and cell embeddings"],"prefix":"10.1093","volume":"41","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8666-6951","authenticated-orcid":false,"given":"William","family":"Gilpin","sequence":"first","affiliation":[{"name":"The University of Texas at Austin Department of Physics, , Austin, TX 78712,","place":["United States"]},{"name":"Medici Therapeutics , Boston, MA 02114,","place":["United States"]}]}],"member":"286","published-online":{"date-parts":[[2025,10,30]]},"reference":[{"key":"2025111511265155100_btaf595-B1","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1016\/j.cels.2018.12.008","article-title":"Continuum of gene-expression profiles provides spatial division of labor within a differentiated cell type","volume":"8","author":"Adler","year":"2019","journal-title":"Cell Syst"},{"key":"2025111511265155100_btaf595-B2","doi-asserted-by":"crossref","first-page":"1657","DOI":"10.1038\/s41592-025-02772-6","article-title":"Deep-learning-based gene perturbation effect prediction does not yet outperform simple linear baselines","volume":"22","author":"Ahlmann-Eltze","year":"2025","journal-title":"Nat Methods"},{"year":"2024","author":"Ahn","key":"2025111511265155100_btaf595-B3"},{"author":"Alain","key":"2025111511265155100_btaf595-B4"},{"key":"2025111511265155100_btaf595-B5","first-page":"32","article-title":"What the vec? towards probabilistically grounded embeddings","author":"Allen","year":"2019","journal-title":"Adv Neural Inf Process Syst"},{"key":"2025111511265155100_btaf595-B6","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1038\/s41576-020-00292-x","article-title":"Deciphering cell\u2013cell interactions and communication from gene expression","volume":"22","author":"Armingol","year":"2021","journal-title":"Nat Rev Genet"},{"first-page":"385","year":"2015","author":"Arora","key":"2025111511265155100_btaf595-B7"},{"key":"2025111511265155100_btaf595-B8","doi-asserted-by":"crossref","first-page":"385","DOI":"10.1162\/tacl_a_00106","article-title":"A latent variable model approach to pmi-based word embeddings","volume":"4","author":"Arora","year":"2016","journal-title":"TACL"},{"key":"2025111511265155100_btaf595-B9","doi-asserted-by":"crossref","first-page":"597","DOI":"10.1162\/tacl_a_00288","article-title":"Massively multilingual sentence embeddings for zero-shot cross-lingual transfer and beyond","volume":"7","author":"Artetxe","year":"2019","journal-title":"Trans Assoc Comput Linguist"},{"first-page":"380","year":"2017","author":"Bamler","key":"2025111511265155100_btaf595-B10"},{"key":"2025111511265155100_btaf595-B11","doi-asserted-by":"crossref","first-page":"dev173849","DOI":"10.1242\/dev.173849","article-title":"Comprehensive single cell mrna profiling reveals a detailed roadmap for pancreatic endocrinogenesis","volume":"146","author":"Bastidas-Ponce","year":"2019","journal-title":"Development"},{"key":"2025111511265155100_btaf595-B12","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1162\/tacl_a_00254","article-title":"Analysis methods in neural language processing: a survey","volume":"7","author":"Belinkov","year":"2019","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"2025111511265155100_btaf595-B13","doi-asserted-by":"crossref","first-page":"1408","DOI":"10.1038\/s41587-020-0591-3","article-title":"Generalizing RNA velocity to transient cell states through dynamical modeling","volume":"38","author":"Bergen","year":"2020","journal-title":"Nat Biotechnol"},{"first-page":"2025","year":"2025","author":"Bhasker","key":"2025111511265155100_btaf595-B14"},{"first-page":"2024","year":"2024","author":"Biondo","key":"2025111511265155100_btaf595-B15"},{"first-page":"2206","year":"2022","author":"Borgeaud","key":"2025111511265155100_btaf595-B16"},{"author":"Boukkouri","key":"2025111511265155100_btaf595-B17"},{"key":"2025111511265155100_btaf595-B18","doi-asserted-by":"crossref","first-page":"1757","DOI":"10.1038\/nprot.2016.105","article-title":"Cell painting, a high-content image-based assay for morphological profiling using multiplexed fluorescent dyes","volume":"11","author":"Bray","year":"2016","journal-title":"Nat Protoc"},{"key":"2025111511265155100_btaf595-B19","doi-asserted-by":"crossref","first-page":"1200","DOI":"10.1038\/s41592-020-00979-3","article-title":"Mars: discovering novel cell types across heterogeneous single-cell experiments","volume":"17","author":"Brbi\u0107","year":"2020","journal-title":"Nat Methods"},{"key":"2025111511265155100_btaf595-B20","doi-asserted-by":"crossref","first-page":"7045","DOI":"10.1016\/j.cell.2024.11.015","article-title":"How to build the virtual cell with artificial intelligence: priorities and opportunities","volume":"187","author":"Bunne","year":"2024","journal-title":"Cell"},{"key":"2025111511265155100_btaf595-B21","doi-asserted-by":"crossref","first-page":"411","DOI":"10.1038\/nbt.4096","article-title":"Integrating single-cell transcriptomic data across different conditions, technologies, and species","volume":"36","author":"Butler","year":"2018","journal-title":"Nat Biotechnol"},{"key":"2025111511265155100_btaf595-B22","doi-asserted-by":"crossref","first-page":"798","DOI":"10.1038\/ni.2940","article-title":"Insights into the initiation of tcr signaling","volume":"15","author":"Chakraborty","year":"2014","journal-title":"Nat Immunol"},{"first-page":"119","year":"2022","author":"Chang","key":"2025111511265155100_btaf595-B23"},{"key":"2025111511265155100_btaf595-B24","doi-asserted-by":"crossref","first-page":"e1011288","DOI":"10.1371\/journal.pcbi.1011288","article-title":"The specious art of single-cell genomics","volume":"19","author":"Chari","year":"2023","journal-title":"PLoS Comput Biol"},{"key":"2025111511265155100_btaf595-B25","doi-asserted-by":"crossref","first-page":"1003","DOI":"10.1038\/s41592-023-01899-8","article-title":"Simba: single-cell embedding along with features","volume":"21","author":"Chen","year":"2024","journal-title":"Nat Methods"},{"key":"2025111511265155100_btaf595-B26","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1016\/j.acha.2006.04.006","article-title":"Diffusion maps","volume":"21","author":"Coifman","year":"2006","journal-title":"Appl Comput Harmon Anal"},{"first-page":"8440","year":"2020","author":"Conneau","key":"2025111511265155100_btaf595-B27"},{"key":"2025111511265155100_btaf595-B28","doi-asserted-by":"crossref","first-page":"346","DOI":"10.1038\/s42256-025-01007-9","article-title":"Transformers and genome language models","volume":"7","author":"Consens","year":"2025","journal-title":"Nat Mach Intell"},{"key":"2025111511265155100_btaf595-B29","doi-asserted-by":"crossref","first-page":"393","DOI":"10.1186\/s12864-025-11600-2","article-title":"Benchmarking foundation cell models for post-perturbation rna-seq prediction","volume":"26","author":"Csendes","year":"2025","journal-title":"BMC Genomics"},{"key":"2025111511265155100_btaf595-B30","doi-asserted-by":"crossref","first-page":"1470","DOI":"10.1038\/s41592-024-02201-0","article-title":"Scgpt: toward building a foundation model for single-cell multi-omics using generative ai","volume":"21","author":"Cui","year":"2024","journal-title":"Nat Methods"},{"first-page":"4171","year":"2019","author":"Devlin","key":"2025111511265155100_btaf595-B31"},{"key":"2025111511265155100_btaf595-B32","first-page":"3035","article-title":"Eigenwords: spectral word embeddings","volume":"16","author":"Dhillon","year":"2015","journal-title":"J Mach Learn Res"},{"key":"2025111511265155100_btaf595-B33","doi-asserted-by":"crossref","first-page":"2554","DOI":"10.1038\/s41467-021-22851-4","article-title":"Deep generative model embedding of single-cell rna-seq profiles on hyperspheres and hyperbolic spaces","volume":"12","author":"Ding","year":"2021","journal-title":"Nat Commun"},{"year":"2025","author":"Engels","key":"2025111511265155100_btaf595-B34"},{"key":"2025111511265155100_btaf595-B35","doi-asserted-by":"crossref","first-page":"390","DOI":"10.1038\/s41467-018-07931-2","article-title":"Single-cell rna-seq denoising using a deep count autoencoder","volume":"10","author":"Eraslan","year":"2019","journal-title":"Nat Commun"},{"year":"2019","author":"Ethayarajh","key":"2025111511265155100_btaf595-B36"},{"key":"2025111511265155100_btaf595-B37","doi-asserted-by":"crossref","first-page":"12140","DOI":"10.1038\/s41598-017-11873-y","article-title":"Estimating the intrinsic dimension of datasets by a minimal neighborhood information","volume":"7","author":"Facco","year":"2017","journal-title":"Sci Rep"},{"key":"2025111511265155100_btaf595-B38","doi-asserted-by":"crossref","first-page":"btae020","DOI":"10.1093\/bioinformatics\/btae020","article-title":"Scmae: a masked autoencoder for single-cell rna-seq clustering","volume":"40","author":"Fang","year":"2024","journal-title":"Bioinformatics"},{"key":"2025111511265155100_btaf595-B39","first-page":"10","article-title":"A synopsis of linguistic theory, 1930\u20131955","author":"Firth","year":"1957","journal-title":"Studies in Linguistic Analysis"},{"key":"2025111511265155100_btaf595-B40","doi-asserted-by":"crossref","first-page":"965","DOI":"10.1038\/s41586-024-08391-z","article-title":"A foundation model of transcription across human cell types","volume":"637","author":"Fu","year":"2025","journal-title":"Nature"},{"author":"Gao","key":"2025111511265155100_btaf595-B41"},{"key":"2025111511265155100_btaf595-B42","doi-asserted-by":"crossref","first-page":"825","DOI":"10.1162\/tacl_a_00400","article-title":"Let\u2019s play Mono-poly: bert can reveal words\u2019 polysemy level and partitionability into senses","volume":"9","author":"Gar\u00ed Soler","year":"2021","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"2025111511265155100_btaf595-B43","doi-asserted-by":"crossref","first-page":"1118","DOI":"10.1038\/ni.2419","article-title":"Gene-expression profiles and transcriptional regulatory pathways that underlie the identity and diversity of mouse tissue macrophages","volume":"13","author":"Gautier","year":"2012","journal-title":"Nat Immunol"},{"key":"2025111511265155100_btaf595-B44","doi-asserted-by":"crossref","first-page":"240","DOI":"10.1126\/science.1062946","article-title":"The art of the probable: system control in the adaptive immune system","volume":"293","author":"Germain","year":"2001","journal-title":"Science"},{"first-page":"10565","year":"2022","author":"Goel","key":"2025111511265155100_btaf595-B45"},{"key":"2025111511265155100_btaf595-B46","doi-asserted-by":"crossref","first-page":"845","DOI":"10.1038\/nmeth.3971","article-title":"Diffusion pseudotime robustly reconstructs lineage branching","volume":"13","author":"Haghverdi","year":"2016","journal-title":"Nat Methods"},{"first-page":"2116","year":"2016","author":"Hamilton","key":"2025111511265155100_btaf595-B47"},{"key":"2025111511265155100_btaf595-B48","doi-asserted-by":"crossref","first-page":"bbac377","DOI":"10.1093\/bib\/bbac377","article-title":"Self-supervised contrastive learning for integrative single cell rna-seq data analysis","volume":"23","author":"Han","year":"2022","journal-title":"Brief Bioinform"},{"key":"2025111511265155100_btaf595-B49","doi-asserted-by":"crossref","first-page":"146","DOI":"10.1080\/00437956.1954.11659520","article-title":"Distributional structure","volume":"10","author":"Harris","year":"1954","journal-title":"Word"},{"key":"2025111511265155100_btaf595-B50","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1162\/tacl_a_00098","article-title":"Word embeddings as metric recovery in semantic spaces","volume":"4","author":"Hashimoto","year":"2016","journal-title":"TACL"},{"key":"2025111511265155100_btaf595-B51","doi-asserted-by":"crossref","first-page":"1085","DOI":"10.1038\/s41586-024-08411-y","article-title":"A cell atlas foundation model for scalable search of similar human cells","volume":"638","author":"Heimberg","year":"2025","journal-title":"Nature"},{"key":"2025111511265155100_btaf595-B52","doi-asserted-by":"crossref","first-page":"562","DOI":"10.1093\/biostatistics\/kxx053","article-title":"Missing data and technical variability in single-cell rna-sequencing experiments","volume":"19","author":"Hicks","year":"2018","journal-title":"Biostatistics"},{"key":"2025111511265155100_btaf595-B53","first-page":"2025","volume-title":"bioRxiv","author":"Hu","year":"2025"},{"key":"2025111511265155100_btaf595-B54","first-page":"2024","volume-title":"bioRxiv","author":"Istrate","year":"2024"},{"first-page":"103","year":"2020","author":"Jakubowski","key":"2025111511265155100_btaf595-B55"},{"key":"2025111511265155100_btaf595-B56","first-page":"2023","volume-title":"bioRxiv","author":"Ji","year":"2023"},{"key":"2025111511265155100_btaf595-B57","doi-asserted-by":"crossref","first-page":"2112","DOI":"10.1093\/bioinformatics\/btab083","article-title":"Dnabert: pre-trained bidirectional encoder representations from transformers model for dna-language in genome","volume":"37","author":"Ji","year":"2021","journal-title":"Bioinformatics"},{"key":"2025111511265155100_btaf595-B58","doi-asserted-by":"crossref","first-page":"764","DOI":"10.1016\/j.cell.2020.01.015","article-title":"Single-cell transcriptome atlas of murine endothelial cells","volume":"180","author":"Kalucka","year":"2020","journal-title":"Cell"},{"first-page":"2023","year":"2023","author":"Kedzierska","key":"2025111511265155100_btaf595-B59"},{"author":"Khandelwal","key":"2025111511265155100_btaf595-B60"},{"key":"2025111511265155100_btaf595-B61","doi-asserted-by":"crossref","first-page":"2966","DOI":"10.1038\/s41467-020-16822-4","article-title":"Poincar\u00e9 maps for analyzing complex hierarchies in single-cell data","volume":"11","author":"Klimovskaia","year":"2020","journal-title":"Nat Commun"},{"key":"2025111511265155100_btaf595-B62","first-page":"22199","article-title":"Large language models are zero-shot reasoners","volume":"35","author":"Kojima","year":"2022","journal-title":"Adv Neural Inf Process Syst"},{"key":"2025111511265155100_btaf595-B63","doi-asserted-by":"crossref","first-page":"e1004224","DOI":"10.1371\/journal.pcbi.1004224","article-title":"Geometry of the gene expression space of individual cells","volume":"11","author":"Korem","year":"2015","journal-title":"PLoS Comput Biol"},{"key":"2025111511265155100_btaf595-B64","doi-asserted-by":"crossref","first-page":"1289","DOI":"10.1038\/s41592-019-0619-0","article-title":"Fast, sensitive and accurate integration of single-cell data with harmony","volume":"16","author":"Korsunsky","year":"2019","journal-title":"Nat Methods"},{"key":"2025111511265155100_btaf595-B65","doi-asserted-by":"crossref","first-page":"1860","DOI":"10.1101\/gr.192237.115","article-title":"Single-cell rna-seq reveals changes in cell cycle and differentiation programs upon aging of hematopoietic stem cells","volume":"25","author":"Kowalczyk","year":"2015","journal-title":"Genome Res"},{"key":"2025111511265155100_btaf595-B66","doi-asserted-by":"crossref","first-page":"780","DOI":"10.1038\/s41467-022-28431-4","article-title":"Uinmf performs mosaic integration of single-cell multi-omic datasets using nonnegative matrix factorization","volume":"13","author":"Kriebel","year":"2022","journal-title":"Nat Commun"},{"author":"Kuang","key":"2025111511265155100_btaf595-B67"},{"key":"2025111511265155100_btaf595-B68","doi-asserted-by":"crossref","first-page":"494","DOI":"10.1038\/s41586-018-0414-6","article-title":"Rna velocity of single cells","volume":"560","author":"La Manno","year":"2018","journal-title":"Nature"},{"first-page":"2024","year":"2024","author":"Lee","key":"2025111511265155100_btaf595-B69"},{"first-page":"27299","year":"2024","author":"Levine","key":"2025111511265155100_btaf595-B70"},{"key":"2025111511265155100_btaf595-B71","first-page":"27","article-title":"Neural word embedding as implicit matrix factorization","author":"Levy","year":"2014","journal-title":"Adv Neural Inf Process Syst"},{"key":"2025111511265155100_btaf595-B72","doi-asserted-by":"crossref","first-page":"703","DOI":"10.1038\/s41587-021-01161-6","article-title":"Scjoint integrates atlas-scale single-cell rna-seq and atac-seq data with transfer learning","volume":"40","author":"Lin","year":"2022","journal-title":"Nat Biotechnol"},{"first-page":"2168","year":"2017","author":"Liu","key":"2025111511265155100_btaf595-B73"},{"first-page":"6884","year":"2024","author":"Liu","key":"2025111511265155100_btaf595-B74"},{"year":"2020","author":"Liu","key":"2025111511265155100_btaf595-B75"},{"year":"2019","author":"Liu","key":"2025111511265155100_btaf595-B76"},{"key":"2025111511265155100_btaf595-B77","doi-asserted-by":"crossref","first-page":"1053","DOI":"10.1038\/s41592-018-0229-2","article-title":"Deep generative modeling for single-cell transcriptomics","volume":"15","author":"Lopez","year":"2018","journal-title":"Nat Methods"},{"key":"2025111511265155100_btaf595-B78","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1038\/s41587-021-01001-7","article-title":"Mapping single-cell data to reference atlases by transfer learning","volume":"40","author":"Lotfollahi","year":"2022","journal-title":"Nat Biotechnol"},{"first-page":"6713","year":"2020","author":"Mamou","key":"2025111511265155100_btaf595-B79"},{"author":"Mikolov","key":"2025111511265155100_btaf595-B80"},{"key":"2025111511265155100_btaf595-B81","article-title":"Distributed representations of words and phrases and their compositionality","volume":"26","author":"Mikolov","year":"2013","journal-title":"Adv Neural Inf Process Syst"},{"first-page":"746","year":"2013","author":"Mikolov","key":"2025111511265155100_btaf595-B82"},{"year":"2017","author":"Mimno","key":"2025111511265155100_btaf595-B83"},{"year":"2025","author":"Modell","key":"2025111511265155100_btaf595-B84"},{"author":"Mu","key":"2025111511265155100_btaf595-B85"},{"first-page":"1059","year":"2014","author":"Neelakantan","key":"2025111511265155100_btaf595-B86"},{"first-page":"6341","year":"2017","author":"Nickel","key":"2025111511265155100_btaf595-B87"},{"key":"2025111511265155100_btaf595-B88","doi-asserted-by":"crossref","first-page":"e1913931118","DOI":"10.1073\/pnas.1913931118","article-title":"Revealing lineage-related signals in single-cell gene expression using random matrix theory","volume":"118","author":"Nitzan","year":"2021","journal-title":"Proc Natl Acad Sci USA"},{"key":"2025111511265155100_btaf595-B89","doi-asserted-by":"crossref","first-page":"132","DOI":"10.1038\/s41586-019-1773-3","article-title":"Gene expression cartography","volume":"576","author":"Nitzan","year":"2019","journal-title":"Nature"},{"key":"2025111511265155100_btaf595-B90","doi-asserted-by":"crossref","first-page":"e3002397","DOI":"10.1371\/journal.pbio.3002397","article-title":"Topological data analysis reveals a core gene expression backbone that defines form and function across flowering plants","volume":"21","author":"Palande","year":"2023","journal-title":"PLoS Biol"},{"key":"2025111511265155100_btaf595-B91","doi-asserted-by":"crossref","first-page":"1663","DOI":"10.1016\/j.cell.2015.11.013","article-title":"Transcriptional heterogeneity and lineage commitment in myeloid progenitors","volume":"163","author":"Paul","year":"2015","journal-title":"Cell"},{"first-page":"2025","year":"2025","author":"Pearce","key":"2025111511265155100_btaf595-B92"},{"year":"2024","author":"Pedrocchi","key":"2025111511265155100_btaf595-B93"},{"author":"Reisinger","key":"2025111511265155100_btaf595-B94"},{"first-page":"2025","year":"2025","author":"Rizvi","key":"2025111511265155100_btaf595-B95"},{"key":"2025111511265155100_btaf595-B96","doi-asserted-by":"crossref","first-page":"1492","DOI":"10.1038\/s41592-024-02191-z","article-title":"Toward universal cell embeddings: integrating single-cell rna-seq datasets across species with saturn","volume":"21","author":"Rosen","year":"2024","journal-title":"Nat Methods"},{"first-page":"2023","year":"2023","author":"Rosen","key":"2025111511265155100_btaf595-B97"},{"key":"2025111511265155100_btaf595-B98","doi-asserted-by":"crossref","first-page":"1062","DOI":"10.1038\/s41467-025-56388-7","article-title":"Integrating representation learning, permutation, and optimization to detect lineage-related gene expression patterns","volume":"16","author":"Schl\u00fcter","year":"2025","journal-title":"Nat Commun"},{"article-title":"Can sparse autoencoders make sense of latent representations?","year":"2024","author":"Schuster","key":"2025111511265155100_btaf595-B99"},{"key":"2025111511265155100_btaf595-B100","doi-asserted-by":"crossref","first-page":"451","DOI":"10.1038\/s41587-019-0068-4","article-title":"Characterization of cell fate probabilities in single-cell data with palantir","volume":"37","author":"Setty","year":"2019","journal-title":"Nat Biotechnol"},{"key":"2025111511265155100_btaf595-B101","doi-asserted-by":"crossref","first-page":"3826","DOI":"10.1038\/s41467-021-24172-y","article-title":"Scgcn is a graph convolutional networks algorithm for knowledge transfer in single cell omics","volume":"12","author":"Song","year":"2021","journal-title":"Nat Commun"},{"key":"2025111511265155100_btaf595-B102","doi-asserted-by":"crossref","first-page":"6495","DOI":"10.1038\/s41467-023-41855-w","article-title":"Benchmarking strategies for cross-species integration of single-cell rna sequencing data","volume":"14","author":"Song","year":"2023","journal-title":"Nat Commun"},{"year":"2017","author":"Speer","key":"2025111511265155100_btaf595-B103"},{"key":"2025111511265155100_btaf595-B104","doi-asserted-by":"crossref","first-page":"e2100473118","DOI":"10.1073\/pnas.2100473118","article-title":"Computing the riemannian curvature of image patch and single-cell RNA sequencing data manifolds using extrinsic differential geometry","volume":"118","author":"Sritharan","year":"2021","journal-title":"Proc Natl Acad Sci USA"},{"key":"2025111511265155100_btaf595-B105","doi-asserted-by":"crossref","first-page":"395","DOI":"10.1016\/j.cels.2019.04.004","article-title":"Decomposing cell identity for transfer learning across cellular measurements, platforms, tissues, and species","volume":"8","author":"Stein-O\u2019Brien","year":"2019","journal-title":"Cell Syst"},{"key":"2025111511265155100_btaf595-B106","doi-asserted-by":"crossref","first-page":"865","DOI":"10.1038\/nmeth.4380","article-title":"Simultaneous epitope and transcriptome measurement in single cells","volume":"14","author":"Stoeckius","year":"2017","journal-title":"Nat Methods"},{"key":"2025111511265155100_btaf595-B107","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1038\/s41576-019-0093-7","article-title":"Integrative single-cell analysis","volume":"20","author":"Stuart","year":"2019","journal-title":"Nat Rev Genet"},{"key":"2025111511265155100_btaf595-B108","doi-asserted-by":"crossref","first-page":"eabo0510","DOI":"10.1126\/science.abo0510","article-title":"Mapping the developing human immune system across organs","volume":"376","author":"Suo","year":"2022","journal-title":"Science"},{"key":"2025111511265155100_btaf595-B109","doi-asserted-by":"crossref","first-page":"e66747","DOI":"10.7554\/eLife.66747","article-title":"Mapping single-cell atlases throughout metazoa unravels cell type evolution","volume":"10","author":"Tarashansky","year":"2021","journal-title":"Elife"},{"key":"2025111511265155100_btaf595-B110","doi-asserted-by":"crossref","first-page":"616","DOI":"10.1038\/s41586-023-06139-9","article-title":"Transfer learning enables predictions in network biology","volume":"618","author":"Theodoris","year":"2023","journal-title":"Nature"},{"key":"2025111511265155100_btaf595-B111","doi-asserted-by":"crossref","first-page":"294","DOI":"10.1038\/s41567-023-02303-0","article-title":"The low-rank hypothesis of complex systems","volume":"20","author":"Thibeault","year":"2024","journal-title":"Nat Phys"},{"key":"2025111511265155100_btaf595-B112","doi-asserted-by":"crossref","first-page":"773","DOI":"10.1038\/s41587-022-01448-2","article-title":"The expanding vistas of spatial transcriptomics","volume":"41","author":"Tian","year":"2023","journal-title":"Nat Biotechnol"},{"key":"2025111511265155100_btaf595-B113","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1007\/s10994-005-0913-1","article-title":"Corpus-based learning of analogies and semantic relations","volume":"60","author":"Turney","year":"2005","journal-title":"Mach Learn"},{"first-page":"3609","year":"2021","author":"Ushio","key":"2025111511265155100_btaf595-B114"},{"key":"2025111511265155100_btaf595-B115","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv Neural Inf Process Syst"},{"year":"2024","author":"Walsh","key":"2025111511265155100_btaf595-B116"},{"key":"2025111511265155100_btaf595-B117","doi-asserted-by":"crossref","first-page":"3620","DOI":"10.1038\/s41467-022-31388-z","article-title":"Endothelial cell heterogeneity and microglia regulons revealed by a pig cell landscape at single-cell level","volume":"13","author":"Wang","year":"2022","journal-title":"Nat Commun"},{"key":"2025111511265155100_btaf595-B118","doi-asserted-by":"crossref","first-page":"4239","DOI":"10.1038\/s41467-020-18075-7","article-title":"Single cell transcriptomics of human epidermis identifies basal stem cell transition states","volume":"11","author":"Wang","year":"2020","journal-title":"Nat Commun"},{"first-page":"2024","year":"2024","author":"Wenteler","key":"2025111511265155100_btaf595-B119"},{"key":"2025111511265155100_btaf595-B120","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1186\/s13059-019-1663-x","article-title":"Paga: graph abstraction reconciles clustering with trajectory inference through a topology preserving map of single cells","volume":"20","author":"Wolf","year":"2019","journal-title":"Genome Biol"},{"year":"2016","author":"Wu","key":"2025111511265155100_btaf595-B121"},{"year":"2024","author":"Wu","key":"2025111511265155100_btaf595-B122"},{"key":"2025111511265155100_btaf595-B123","doi-asserted-by":"crossref","first-page":"e9620","DOI":"10.15252\/msb.20209620","article-title":"Probabilistic harmonization and annotation of single-cell transcriptomics data with deep generative models","volume":"17","author":"Xu","year":"2021","journal-title":"Mol Syst Biol"},{"key":"2025111511265155100_btaf595-B124","doi-asserted-by":"crossref","first-page":"291","DOI":"10.1162\/tacl_a_00461","article-title":"Byt5: towards a token-free future with pre-trained byte-to-byte models","volume":"10","author":"Xue","year":"2022","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"2025111511265155100_btaf595-B125","doi-asserted-by":"crossref","first-page":"830","DOI":"10.1038\/s41422-024-01034-y","article-title":"Genecompass: deciphering universal gene regulatory mechanisms with a knowledge-informed cross-species foundation model","volume":"34","author":"Yang","year":"2024","journal-title":"Cell Res"},{"key":"2025111511265155100_btaf595-B126","article-title":"On the dimensionality of word embedding","volume":"31","author":"Yin","year":"2018","journal-title":"Adv Neural Inf Process Syst"},{"key":"2025111511265155100_btaf595-B127","article-title":"Cell ontology guided transcriptome foundation model","author":"Yuan","year":"2024","journal-title":"Adv Neural Inf Process Syst"},{"author":"Zhang","key":"2025111511265155100_btaf595-B128"},{"key":"2025111511265155100_btaf595-B129","doi-asserted-by":"crossref","first-page":"5261","DOI":"10.1038\/s41467-021-25534-2","article-title":"Learning interpretable cellular and gene signature embeddings from single-cell transcriptomic data","volume":"12","author":"Zhao","year":"2021","journal-title":"Nat Commun"},{"key":"2025111511265155100_btaf595-B130","doi-asserted-by":"crossref","first-page":"102225","DOI":"10.1016\/j.isci.2021.102225","article-title":"Hyperbolic geometry of gene expression","volume":"24","author":"Zhou","year":"2021","journal-title":"iScience"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btaf595\/65041398\/btaf595.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/41\/11\/btaf595\/65041398\/btaf595.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/41\/11\/btaf595\/65041398\/btaf595.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T16:27:06Z","timestamp":1763224026000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btaf595\/8306768"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,30]]},"references-count":130,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2025,11,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btaf595","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"type":"print","value":"1367-4803"},{"type":"electronic","value":"1367-4811"}],"subject":[],"published-other":{"date-parts":[[2025,11]]},"published":{"date-parts":[[2025,10,30]]},"article-number":"btaf595"}}