{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,15]],"date-time":"2026-05-15T05:23:16Z","timestamp":1778822596292,"version":"3.51.4"},"reference-count":63,"publisher":"Oxford University Press (OUP)","issue":"5","license":[{"start":{"date-parts":[[2023,8,18]],"date-time":"2023-08-18T00:00:00Z","timestamp":1692316800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/pages\/standard-publication-reuse-rights"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,9,20]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:p>The availability of high-throughput sequencing data creates opportunities to comprehensively understand human diseases as well as challenges to train machine learning models using such high dimensions of data. Here, we propose a denoised multi-omics integration framework, which contains a distribution-based feature denoising algorithm, Feature Selection with Distribution (FSD), for dimension reduction and a multi-omics integration framework, Attention Multi-Omics Integration (AttentionMOI) to predict cancer prognosis and identify cancer subtypes. We demonstrated that FSD improved model performance either using single omic data or multi-omics data in 15 The Cancer Genome Atlas Program (TCGA) cancers for survival prediction and kidney cancer subtype identification. And our integration framework AttentionMOI outperformed machine learning models and current multi-omics integration algorithms with high dimensions of features. Furthermore, FSD identified features that were associated to cancer prognosis and could be considered as biomarkers.<\/jats:p>","DOI":"10.1093\/bib\/bbad304","type":"journal-article","created":{"date-parts":[[2023,8,18]],"date-time":"2023-08-18T14:03:06Z","timestamp":1692367386000},"source":"Crossref","is-referenced-by-count":20,"title":["A denoised multi-omics integration framework for cancer subtype classification and survival prediction"],"prefix":"10.1093","volume":"24","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5757-4804","authenticated-orcid":false,"given":"Jiali","family":"Pang","sequence":"first","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory , Shanghai , China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9467-8378","authenticated-orcid":false,"given":"Bilin","family":"Liang","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory , Shanghai , China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruifeng","family":"Ding","sequence":"additional","affiliation":[{"name":"Second Affiliated Hospital of Naval Medical University Department of Anesthesiology, Changzheng Hospital, , Shanghai , China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiujuan","family":"Yan","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory , Shanghai , China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruiyao","family":"Chen","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory , Shanghai , China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9233-4363","authenticated-orcid":false,"given":"Jie","family":"Xu","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory , Shanghai , China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2023,8,18]]},"reference":[{"key":"2023092216560263800_ref1","doi-asserted-by":"crossref","first-page":"333","DOI":"10.1038\/nrg.2016.49","article-title":"Coming of age: ten years of next-generation sequencing technologies","volume":"17","author":"","year":"2016","journal-title":"Nat Rev Genet"},{"key":"2023092216560263800_ref2","doi-asserted-by":"crossref","first-page":"631","DOI":"10.1038\/s41576-019-0150-2","article-title":"RNA sequencing: the teenage years","volume":"20","author":"Stark","year":"2019","journal-title":"Nat Rev Genet"},{"key":"2023092216560263800_ref3","doi-asserted-by":"crossref","first-page":"288","DOI":"10.1016\/j.ygeno.2011.07.007","article-title":"High density DNA methylation array with single CpG site resolution","volume":"98","author":"Bibikova","year":"2011","journal-title":"Genomics"},{"key":"2023092216560263800_ref4","doi-asserted-by":"crossref","first-page":"e65","DOI":"10.1093\/nar\/29.13.e65","article-title":"Bisulfite genomic sequencing: systematic investigation of critical experimental parameters","volume":"29","author":"Grunau","year":"2001","journal-title":"Nucleic Acids Res"},{"key":"2023092216560263800_ref5","doi-asserted-by":"crossref","first-page":"949","DOI":"10.1016\/j.csbj.2021.01.009","article-title":"Multi-omics approaches in cancer research with applications in tumor subtyping, prognosis, and diagnosis","volume":"19","author":"Menyh\u00e1rt","year":"2021","journal-title":"Comput Struct Biotechnol J"},{"key":"2023092216560263800_ref6","doi-asserted-by":"crossref","first-page":"83","DOI":"10.1186\/s13059-017-1215-1","article-title":"Multi-omics approaches to disease","volume":"18","author":"Hasin","year":"2017","journal-title":"Genome Biol"},{"key":"2023092216560263800_ref7","doi-asserted-by":"crossref","first-page":"333","DOI":"10.1038\/nmeth.2810","article-title":"Similarity network fusion for aggregating data types on a genomic scale","volume":"11","author":"Wang","year":"2014","journal-title":"Nat Methods"},{"key":"2023092216560263800_ref8","first-page":"20150202","article-title":"Principal component analysis: a review and recent developments","volume":"374","author":"Jolliffe","year":"2016","journal-title":"Philos Trans A Math Phys Eng Sci"},{"key":"2023092216560263800_ref9","doi-asserted-by":"crossref","first-page":"162","DOI":"10.1007\/978-1-4612-4380-9_14","volume-title":"Breakthroughs in Statistics: Methodology and Distribution","author":"Hotelling","year":"1992"},{"key":"2023092216560263800_ref10","doi-asserted-by":"crossref","first-page":"788","DOI":"10.1038\/44565","article-title":"Learning the parts of objects by non-negative matrix factorization","volume":"401","author":"Lee","year":"1999","journal-title":"Nature"},{"key":"2023092216560263800_ref11","doi-asserted-by":"crossref","first-page":"10546","DOI":"10.1093\/nar\/gky889","article-title":"Multi-omic and multi-view clustering algorithms: review and cancer benchmark","volume":"46","author":"Rappoport","year":"2018","journal-title":"Nucleic Acids Res"},{"key":"2023092216560263800_ref12","doi-asserted-by":"crossref","first-page":"2906","DOI":"10.1093\/bioinformatics\/btp543","article-title":"Integrative clustering of multiple genomic data types using a joint latent variable model with application to breast and lung cancer subtype analysis","volume":"25","author":"Shen","year":"2009","journal-title":"Bioinformatics"},{"key":"2023092216560263800_ref13","doi-asserted-by":"crossref","first-page":"i237","DOI":"10.1093\/bioinformatics\/btq182","article-title":"Inference of patient-specific pathway activities from multi-dimensional cancer genomics data using PARADIGM","volume":"26","author":"Vaske","year":"2010","journal-title":"Bioinformatics"},{"key":"2023092216560263800_ref14","doi-asserted-by":"crossref","first-page":"765","DOI":"10.1109\/BIBM47256.2019.8983228","volume-title":"2019 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","author":"Zhang","year":"2019"},{"key":"2023092216560263800_ref15","doi-asserted-by":"crossref","first-page":"2719","DOI":"10.1016\/j.csbj.2021.04.067","article-title":"DeepOmix: a scalable and interpretable multi-omics deep learning framework and application in cancer survival analysis","volume":"19","author":"Zhao","year":"2021","journal-title":"Comput Struct Biotechnol J"},{"key":"2023092216560263800_ref16","doi-asserted-by":"crossref","first-page":"3445","DOI":"10.1038\/s41467-021-23774-w","article-title":"MOGONET integrates multi-omics data using graph convolutional networks allowing patient classification and biomarker identification","volume":"12","author":"Wang","year":"2021","journal-title":"Nat Commun"},{"key":"2023092216560263800_ref17","doi-asserted-by":"crossref","first-page":"1113","DOI":"10.1038\/s41588-019-0423-x","article-title":"Quantitative evidence for early metastatic seeding in colorectal cancer","volume":"51","author":"Hu","year":"2019","journal-title":"Nat Genet"},{"key":"2023092216560263800_ref18","doi-asserted-by":"crossref","first-page":"1248","DOI":"10.1158\/1078-0432.CCR-17-0853","article-title":"Deep learning\u2013based multi-omics integration robustly predicts survival in liver cancer","volume":"24","author":"Chaudhary","year":"2018","journal-title":"Clin Cancer Res"},{"key":"2023092216560263800_ref19","doi-asserted-by":"crossref","first-page":"348","DOI":"10.1038\/s41586-021-03922-4","article-title":"Biologically informed deep neural network for prostate cancer discovery","volume":"598","author":"Elmarakeby","year":"2021","journal-title":"Nature"},{"key":"2023092216560263800_ref20","doi-asserted-by":"crossref","first-page":"3047","DOI":"10.3390\/cancers13123047","article-title":"OmiEmbed: a unified multi-task deep learning framework for multi-omics data","volume":"13","author":"Zhang","year":"2021","journal-title":"Cancer"},{"key":"2023092216560263800_ref21","doi-asserted-by":"crossref","first-page":"140","DOI":"10.1093\/bib\/bbz124","article-title":"Current RNA-seq methodology reporting limits reproducibility","volume":"22","author":"Simoneau","year":"2021","journal-title":"Brief Bioinform"},{"key":"2023092216560263800_ref22","doi-asserted-by":"crossref","first-page":"1133","DOI":"10.1016\/S1097-2765(02)00531-2","article-title":"Protein interaction verification and functional annotation by integrated analysis of genome-scale data","volume":"9","author":"Kemmeren","year":"2002","journal-title":"Mol Cell"},{"key":"2023092216560263800_ref23","doi-asserted-by":"crossref","first-page":"2084","DOI":"10.1093\/bioinformatics\/bty895","article-title":"Heavy-tailed prior distributions for sequence count data: removing the noise and preserving large differences","volume":"35","author":"Zhu","year":"2019","journal-title":"Bioinformatics"},{"key":"2023092216560263800_ref24","doi-asserted-by":"crossref","first-page":"7398","DOI":"10.1109\/ICASSP.2013.6639100","volume-title":"2013 IEEE International Conference on Acoustics, Speech and Signal Processing","author":"Seltzer","year":"2013"},{"key":"2023092216560263800_ref25","doi-asserted-by":"crossref","first-page":"109","DOI":"10.1007\/978-3-319-13623-3_12","volume-title":"Advances in Speech and Language Technologies for Iberian Languages: Second International Conference, IberSPEECH 2014, Las Palmas de Gran Canaria, Spain, November 19-21, 2014. Proceedings","author":"de-la-Calle-Silos","year":"2014"},{"key":"2023092216560263800_ref26","doi-asserted-by":"crossref","first-page":"D956","DOI":"10.1093\/nar\/gkx1090","article-title":"LinkedOmics: analyzing multi-omics data within and across 32 cancer types","volume":"46","author":"Vasaikar","year":"2018","journal-title":"Nucleic Acids Res"},{"key":"2023092216560263800_ref27","doi-asserted-by":"crossref","first-page":"68","DOI":"10.1080\/01621459.1951.10500769","article-title":"The Kolmogorov-Smirnov test for goodness of fit","volume":"46","author":"Massey","year":"1951","journal-title":"J Am Stat Assoc"},{"key":"2023092216560263800_ref28","first-page":"2825","article-title":"Scikit-learn: machine learning in python","volume":"12","author":"Pedregosa","year":"2011","journal-title":"J Mach Learn Res"},{"key":"2023092216560263800_ref29","doi-asserted-by":"crossref","first-page":"10","DOI":"10.3389\/fonc.2020.01065","article-title":"Integrative network fusion: a multi-omics approach in molecular profiling","volume":"10","author":"Chierici","year":"2020","journal-title":"Front Oncol"},{"key":"2023092216560263800_ref30","doi-asserted-by":"crossref","first-page":"240","DOI":"10.3390\/genes10030240","article-title":"Group Lasso regularized deep learning for cancer prognosis from multi-omics and clinical features","volume":"10","author":"Xie","year":"2019","journal-title":"Genes"},{"key":"2023092216560263800_ref31","doi-asserted-by":"crossref","first-page":"bbaa167","DOI":"10.1093\/bib\/bbaa167","article-title":"Large-scale benchmark study of survival prediction methods using multi-omics data","volume":"22","author":"Herrmann","year":"2021","journal-title":"Brief Bioinform"},{"key":"2023092216560263800_ref32","doi-asserted-by":"crossref","first-page":"3735","DOI":"10.1016\/j.csbj.2021.06.030","article-title":"Integration strategies of multi-omics data for machine learning analysis","volume":"19","author":"Picard","year":"2021","journal-title":"Comput Struct Biotechnol J"},{"key":"2023092216560263800_ref33","doi-asserted-by":"crossref","first-page":"559","DOI":"10.1080\/14786440109462720","article-title":"LIII. On lines and planes of closest fit to systems of points in space","volume":"2","author":"Pearson","year":"1901","journal-title":"Lond Edinb Dublin Philos Mag J Sci"},{"key":"2023092216560263800_ref34","doi-asserted-by":"crossref","first-page":"4404","DOI":"10.1016\/j.csbj.2021.08.006","article-title":"MetaCancer: a deep learning-based pan-cancer metastasis prediction model developed using multi-omics data","volume":"19","author":"Albaradei","year":"2021","journal-title":"Comput Struct Biotechnol J"},{"key":"2023092216560263800_ref35","doi-asserted-by":"crossref","first-page":"546","DOI":"10.1038\/modpathol.2016.60","article-title":"Pan-cancer transcriptome analysis reveals a gene expression signature for the identification of tumor tissue origin","volume":"29","author":"Xu","year":"2016","journal-title":"Mod Pathol"},{"key":"2023092216560263800_ref36","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1007\/978-94-015-7744-1_2","volume-title":"Simulated Annealing: Theory and Applications","author":"Laarhoven","year":"1987"},{"key":"2023092216560263800_ref37","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1007\/978-3-319-93025-1_4","volume-title":"Evolutionary Algorithms and Neural Networks: Theory and Applications","author":"Mirjalili","year":"2019"},{"key":"2023092216560263800_ref38","first-page":"278","volume-title":"Proceedings of 3rd International Conference on Document Analysis and Recognition","author":"Ho","year":"1995"},{"key":"2023092216560263800_ref39","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1007\/BF00994018","article-title":"Support-vector networks","volume":"20","author":"Cortes","year":"1995","journal-title":"Mach Learn"},{"key":"2023092216560263800_ref40","volume-title":"Neural Networks: A Comprehensive Foundation","author":"Haykin","year":"1998"},{"key":"2023092216560263800_ref41","doi-asserted-by":"crossref","first-page":"785","DOI":"10.1145\/2939672.2939785","volume-title":"Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","author":"Chen","year":"2016"},{"key":"2023092216560263800_ref42","volume-title":"Advances in Neural Information Processing Systems","author":"Paszke","year":"2019"},{"key":"2023092216560263800_ref43","volume-title":"Advances in Neural Information Processing Systems","author":"Lundberg","year":"2017"},{"key":"2023092216560263800_ref44","first-page":"3319","volume-title":"Proceedings of the 34th International Conference on Machine Learning","author":"Sundararajan","year":"2017"},{"key":"2023092216560263800_ref45","doi-asserted-by":"crossref","first-page":"10912","DOI":"10.1109\/ACCESS.2023.3240515","article-title":"Moanna: multi-omics autoencoder-based neural network algorithm for predicting breast cancer subtypes","volume":"11","author":"Lupat","year":"2023","journal-title":"IEEE Access"},{"key":"2023092216560263800_ref46","doi-asserted-by":"crossref","first-page":"510","DOI":"10.1016\/j.ccr.2010.03.017","article-title":"Identification of a CpG Island Methylator phenotype that defines a distinct subgroup of glioma","volume":"17","author":"Noushmehr","year":"2010","journal-title":"Cancer Cell"},{"key":"2023092216560263800_ref47","doi-asserted-by":"crossref","first-page":"7490","DOI":"10.1158\/0008-5472.CAN-05-4552","article-title":"Epigenomic profiling reveals novel and frequent targets of aberrant DNA methylation-mediated silencing in malignant glioma","volume":"66","author":"Kim","year":"2006","journal-title":"Cancer Res"},{"key":"2023092216560263800_ref48","doi-asserted-by":"crossref","first-page":"550","DOI":"10.1016\/j.cell.2015.12.028","article-title":"Molecular profiling reveals biologically discrete subsets and pathways of progression in diffuse glioma","volume":"164","author":"Ceccarelli","year":"2016","journal-title":"Cell"},{"key":"2023092216560263800_ref49","doi-asserted-by":"crossref","first-page":"325","DOI":"10.1038\/nri.2017.143","article-title":"HLA variation and disease","volume":"18","author":"Dendrou","year":"2018","journal-title":"Nat Rev Immunol"},{"key":"2023092216560263800_ref50","doi-asserted-by":"crossref","first-page":"2765","DOI":"10.3390\/cells10102765","article-title":"Integrative analysis to identify genes associated with Stemness and immune infiltration in glioblastoma","volume":"10","author":"Warrier","year":"2021","journal-title":"Cell"},{"key":"2023092216560263800_ref51","doi-asserted-by":"crossref","first-page":"5799","DOI":"10.1158\/1078-0432.CCR-19-0261","article-title":"A randomized double-blind placebo-controlled phase II trial of dendritic cell vaccine ICT-107 in newly diagnosed patients with glioblastoma","volume":"25","author":"Wen","year":"2019","journal-title":"Clin Cancer Res"},{"key":"2023092216560263800_ref52","doi-asserted-by":"crossref","first-page":"1035","DOI":"10.1158\/1078-0432.CCR-12-2064","article-title":"Agonistic CD40 antibodies and cancer therapy","volume":"19","author":"Vonderheide","year":"2013","journal-title":"Clin Cancer Res"},{"key":"2023092216560263800_ref53","doi-asserted-by":"crossref","first-page":"360","DOI":"10.1016\/j.coph.2004.02.008","article-title":"The role of the CD40 pathway in the pathogenesis and treatment of cancer","volume":"4","author":"Eliopoulos","year":"2004","journal-title":"Curr Opin Pharmacol"},{"key":"2023092216560263800_ref54","doi-asserted-by":"crossref","first-page":"8803","DOI":"10.1093\/nar\/gkt656","article-title":"An integrative characterization of recurrent molecular aberrations in glioblastoma genomes","volume":"41","author":"Sintupisut","year":"2013","journal-title":"Nucleic Acids Res"},{"key":"2023092216560263800_ref55","doi-asserted-by":"crossref","first-page":"1607","DOI":"10.1093\/neuonc\/nou113","article-title":"Genome-wide methylation profiling identifies an essential role of reactive oxygen species in pediatric glioblastoma multiforme and validates a methylome specific for H3 histone family 3A with absence of G-CIMP\/isocitrate dehydrogenase 1 mutation","volume":"16","author":"Jha","year":"2014","journal-title":"Neuro Oncol"},{"key":"2023092216560263800_ref56","doi-asserted-by":"crossref","first-page":"685","DOI":"10.1007\/s12525-021-00475-2","article-title":"Machine learning and deep learning","volume":"31","author":"Janiesch","year":"2021","journal-title":"Electron Mark"},{"key":"2023092216560263800_ref57","doi-asserted-by":"crossref","first-page":"7776","DOI":"10.1109\/ACCESS.2017.2696365","article-title":"Machine learning with big data: challenges and approaches","volume":"5","author":"L\u2019Heureux","year":"2017","journal-title":"IEEE Access"},{"key":"2023092216560263800_ref58","doi-asserted-by":"crossref","first-page":"563","DOI":"10.1093\/bioinformatics\/btg062","article-title":"Effective dimension reduction methods for tumor classification using gene expression data","volume":"19","author":"Antoniadis","year":"2003","journal-title":"Bioinformatics"},{"key":"2023092216560263800_ref59","author":"Bao","year":"2022"},{"key":"2023092216560263800_ref60","volume-title":"Advances in Neural Information Processing Systems","author":"Tu","year":"2022"},{"key":"2023092216560263800_ref61","first-page":"1370","article-title":"Network approaches to systems biology analysis of complex disease: integrative methods for multi-omics data","volume":"19","author":"Yan","year":"2018","journal-title":"Brief Bioinform"},{"key":"2023092216560263800_ref62","doi-asserted-by":"crossref","first-page":"206","DOI":"10.1038\/nature01254","article-title":"Computational systems biology","volume":"420","author":"Kitano","year":"2002","journal-title":"Nature"},{"key":"2023092216560263800_ref63","doi-asserted-by":"crossref","first-page":"394","DOI":"10.1186\/s12859-022-04950-1","article-title":"Risk stratification and pathway analysis based on graph neural network and interpretable algorithm","volume":"23","author":"Liang","year":"2022","journal-title":"BMC Bioinform"}],"container-title":["Briefings in Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bib\/article-pdf\/24\/5\/bbad304\/51711338\/bbad304.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bib\/article-pdf\/24\/5\/bbad304\/51711338\/bbad304.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,22]],"date-time":"2023-09-22T17:19:29Z","timestamp":1695403169000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bib\/article\/doi\/10.1093\/bib\/bbad304\/7245703"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,18]]},"references-count":63,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2023,9,20]]}},"URL":"https:\/\/doi.org\/10.1093\/bib\/bbad304","relation":{},"ISSN":["1467-5463","1477-4054"],"issn-type":[{"value":"1467-5463","type":"print"},{"value":"1477-4054","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2023,9]]},"published":{"date-parts":[[2023,8,18]]},"article-number":"bbad304"}}