{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T14:16:38Z","timestamp":1775312198938,"version":"3.50.1"},"reference-count":30,"publisher":"Oxford University Press (OUP)","issue":"14","license":[{"start":{"date-parts":[[2019,7,8]],"date-time":"2019-07-08T00:00:00Z","timestamp":1562544000000},"content-version":"vor","delay-in-days":7,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"name":"European Union\u2019s Horizon 2020 research and innovation program","award":["666003"],"award-info":[{"award-number":["666003"]}]},{"DOI":"10.13039\/501100002341","name":"Academy of Finland","doi-asserted-by":"publisher","award":["292334"],"award-info":[{"award-number":["292334"]}],"id":[{"id":"10.13039\/501100002341","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002341","name":"Academy of Finland","doi-asserted-by":"publisher","award":["319264"],"award-info":[{"award-number":["319264"]}],"id":[{"id":"10.13039\/501100002341","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001695","name":"JST","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001695","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001700","name":"MEXT","doi-asserted-by":"publisher","award":["16H06299"],"award-info":[{"award-number":["16H06299"]}],"id":[{"id":"10.13039\/501100001700","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,7,15]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>Finding non-linear relationships between biomolecules and a biological outcome is computationally expensive and statistically challenging. Existing methods have important drawbacks, including among others lack of parsimony, non-convexity and computational overhead. Here we propose block HSIC Lasso, a non-linear feature selector that does not present the previous drawbacks.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>We compare block HSIC Lasso to other state-of-the-art feature selection techniques in both synthetic and real data, including experiments over three common types of genomic data: gene-expression microarrays, single-cell RNA sequencing and genome-wide association studies. In all cases, we observe that features selected by block HSIC Lasso retain more information about the underlying biology than those selected by other techniques. As a proof of concept, we applied block HSIC Lasso to a single-cell RNA sequencing experiment on mouse hippocampus. We discovered that many genes linked in the past to brain development and function are involved in the biological differences between the types of neurons.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>Block HSIC Lasso is implemented in the Python 2\/3 package pyHSICLasso, available on PyPI. Source code is available on GitHub (https:\/\/github.com\/riken-aip\/pyHSICLasso).<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Supplementary information<\/jats:title>\n                    <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btz333","type":"journal-article","created":{"date-parts":[[2019,5,9]],"date-time":"2019-05-09T07:20:03Z","timestamp":1557386403000},"page":"i427-i435","source":"Crossref","is-referenced-by-count":66,"title":["Block HSIC Lasso: model-free biomarker detection for ultra-high dimensional data"],"prefix":"10.1093","volume":"35","author":[{"given":"H\u00e9ctor","family":"Climente-Gonz\u00e1lez","sequence":"first","affiliation":[{"name":"Institut Curie, PSL Research University, Paris, France"},{"name":"INSERM, U900, Paris, France"},{"name":"MINES ParisTech, PSL Research University, CBIO-Centre for Computational Biology, Paris, France"},{"name":"RIKEN AIP, Tokyo, Japan"}]},{"given":"Chlo\u00e9-Agathe","family":"Azencott","sequence":"additional","affiliation":[{"name":"Institut Curie, PSL Research University, Paris, France"},{"name":"INSERM, U900, Paris, France"},{"name":"MINES ParisTech, PSL Research University, CBIO-Centre for Computational Biology, Paris, France"}]},{"given":"Samuel","family":"Kaski","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Aalto University, Espoo, Finland"}]},{"given":"Makoto","family":"Yamada","sequence":"additional","affiliation":[{"name":"RIKEN AIP, Tokyo, Japan"},{"name":"Department of intelligence science and technology, Kyoto University, Kyoto, Japan"}]}],"member":"286","published-online":{"date-parts":[[2019,7,5]]},"reference":[{"key":"2023062712381323500_btz333-B1","doi-asserted-by":"crossref","first-page":"661","DOI":"10.1038\/nature05911","article-title":"Genome-wide association study of 14,000 cases of seven common diseases and 3,000 shared controls","volume":"447","author":"Burton","year":"2007","journal-title":"Nature"},{"key":"2023062712381323500_btz333-B2","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1186\/s13742-015-0047-8","article-title":"Second-generation PLINK: rising to the challenge of larger and richer datasets","volume":"4","author":"Chang","year":"2015","journal-title":"GigaScience"},{"key":"2023062712381323500_btz333-B3","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1038\/nrc2294","article-title":"The properties of high-dimensional data spaces: implications for exploring gene and protein expression data","volume":"8","author":"Clarke","year":"2008","journal-title":"Nat. Rev. Cancer"},{"key":"2023062712381323500_btz333-B4","volume-title":"Elements of Information Theory","author":"Cover","year":"2006","edition":"2"},{"key":"2023062712381323500_btz333-B5","doi-asserted-by":"crossref","first-page":"185","DOI":"10.1142\/S0219720005001004","article-title":"Minimum redundancy feature selection from microarray gene expression data","volume":"3","author":"Ding","year":"2005","journal-title":"J. Bioinform. Comput. Biol"},{"key":"2023062712381323500_btz333-B6","doi-asserted-by":"crossref","first-page":"407","DOI":"10.1214\/009053604000000067","article-title":"Least angle regression","volume":"32","author":"Efron","year":"2004","journal-title":"Ann. Statist"},{"key":"2023062712381323500_btz333-B7","volume-title":"Submodular Functions and Optimization","author":"Fujishige","year":"2005"},{"key":"2023062712381323500_btz333-B8","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1007\/11564089_7","article-title":"Measuring statistical dependence with Hilbert\u2013Schmidt norms","author":"Gretton","year":"2005","journal-title":"International Conference on Algorithmic Learning Theory (ALT), Singapore"},{"key":"2023062712381323500_btz333-B9","doi-asserted-by":"crossref","first-page":"333","DOI":"10.1038\/nature24489","article-title":"A single-cell survey of the small intestinal epithelium","volume":"551","author":"Haber","year":"2017","journal-title":"Nature"},{"key":"2023062712381323500_btz333-B10","doi-asserted-by":"crossref","first-page":"925","DOI":"10.1126\/science.aad7038","article-title":"Div-Seq: single-nucleus RNA-Seq reveals dynamics of rare adult newborn neurons","volume":"353","author":"Habib","year":"2016","journal-title":"Science"},{"key":"2023062712381323500_btz333-B11","doi-asserted-by":"crossref","first-page":"9975","DOI":"10.1523\/JNEUROSCI.2595-06.2006","article-title":"Targeted deletion of a single Sca8 ataxia locus allele in mice causes abnormal gait, progressive loss of motor coordination, and Purkinje cell dendritic deficits","volume":"26","author":"He","year":"2006","journal-title":"J. Neurosci"},{"key":"2023062712381323500_btz333-B12","doi-asserted-by":"crossref","first-page":"507","DOI":"10.1126\/science.273.5274.507","article-title":"The POU factor Oct-6 and Schwann cell differentiation","volume":"273","author":"Jaegle","year":"1996","journal-title":"Science"},{"key":"2023062712381323500_btz333-B13","first-page":"4237","article-title":"Statistical challenges of high-dimensional data","volume":"367","author":"Johnstone","year":"2009","journal-title":"Philos. Trans. Series A Math. Phys. Eng. Sci"},{"key":"2023062712381323500_btz333-B14","doi-asserted-by":"crossref","first-page":"1154","DOI":"10.2337\/db13-1301","article-title":"Expression of phosphofructokinase in skeletal muscle is influenced by genetic variation and associated with insulin sensitivity","volume":"63","author":"Keildson","year":"2014","journal-title":"Diabetes"},{"key":"2023062712381323500_btz333-B15","doi-asserted-by":"crossref","first-page":"94.","DOI":"10.1145\/3136625","article-title":"Feature selection: a data perspective","volume":"50","author":"Li","year":"2018","journal-title":"ACM Comp. Surveys"},{"key":"2023062712381323500_btz333-B16","first-page":"19","article-title":"Online learning for matrix factorization and sparse coding","volume":"11","author":"Mairal","year":"2010","journal-title":"J. Mach. Learn. Res"},{"key":"2023062712381323500_btz333-B17","author":"Peng","year":"2005"},{"key":"2023062712381323500_btz333-B18","doi-asserted-by":"crossref","first-page":"1226","DOI":"10.1109\/TPAMI.2005.159","article-title":"Feature selection based on mutual information: criteria of max-dependency, max-relevance, and min-redundancy","volume":"27","author":"Peng","year":"2005","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell"},{"key":"2023062712381323500_btz333-B19","doi-asserted-by":"crossref","first-page":"146.","DOI":"10.1186\/1471-2105-15-146","article-title":"GO2MSIG, an automated GO based multi-species gene set generator for gene set enrichment analysis","volume":"15","author":"Powell","year":"2014","journal-title":"BMC Bioinformatics"},{"key":"2023062712381323500_btz333-B20","doi-asserted-by":"crossref","first-page":"1009","DOI":"10.1111\/j.1467-9868.2009.00718.x","article-title":"Sparse additive models","volume":"71","author":"Ravikumar","year":"2009","journal-title":"J. R. Statist. Soc. Series B Statist. Methodol"},{"key":"2023062712381323500_btz333-B21","volume-title":"Learning with Kernels","author":"Sch\u00f6lkopf","year":"2002"},{"key":"2023062712381323500_btz333-B22","first-page":"1393","article-title":"Feature selection via dependence maximization","volume":"13","author":"Song","year":"2012","journal-title":"J. Mach. Learn. Res"},{"key":"2023062712381323500_btz333-B23","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1111\/j.2517-6161.1996.tb02080.x","article-title":"Regression shrinkage and selection via the Lasso","volume":"58","author":"Tibshirani","year":"1996","journal-title":"J. R. Statist. Soc. Series B Methodol"},{"key":"2023062712381323500_btz333-B24","doi-asserted-by":"crossref","first-page":"716","DOI":"10.1016\/j.cell.2018.05.061","article-title":"Recovering gene interactions from single-cell data using data diffusion","volume":"174","author":"van Dijk","year":"2018","journal-title":"Cell"},{"key":"2023062712381323500_btz333-B25","doi-asserted-by":"crossref","first-page":"925","DOI":"10.1126\/science.aah4573","article-title":"Single-cell RNA-seq reveals new types of human blood dendritic cells, monocytes, and progenitors","volume":"356","author":"Villani","year":"2017","journal-title":"Science"},{"key":"2023062712381323500_btz333-B26","doi-asserted-by":"crossref","first-page":"389","DOI":"10.1007\/978-3-642-02962-2_49","volume-title":"Rough Sets and Knowledge Technology","author":"Walters-Williams","year":"2009"},{"key":"2023062712381323500_btz333-B27","doi-asserted-by":"crossref","first-page":"D146","DOI":"10.1093\/nar\/gku1104","article-title":"miRDB: an online resource for microRNA target prediction and functional annotations","volume":"43","author":"Wong","year":"2015","journal-title":"Nucleic Acids Res"},{"key":"2023062712381323500_btz333-B28","doi-asserted-by":"crossref","first-page":"185","DOI":"10.1162\/NECO_a_00537","article-title":"High-dimensional feature selection by feature-wise kernelized lasso","volume":"26","author":"Yamada","year":"2014","journal-title":"Neural Computation"},{"key":"2023062712381323500_btz333-B29","doi-asserted-by":"crossref","first-page":"1352","DOI":"10.1109\/TKDE.2018.2789451","article-title":"Ultra high-dimensional nonlinear feature selection for big biological data","volume":"30","author":"Yamada","year":"2018","journal-title":"IEEE Trans. Knowl. Data Eng"},{"key":"2023062712381323500_btz333-B30","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1007\/s11222-016-9721-7","article-title":"Large-scale kernel methods for independence testing","volume":"28","author":"Zhang","year":"2018","journal-title":"Statist. Comput"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/35\/14\/i427\/50720630\/bioinformatics_35_14_i427.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/35\/14\/i427\/50720630\/bioinformatics_35_14_i427.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,17]],"date-time":"2024-07-17T16:39:57Z","timestamp":1721234397000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/35\/14\/i427\/5529193"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,7]]},"references-count":30,"journal-issue":{"issue":"14","published-print":{"date-parts":[[2019,7,15]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btz333","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/532192","asserted-by":"object"}]},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2019,7]]},"published":{"date-parts":[[2019,7]]}}}