{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T08:41:06Z","timestamp":1772095266610,"version":"3.50.1"},"reference-count":90,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Inform. Theory"],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1109\/tit.2024.3414266","type":"journal-article","created":{"date-parts":[[2024,6,13]],"date-time":"2024-06-13T17:42:40Z","timestamp":1718300560000},"page":"6572-6595","source":"Crossref","is-referenced-by-count":4,"title":["Data-Dependent Generalization Bounds via Variable-Size Compressibility"],"prefix":"10.1109","volume":"70","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3576-9552","authenticated-orcid":false,"given":"Milad","family":"Sefidgaran","sequence":"first","affiliation":[{"name":"Paris Research Center, Huawei Technologies France, Boulogne-Billancourt, France"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2023-9476","authenticated-orcid":false,"given":"Abdellatif","family":"Zaidi","sequence":"additional","affiliation":[{"name":"Paris Research Center, Huawei Technologies France, Boulogne-Billancourt, France"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781107298019"},{"key":"ref2","first-page":"1232","article-title":"Controlling bias in adaptive data analysis using information theory","volume-title":"Proc. 19th Int. Conf. Artif. Intell. Statist.","volume":"51","author":"Russo"},{"key":"ref3","article-title":"Information-theoretic analysis of generalization capability of learning algorithms","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Xu"},{"key":"ref4","first-page":"3437","article-title":"Reasoning about generalization via conditional mutual information","volume-title":"Proc. Conf. Learn. Theory","author":"Steinke"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2021.3085190"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/JSAIT.2020.2991139"},{"key":"ref7","first-page":"26370","article-title":"Towards a unified information-theoretic framework for generalization","volume-title":"Proc. 35th Conf. Neural Inf. Process. Syst.","author":"Haghifam"},{"key":"ref8","first-page":"3526","article-title":"Information-theoretic generalization bounds for stochastic gradient descent","volume-title":"Proc. Conf. Learn. Theory. PMLR","author":"Neu"},{"key":"ref9","first-page":"8106","article-title":"An exact characterization of the generalization error for the Gibbs algorithm","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","volume":"34","author":"Aminian"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2022.3144615"},{"key":"ref11","first-page":"3524","article-title":"Generalization bounds via convex analysis","volume-title":"Proc. Conf. Learn. Theory","author":"Lugosi"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2023.3268527"},{"key":"ref13","volume-title":"Relating Data Compression and Learnability","author":"Littlestone","year":"1986"},{"key":"ref14","first-page":"489","article-title":"A sharp lower bound for agnostic learning with sample compression schemes","volume-title":"Proc. 30th Int. Conf. Algorithmic Learn. Theory","volume":"98","author":"Hanneke"},{"key":"ref15","first-page":"466","article-title":"Sample compression for real-valued learners","volume-title":"Proc. 30th Int. Conf. Algorithmic Learn. Theory","author":"Hanneke"},{"key":"ref16","first-page":"582","article-title":"Proper learning, Helly number, and an optimal SVM bound","volume-title":"Proc. 33rd Annu. Conf. Comput. Learn. Theory","volume":"125","author":"Bousquet"},{"key":"ref17","first-page":"697","article-title":"Stable sample compression schemes: New applications and an optimal SVM margin bound","volume-title":"Proc. 32nd Int. Conf. Algorithmic Learn. Theory (PMLR)","author":"Hanneke"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ITA50056.2020.9244988"},{"key":"ref19","first-page":"662","article-title":"Learning with metric losses","volume-title":"Proc. 35th Conf. Learn. Theory","volume":"178","author":"Cohen"},{"key":"ref20","first-page":"254","article-title":"Stronger generalization bounds for deep nets via a compression approach","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","volume":"80","author":"Arora"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/393"},{"key":"ref22","article-title":"Generalization bounds via distillation","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Hsu"},{"key":"ref23","first-page":"29364","article-title":"Heavy tails in SGD and compressibility of overparametrized neural networks","volume-title":"Proc. 35th Conf. Neural Inf. Process. Syst.","author":"Barsbey"},{"key":"ref24","first-page":"4416","article-title":"Rate-distortion theoretic generalization bounds for stochastic learning algorithms","volume-title":"Proc. Conf. Learn. Theory","author":"Sefidgaran"},{"key":"ref25","first-page":"5138","article-title":"Hausdorff dimension, heavy tails, and generalization in neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"\u015eim\u015fekli"},{"key":"ref26","first-page":"6776","article-title":"Intrinsic dimension, persistent homology and generalization in neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Birdal"},{"key":"ref27","first-page":"8774","article-title":"Generalization bounds using lower tail exponents in stochastic optimizers","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Hodgkinson"},{"key":"ref28","first-page":"26590","article-title":"Chaotic regularization and heavy-tailed limits for deterministic gradient descent","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Lim"},{"key":"ref29","first-page":"18774","article-title":"Fractal structure and generalization properties of stochastic optimization algorithms","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","volume":"34","author":"Camuto"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/279943.279989"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/307400.307435"},{"issue":"2","key":"ref32","first-page":"233","article-title":"PAC-Bayesian generalisation error bounds for Gaussian process classification","volume":"3","author":"Seeger","year":"2003","journal-title":"J. Mach. Learn. Res."},{"key":"ref33","article-title":"(Not) bounding the true error","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"14","author":"Langford"},{"key":"ref34","first-page":"2","article-title":"A PAC-Bayesian approach to adaptive classification","volume":"840","author":"Catoni","year":"2003","journal-title":"Preprint"},{"key":"ref35","article-title":"A note on the PAC Bayesian theorem","author":"Maurer","year":"2004","journal-title":"arXiv:cs\/0411099"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553419"},{"key":"ref37","article-title":"PAC-Bayes-empirical-Bernstein inequality","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"26","author":"Tolstikhin"},{"key":"ref38","first-page":"435","article-title":"PAC-Bayesian bounds based on the R\u00e9nyi divergence","volume-title":"Proc. Artif. Intell. Statist.","author":"B\u00e9gin"},{"key":"ref39","first-page":"466","article-title":"A strongly quasiconvex PAC-Bayesian bound","volume-title":"Proc. Int. Conf. Algorithmic Learn. Theory","author":"Thiemann"},{"key":"ref40","article-title":"Computing nonvacuous generalization bounds for deep (stochastic) neural networks with many more parameters than training data","author":"Dziugaite","year":"2017","journal-title":"arXiv:1703.11008"},{"key":"ref41","article-title":"A PAC-Bayesian approach to spectrally-normalized margin bounds for neural networks","volume-title":"Proc. Int. Conf. Learn. Represent","author":"Neyshabur"},{"key":"ref42","first-page":"16833","article-title":"PAC-Bayes analysis beyond the usual bounds","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Rivasplata"},{"key":"ref43","first-page":"7263","article-title":"In defense of uniform convergence: Generalization via derandomization with an application to interpolating predictors","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Negrea"},{"key":"ref44","article-title":"Information-theoretic generalization bounds for SGLD via data-dependent estimates","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Negrea"},{"key":"ref45","article-title":"A general framework for the practical disintegration of PAC-Bayesian bounds","author":"Viallard","year":"2021","journal-title":"arXiv:2102.08649"},{"key":"ref46","first-page":"163","volume-title":"PAC-Bayesian Supervised Classification","volume":"56","author":"Catoni","year":"2008"},{"key":"ref47","article-title":"User-friendly introduction to PAC-Bayes bounds","author":"Alquier","year":"2021","journal-title":"arXiv:2110.11216"},{"key":"ref48","article-title":"Non-vacuous generalization bounds at the ImageNet scale: A PAC-Bayesian compression approach","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Zhou"},{"key":"ref49","first-page":"31459","article-title":"PAC-Bayes compression bounds so tight that they can explain generalization","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","volume":"35","author":"Lotfi"},{"key":"ref50","article-title":"A unified framework for information-theoretic generalization bounds","author":"Chu","year":"2023","journal-title":"arXiv:2305.11042"},{"key":"ref51","volume-title":"Elements of Information Theory","author":"Cover","year":"2006"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139030687"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511921889"},{"key":"ref54","first-page":"7","article-title":"Lecture notes on information theory","volume":"6","author":"Polyanskiy","year":"2014","journal-title":"Lecture Notes ECE (UIUC)"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.3390\/e23101255"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-72927-3_10"},{"key":"ref57","first-page":"24670","article-title":"Information-theoretic generalization bounds for black-box learning algorithms","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Harutyunyan"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-45167-9_26"},{"key":"ref59","first-page":"663","article-title":"Limitations of information-theoretic generalization bounds for gradient descent methods in stochastic convex optimization","volume-title":"Proc. Int. Conf. Algorithmic Learn. Theory","author":"Haghifam"},{"key":"ref60","first-page":"3709","article-title":"On margins and derandomisation in PAC-Bayes","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Biggs"},{"key":"ref61","article-title":"A primer on PAC-Bayesian learning","author":"Guedj","year":"2019","journal-title":"arXiv:1901.05353"},{"key":"ref62","article-title":"Generalisation under gradient descent via deterministic PAC-Bayes","author":"Clerico","year":"2022","journal-title":"arXiv:2209.02525"},{"key":"ref63","article-title":"Bounds for Averaging Classifiers","volume-title":"School of Computer Science, Carnegie Mellon University","author":"Langford","year":"2001"},{"key":"ref64","article-title":"PAC-Bayes & margins","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"15","author":"Langford"},{"key":"ref65","article-title":"Three factors influencing minima in SGD","author":"Jastrz\u0229bski","year":"2017","journal-title":"arXiv:1711.04623"},{"key":"ref66","article-title":"The break-even point on optimization trajectories of deep neural networks","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Jastrzebski"},{"key":"ref67","first-page":"4772","article-title":"Catastrophic Fisher explosion: Early phase Fisher matrix impacts generalization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jastrzebski"},{"issue":"1","key":"ref68","first-page":"7479","article-title":"Implicit self-regularization in deep neural networks: Evidence from random matrix theory and implications for learning","volume":"22","author":"Martin","year":"2021","journal-title":"J. Mach. Learn. 
Res."},{"key":"ref69","article-title":"A walk with SGD","author":"Xing","year":"2018","journal-title":"arXiv:1802.08770"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2012.2186786"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2019.2919718"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/ISIT.2016.7541665"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2019.2922186"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1007\/BF02063299"},{"key":"ref75","article-title":"Universal compressed sensing","author":"Jalali","year":"2014","journal-title":"arXiv:1406.7807"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2018.2806219"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/ISIT50566.2022.9834845"},{"key":"ref78","article-title":"Learning multiple layers of features from tiny images","volume-title":"Univ. Toronto","author":"Krizhevsky","year":"2009"},{"key":"ref79","article-title":"How many samples are needed to estimate a convolutional neural network?","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Du"},{"key":"ref80","article-title":"Why are convolutional nets more sample-efficient than fully-connected nets?","volume-title":"Proc. Int. Conf. Learn. Represent","author":"Li"},{"key":"ref81","article-title":"Theoretical analysis of the inductive biases in deep convolutional networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Wang"},{"key":"ref82","article-title":"Minimum description length and generalization guarantees for representation learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Sefidgaran"},{"key":"ref83","article-title":"Lessons from generalization error analysis of federated learning: You may communicate less often!","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Sefidgaran"},{"key":"ref84","article-title":"Information theoretic lower bounds for information theoretic upper bounds","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Livni"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.5555\/3454287.3455008"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1017\/9781108627771"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-7091-2928-9_1"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1137\/130939730"},{"key":"ref89","first-page":"4381","article-title":"Stability of stochastic gradient descent on nonsmooth convex losses","volume-title":"Proc. Int. Conf. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Bassily"},{"key":"ref90","first-page":"63","article-title":"SGD generalizes better than GD (and regularization doesn\u2019t help)","volume-title":"Proc. Conf. Learn. 
Theory","author":"Amir"}],"container-title":["IEEE Transactions on Information Theory"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/18\/10642977\/10556630.pdf?arnumber=10556630","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,31]],"date-time":"2024-08-31T04:39:10Z","timestamp":1725079150000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10556630\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9]]},"references-count":90,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/tit.2024.3414266","relation":{},"ISSN":["0018-9448","1557-9654"],"issn-type":[{"value":"0018-9448","type":"print"},{"value":"1557-9654","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9]]}}}