{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:47:46Z","timestamp":1767340066466,"version":"3.40.3"},"publisher-location":"Cham","reference-count":50,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031263897"},{"type":"electronic","value":"9783031263903"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-26390-3_38","type":"book-chapter","created":{"date-parts":[[2023,3,16]],"date-time":"2023-03-16T09:04:46Z","timestamp":1678957486000},"page":"655-670","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Self-distilled Pruning of\u00a0Deep Neural Networks"],"prefix":"10.1007","author":[{"given":"James","family":"O\u2019 Neill","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sourav","family":"Dutta","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haytham","family":"Assem","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,3,17]]},"reference":[{"key":"38_CR1","doi-asserted-by":"crossref","unstructured":"Ahn, S., Hu, S.X., Damianou, A., Lawrence, N.D., Dai, Z.: Variational information distillation for knowledge transfer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9163\u20139171 (2019)","DOI":"10.1109\/CVPR.2019.00938"},{"key":"38_CR2","unstructured":"Allen-Zhu, Z., Li, Y.: Towards understanding ensemble, knowledge distillation and self-distillation in deep learning. arXiv preprint arXiv:2012.09816 (2020)"},{"issue":"3","key":"38_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3005348","volume":"13","author":"S Anwar","year":"2017","unstructured":"Anwar, S., Hwang, K., Sung, W.: Structured pruning of deep convolutional neural networks. ACM J. Emerg. Technol. Comput. Syst. (JETC) 13(3), 1\u201318 (2017)","journal-title":"ACM J. Emerg. Technol. Comput. Syst. (JETC)"},{"key":"38_CR4","unstructured":"Bartoldson, B.R., Morcos, A.S., Barbu, A., Erlebacher, G.: The generalization-stability tradeoff in neural network pruning. arXiv preprint arXiv:1906.03728 (2019)"},{"key":"38_CR5","doi-asserted-by":"crossref","unstructured":"Conneau, A., et al.: Unsupervised cross-lingual representation learning at scale. arXiv preprint arXiv:1911.02116 (2019)","DOI":"10.18653\/v1\/2020.acl-main.747"},{"key":"38_CR6","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"38_CR7","unstructured":"Dong, X., Chen, S., Pan, S.J.: Learning to prune deep neural networks via layer-wise optimal brain surgeon. arXiv preprint arXiv:1705.07565 (2017)"},{"issue":"2093","key":"38_CR8","first-page":"1203","volume":"464","author":"D Evans","year":"2008","unstructured":"Evans, D.: A computationally efficient estimator for mutual information. Proc. R. Soc. A Math. Phys. Eng. Sci. 464(2093), 1203\u20131215 (2008)","journal-title":"Proc. R. Soc. A Math. Phys. Eng. Sci."},{"key":"38_CR9","unstructured":"Furlanello, T., Lipton, Z., Tschannen, M., Itti, L., Anandkumar, A.: Born again neural networks. In: International Conference on Machine Learning, pp. 1607\u20131616. PMLR (2018)"},{"key":"38_CR10","unstructured":"Han, S., Mao, H., Dally, W.: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint (2015)"},{"key":"38_CR11","unstructured":"Han, S., Pool, J., Tran, J., Dally, W.J.: Learning both weights and connections for efficient neural networks. arXiv preprint arXiv:1506.02626 (2015)"},{"key":"38_CR12","unstructured":"Hassibi, B., Stork, D.G.: Second Order Derivatives for Network Pruning: Optimal Brain Surgeon. Morgan Kaufmann (1993)"},{"key":"38_CR13","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)"},{"key":"38_CR14","unstructured":"Huang, Z., Wang, N.: Like what you like: knowledge distill via neuron selectivity transfer. arXiv preprint arXiv:1707.01219 (2017)"},{"key":"38_CR15","doi-asserted-by":"crossref","unstructured":"Jiao, X., et al.: TinyBERT: distilling bert for natural language understanding. arXiv preprint arXiv:1909.10351 (2019)","DOI":"10.18653\/v1\/2020.findings-emnlp.372"},{"issue":"2","key":"38_CR16","doi-asserted-by":"publisher","first-page":"239","DOI":"10.1109\/72.80236","volume":"1","author":"ED Karnin","year":"1990","unstructured":"Karnin, E.D.: A simple procedure for pruning back-propagation trained neural networks. IEEE Trans. Neural Netw. 1(2), 239\u2013242 (1990)","journal-title":"IEEE Trans. Neural Netw."},{"issue":"6","key":"38_CR17","doi-asserted-by":"publisher","first-page":"066138","DOI":"10.1103\/PhysRevE.69.066138","volume":"69","author":"A Kraskov","year":"2004","unstructured":"Kraskov, A., St\u00f6gbauer, H., Grassberger, P.: Estimating mutual information. Phys. Rev. E 69(6), 066138 (2004)","journal-title":"Phys. Rev. E"},{"key":"38_CR18","doi-asserted-by":"crossref","unstructured":"Lebedev, V., Lempitsky, V.: Fast convnets using group-wise brain damage. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2554\u20132564 (2016)","DOI":"10.1109\/CVPR.2016.280"},{"key":"38_CR19","unstructured":"LeCun, Y., Denker, J.S., Solla, S.A.: Optimal brain damage. In: Advances in Neural Information Processing Systems, pp. 598\u2013605 (1990)"},{"key":"38_CR20","unstructured":"Lee, J., Park, S., Mo, S., Ahn, S., Shin, J.: Layer-adaptive sparsity for the magnitude-based pruning. In: International Conference on Learning Representations (2020)"},{"key":"38_CR21","unstructured":"Li, H., Kadav, A., Durdanovic, I., Samet, H., Graf, H.P.: Pruning filters for efficient convnets. arXiv preprint arXiv:1608.08710 (2016)"},{"key":"38_CR22","doi-asserted-by":"crossref","unstructured":"Liang, Y., et al.: XGLUE: a new benchmark dataset for cross-lingual pre-training, understanding and generation. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 6008\u20136018 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.484"},{"key":"38_CR23","doi-asserted-by":"crossref","unstructured":"Liu, Z., Li, J., Shen, Z., Huang, G., Yan, S., Zhang, C.: Learning efficient convolutional networks through network slimming. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2736\u20132744 (2017)","DOI":"10.1109\/ICCV.2017.298"},{"key":"38_CR24","unstructured":"Louizos, C., Welling, M., Kingma, D.P.: Learning sparse neural networks through $$ l_0 $$ regularization. arXiv preprint arXiv:1712.01312 (2017)"},{"key":"38_CR25","doi-asserted-by":"crossref","unstructured":"Mallya, A., Davis, D., Lazebnik, S.: Piggyback: adapting a single network to multiple tasks by learning to mask weights. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 67\u201382 (2018)","DOI":"10.1007\/978-3-030-01225-0_5"},{"key":"38_CR26","unstructured":"Martens, J., Grosse, R.: Optimizing neural networks with kronecker-factored approximate curvature. In: International Conference on Machine Learning, pp. 2408\u20132417. PMLR (2015)"},{"key":"38_CR27","unstructured":"Mobahi, H., Farajtabar, M., Bartlett, P.L.: Self-distillation amplifies regularization in hilbert space. arXiv preprint arXiv:2002.05715 (2020)"},{"key":"38_CR28","unstructured":"Molchanov, D., Ashukha, A., Vetrov, D.: Variational dropout sparsifies deep neural networks. In: International Conference on Machine Learning, pp. 2498\u20132507. PMLR (2017)"},{"key":"38_CR29","unstructured":"Mozer, M.C., Smolensky, P.: Skeletonization: a technique for trimming the fat from a network via relevance assessment. In: Advances in Neural Information Processing Systems, pp. 107\u2013115 (1989)"},{"key":"38_CR30","unstructured":"Neill, J.O., Bollegala, D.: Semantically-conditioned negative samples for efficient contrastive learning. arXiv preprint arXiv:2102.06603 (2021)"},{"key":"38_CR31","unstructured":"Park, S., Lee, J., Mo, S., Shin, J.: Lookahead: a far-sighted alternative of magnitude-based pruning. arXiv preprint arXiv:2002.04809 (2020)"},{"key":"38_CR32","doi-asserted-by":"crossref","unstructured":"Park, W., Kim, D., Lu, Y., Cho, M.: Relational knowledge distillation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3967\u20133976 (2019)","DOI":"10.1109\/CVPR.2019.00409"},{"issue":"5","key":"38_CR33","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1109\/72.248452","volume":"4","author":"R Reed","year":"1993","unstructured":"Reed, R.: Pruning algorithms-a survey. IEEE Trans. Neural Netw. 4(5), 740\u2013747 (1993)","journal-title":"IEEE Trans. Neural Netw."},{"key":"38_CR34","unstructured":"Romero, A., Ballas, N., Kahou, S.E., Chassang, A., Gatta, C., Bengio, Y.: FitNets: hints for thin deep nets. arXiv preprint arXiv:1412.6550 (2014)"},{"key":"38_CR35","unstructured":"Sanh, V., Debut, L., Chaumond, J., Wolf, T.: DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108 (2019)"},{"key":"38_CR36","unstructured":"Sanh, V., Wolf, T., Rush, A.M.: Movement pruning: adaptive sparsity by fine-tuning. arXiv preprint arXiv:2005.07683 (2020)"},{"key":"38_CR37","unstructured":"Singh, S.P., Alistarh, D.: WoodFisher: efficient second-order approximations for model compression. arXiv preprint arXiv:2004.14340 (2020)"},{"key":"38_CR38","unstructured":"Stanton, S., Izmailov, P., Kirichenko, P., Alemi, A.A., Wilson, A.G.: Does knowledge distillation really work? arXiv preprint arXiv:2106.05945 (2021)"},{"key":"38_CR39","unstructured":"Tian, Y., Krishnan, D., Isola, P.: Contrastive representation distillation. arXiv preprint arXiv:1910.10699 (2019)"},{"key":"38_CR40","unstructured":"Turc, I., Chang, M.W., Lee, K., Toutanova, K.: Well-read students learn better: on the importance of pre-training compact models. arXiv preprint arXiv:1908.08962 (2019)"},{"key":"38_CR41","unstructured":"Ver Steeg, G.: Non-parametric entropy estimation toolbox (NPEET). Technical report (2000). https:\/\/www.isi.edu\/~gregv\/npeet_doc.pdf"},{"key":"38_CR42","doi-asserted-by":"crossref","unstructured":"Ver Steeg, G., Galstyan, A.: Information-theoretic measures of influence based on content dynamics. In: Proceedings of the Sixth ACM International Conference on Web Search and Data Mining, pp. 3\u201312 (2013)","DOI":"10.1145\/2433396.2433400"},{"key":"38_CR43","doi-asserted-by":"crossref","unstructured":"Wang, A., Singh, A., Michael, J., Hill, F., Levy, O., Bowman, S.R.: GLUE: a multi-task benchmark and analysis platform for natural language understanding. arXiv preprint arXiv:1804.07461 (2018)","DOI":"10.18653\/v1\/W18-5446"},{"key":"38_CR44","unstructured":"Wang, C., Grosse, R., Fidler, S., Zhang, G.: EigenDamage: structured pruning in the kronecker-factored eigenbasis. In: International Conference on Machine Learning, pp. 6566\u20136575. PMLR (2019)"},{"key":"38_CR45","unstructured":"Wen, W., Wu, C., Wang, Y., Chen, Y., Li, H.: Learning structured sparsity in deep neural networks. arXiv preprint arXiv:1608.03665 (2016)"},{"key":"38_CR46","doi-asserted-by":"crossref","unstructured":"Yang, C., Xie, L., Qiao, S., Yuille, A.L.: Training deep neural networks in generations: a more tolerant teacher educates better students. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 5628\u20135635 (2019)","DOI":"10.1609\/aaai.v33i01.33015628"},{"key":"38_CR47","unstructured":"Ye, J., Lu, X., Lin, Z., Wang, J.Z.: Rethinking the smaller-norm-less-informative assumption in channel pruning of convolution layers. arXiv preprint arXiv:1802.00124 (2018)"},{"issue":"1","key":"38_CR48","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1111\/j.1467-9868.2005.00532.x","volume":"68","author":"M Yuan","year":"2006","unstructured":"Yuan, M., Lin, Y.: Model selection and estimation in regression with grouped variables. J. R. Stat. Soc. Ser. B (Stat. Methodol.) 68(1), 49\u201367 (2006)","journal-title":"J. R. Stat. Soc. Ser. B (Stat. Methodol.)"},{"key":"38_CR49","unstructured":"Zbontar, J., Jing, L., Misra, I., LeCun, Y., Deny, S.: Barlow twins: self-supervised learning via redundancy reduction. arXiv preprint arXiv:2103.03230 (2021)"},{"key":"38_CR50","unstructured":"Zhu, M., Gupta, S.: To prune, or not to prune: exploring the efficacy of pruning for model compression. arXiv preprint arXiv:1710.01878 (2017)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-26390-3_38","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,13]],"date-time":"2023-10-13T07:09:49Z","timestamp":1697180989000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-26390-3_38"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031263897","9783031263903"],"references-count":50,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-26390-3_38","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"17 March 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Grenoble","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"France","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2022.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1060","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"236","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"22% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17 demo track papers have been accepted from 28 submissions","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}