{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T09:04:56Z","timestamp":1775552696320,"version":"3.50.1"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2024,8,14]],"date-time":"2024-08-14T00:00:00Z","timestamp":1723593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2024,8,14]],"date-time":"2024-08-14T00:00:00Z","timestamp":1723593600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172143"],"award-info":[{"award-number":["62172143"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key R &D Program of China","doi-asserted-by":"crossref","award":["2020YFB1710200"],"award-info":[{"award-number":["2020YFB1710200"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Complex Intell. 
Syst."],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1007\/s40747-024-01572-3","type":"journal-article","created":{"date-parts":[[2024,8,14]],"date-time":"2024-08-14T19:02:00Z","timestamp":1723662120000},"page":"8043-8061","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["A multi-level collaborative self-distillation learning for improving adaptive inference efficiency"],"prefix":"10.1007","volume":"10","author":[{"given":"Likun","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Jinbao","family":"Li","sequence":"additional","affiliation":[]},{"given":"Benqian","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yahong","family":"Guo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,14]]},"reference":[{"issue":"6","key":"1572_CR1","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2017) Imagenet classification with deep convolutional neural networks. Commun ACM 60(6):84\u201390. https:\/\/doi.org\/10.1145\/3065386","journal-title":"Commun ACM"},{"key":"1572_CR2","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. CoRR arXiv:1409.1556"},{"key":"1572_CR3","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1\u20139","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"1572_CR4","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"1572_CR5","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der\u00a0Maaten L, Weinberger KQ (2017) Densely connected convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4700\u20134708","DOI":"10.1109\/CVPR.2017.243"},{"key":"1572_CR6","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez A.N, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. In: Advances in neural information processing systems, pp 6000\u20136010"},{"key":"1572_CR7","doi-asserted-by":"crossref","unstructured":"Liu Z, Li J, Shen Z, Huang G, Yan S, Zhang C (2017) Learning efficient convolutional networks through network slimming. In: Proceedings of the IEEE international conference on computer vision, pp 2736\u20132744","DOI":"10.1109\/ICCV.2017.298"},{"key":"1572_CR8","unstructured":"Liu S, Chen T, Chen X, Shen L, Mocanu DC, Wang Z, Pechenizkiy M (2022) The unreasonable effectiveness of random pruning: return of the most naive baseline for sparse training. In: International conference on learning representations"},{"key":"1572_CR9","doi-asserted-by":"crossref","unstructured":"Wimmer P, Mehnert J, Condurache A (2022) Interspace pruning: using adaptive filter representations to improve training of sparse CNNs. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12527\u201312537","DOI":"10.1109\/CVPR52688.2022.01220"},{"key":"1572_CR10","first-page":"9","volume":"1050","author":"G Hinton","year":"2015","unstructured":"Hinton G, Vinyals O, Dean J (2015) Distilling the knowledge in a neural network. Stat 1050:9","journal-title":"Stat"},{"key":"1572_CR11","unstructured":"Romero A, Ballas N, Kahou SE, Chassang A, Gatta C, Bengio Y (2014) Fitnets: hints for thin deep nets. 
CoRR arXiv:1412.6550"},{"key":"1572_CR12","doi-asserted-by":"crossref","unstructured":"Chen P, Liu S, Zhao H, Jia J (2021) Distilling knowledge via knowledge review. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5008\u20135017","DOI":"10.1109\/CVPR46437.2021.00497"},{"key":"1572_CR13","doi-asserted-by":"crossref","unstructured":"Zhao B, Cui Q, Song R, Qiu Y, Liang J (2022) Decoupled knowledge distillation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 11953\u201311962","DOI":"10.1109\/CVPR52688.2022.01165"},{"key":"1572_CR14","doi-asserted-by":"publisher","DOI":"10.1007\/s40747-023-01036-0","author":"R Wang","year":"2023","unstructured":"Wang R, Wan S, Zhang W, Zhang C, Li Y, Xu S, Zhang L, Jin X, Jiang Z, Rao Y (2023) Progressive multi-level distillation learning for pruning network. Complex Intell Syst. https:\/\/doi.org\/10.1007\/s40747-023-01036-0","journal-title":"Complex Intell Syst"},{"key":"1572_CR15","unstructured":"Hubara I, Courbariaux M, Soudry D, El-Yaniv R, Bengio Y (2016) Binarized neural networks. In: Advances in neural information processing systems, pp 4114\u20134122"},{"key":"1572_CR16","unstructured":"Wei X, Gong R, Li Y, Liu X, Yu F (2022) Qdrop: randomly dropping quantization for extremely low-bit post-training quantization. In: International conference on learning representations"},{"key":"1572_CR17","doi-asserted-by":"crossref","unstructured":"Shang Y, Xu D, Zong Z, Nie L, Yan Y (2022) Network binarization via contrastive learning. In: Computer vision\u2013ECCV 2022: 17th European conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XI. Springer, pp 586\u2013602","DOI":"10.1007\/978-3-031-20083-0_35"},{"key":"1572_CR18","unstructured":"Huang G, Chen D (2018) Multi-scale dense networks for resource efficient image classification. 
In: International conference on learning representations"},{"key":"1572_CR19","doi-asserted-by":"crossref","unstructured":"Li H, Zhang H, Qi X, Yang R, Huang G (2019) Improved techniques for training adaptive deep networks. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 1891\u20131900","DOI":"10.1109\/ICCV.2019.00198"},{"key":"1572_CR20","doi-asserted-by":"crossref","unstructured":"Yang L, Han Y, Chen X, Song S, Dai J, Huang G (2020) Resolution adaptive networks for efficient inference. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2369\u20132378","DOI":"10.1109\/CVPR42600.2020.00244"},{"key":"1572_CR21","doi-asserted-by":"crossref","unstructured":"Zhang L, Song J, Gao A, Chen J, Bao C, Ma K (2019) Be your own teacher: improve the performance of convolutional neural networks via self distillation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 3713\u20133722","DOI":"10.1109\/ICCV.2019.00381"},{"key":"1572_CR22","doi-asserted-by":"crossref","unstructured":"Wang X, Yu F, Dou Z-Y, Darrell T, Gonzalez JE (2018) Skipnet: learning dynamic routing in convolutional networks. In: Proceedings of the European conference on computer vision (ECCV), pp 409\u2013424","DOI":"10.1007\/978-3-030-01261-8_25"},{"key":"1572_CR23","unstructured":"Zhang L, Tan Z, Song J, Chen J, Bao C, Ma K (2019) Scan: a scalable neural networks framework towards compact and efficient models. In: Advances in neural information processing Systems, pp 4027\u20134036"},{"key":"1572_CR24","doi-asserted-by":"crossref","unstructured":"Teerapittayanon S, McDanel B, Kung H-T (2016) Branchynet: fast inference via early exiting from deep neural networks. 
In: 2016 23rd International conference on pattern recognition (ICPR), pp 2464\u20132469","DOI":"10.1109\/ICPR.2016.7900006"},{"key":"1572_CR25","unstructured":"Iandola FN, Han S, Moskewicz MW, Ashraf K, Dally WJ, Keutzer K (2016) Squeezenet: Alexnet-level accuracy with 50x fewer parameters and $$< 0.5$$ mb model size. CoRR arXiv:1602.07360"},{"key":"1572_CR26","doi-asserted-by":"crossref","unstructured":"Sandler M, Howard A, Zhu M, Zhmoginov A, Chen L-C (2018) Mobilenetv2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4510\u20134520","DOI":"10.1109\/CVPR.2018.00474"},{"key":"1572_CR27","doi-asserted-by":"crossref","unstructured":"Ma N, Zhang X, Zheng H-T, Sun J (2018) Shufflenet v2: practical guidelines for efficient CNN architecture design. In: Proceedings of the European conference on computer vision (ECCV), pp 116\u2013131","DOI":"10.1007\/978-3-030-01264-9_8"},{"key":"1572_CR28","doi-asserted-by":"crossref","unstructured":"Mehta S, Rastegari M, Caspi A, Shapiro L, Hajishirzi H (2018) Espnet: efficient spatial pyramid of dilated convolutions for semantic segmentation. In: Proceedings of the European conference on computer vision (ECCV), pp 552\u2013568","DOI":"10.1007\/978-3-030-01249-6_34"},{"key":"1572_CR29","doi-asserted-by":"crossref","unstructured":"Han K, Wang Y, Tian Q, Guo J, Xu C, Xu C (2020) Ghostnet: more features from cheap operations. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1580\u20131589","DOI":"10.1109\/CVPR42600.2020.00165"},{"issue":"11","key":"1572_CR30","doi-asserted-by":"publisher","first-page":"7436","DOI":"10.1109\/TPAMI.2021.3117837","volume":"44","author":"Y Han","year":"2021","unstructured":"Han Y, Huang G, Song S, Yang L, Wang H, Wang Y (2021) Dynamic neural networks: a survey. IEEE Trans Pattern Anal Mach Intell 44(11):7436\u20137456. 
https:\/\/doi.org\/10.1109\/TPAMI.2021.3117837","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1572_CR31","unstructured":"Denton EL, Zaremba W, Bruna J, LeCun Y, Fergus R (2014) Exploiting linear structure within convolutional networks for efficient evaluation. In: Advances in neural information processing systems, pp 1269\u20131277"},{"key":"1572_CR32","first-page":"28","volume":"1050","author":"K Cho","year":"2014","unstructured":"Cho K, Bengio Y (2014) Exponentially increasing the capacity-to-computation ratio for conditional computation in deep learning. Stat 1050:28","journal-title":"Stat"},{"key":"1572_CR33","doi-asserted-by":"crossref","unstructured":"Park E, Kim D, Kim S, Kim Y-D, Kim G, Yoon S, Yoo S (2015) Big\/little deep neural network for ultra low power inference. In: 2015 International conference on hardware\/software codesign and system synthesis (codes+ Isss), pp 124\u2013132","DOI":"10.1109\/CODESISSS.2015.7331375"},{"key":"1572_CR34","unstructured":"Bolukbasi T, Wang J, Dekel O, Saligrama V (2017) Adaptive neural networks for efficient inference. In: International conference on machine learning, pp 527\u2013536"},{"issue":"1","key":"1572_CR35","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1162\/neco.1991.3.1.79","volume":"3","author":"RA Jacobs","year":"1991","unstructured":"Jacobs RA, Jordan MI, Nowlan SJ, Hinton GE (1991) Adaptive mixtures of local experts. Neural Comput 3(1):79\u201387","journal-title":"Neural Comput"},{"key":"1572_CR36","unstructured":"Mullapudi RT, Mark WR, Shazeer N, Fatahalian K (2018) Hydranets: specialized dynamic architectures for efficient inference. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 8080\u20138089"},{"key":"1572_CR37","doi-asserted-by":"publisher","first-page":"1789","DOI":"10.1007\/s11263-021-01453-z","volume":"129","author":"J Gou","year":"2021","unstructured":"Gou J, Yu B, Maybank SJ, Tao D (2021) Knowledge distillation: a survey. 
Int J Comput Vis 129:1789\u20131819","journal-title":"Int J Comput Vis"},{"key":"1572_CR38","unstructured":"Komodakis N, Zagoruyko S (2017) Paying more attention to attention: improving the performance of convolutional neural networks via attention transfer. In: ICLR"},{"key":"1572_CR39","doi-asserted-by":"crossref","unstructured":"Yim J, Joo D, Bae J, Kim J (2017) A gift from knowledge distillation: fast optimization, network minimization and transfer learning. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4133\u20134141","DOI":"10.1109\/CVPR.2017.754"},{"key":"1572_CR40","doi-asserted-by":"crossref","unstructured":"Peng B, Jin X, Liu J, Li D, Wu Y, Liu Y, Zhou S, Zhang Z (2019) Correlation congruence for knowledge distillation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 5007\u20135016","DOI":"10.1109\/ICCV.2019.00511"},{"key":"1572_CR41","doi-asserted-by":"crossref","unstructured":"Tung F, Mori G (2019) Similarity-preserving knowledge distillation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 1365\u20131374","DOI":"10.1109\/ICCV.2019.00145"},{"key":"1572_CR42","doi-asserted-by":"crossref","unstructured":"Park W, Kim D, Lu Y, Cho M (2019) Relational knowledge distillation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3967\u20133976","DOI":"10.1109\/CVPR.2019.00409"},{"key":"1572_CR43","doi-asserted-by":"crossref","unstructured":"Phuong M, Lampert CH (2019) Distillation-based training for multi-exit architectures. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 1355\u20131364","DOI":"10.1109\/ICCV.2019.00144"},{"key":"1572_CR44","unstructured":"Zhu X, Gong S et al (2018) Knowledge distillation by on-the-fly native ensemble. 
Adv Neural Inf Process Syst 31"},{"key":"1572_CR45","unstructured":"Krizhevsky A et al (2009) Learning multiple layers of features from tiny images"},{"issue":"3","key":"1572_CR46","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky O, Deng J, Su H, Krause J, Satheesh S, Ma S, Huang Z, Karpathy A, Khosla A, Bernstein M et al (2015) Imagenet large scale visual recognition challenge. Int J Comput Vis 115(3):211\u2013252","journal-title":"Int J Comput Vis"},{"key":"1572_CR47","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2023.110867","volume":"148","author":"H Nasiri","year":"2023","unstructured":"Nasiri H, Ebadzadeh MM (2023) Multi-step-ahead stock price prediction using recurrent fuzzy neural network and variational mode decomposition. Appl Soft Comput 148:110867. https:\/\/doi.org\/10.1016\/j.asoc.2023.110867","journal-title":"Appl Soft Comput"},{"key":"1572_CR48","doi-asserted-by":"crossref","unstructured":"You S, Xu C, Xu C, Tao D (2017) Learning from multiple teacher networks. In: Proceedings of the 23rd ACM SIGKDD international conference on knowledge discovery and data mining, pp 1285\u20131294","DOI":"10.1145\/3097983.3098135"},{"key":"1572_CR49","doi-asserted-by":"crossref","unstructured":"Kwon K, Na H, Lee H, Kim NS (2020) Adaptive knowledge distillation based on entropy. In: ICASSP 2020-2020 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 7409\u20137413","DOI":"10.1109\/ICASSP40776.2020.9054698"},{"key":"1572_CR50","doi-asserted-by":"crossref","unstructured":"Zhang H, Chen D, Wang C (2022) Confidence-aware multi-teacher knowledge distillation. 
In: ICASSP 2022-2022 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 4498\u20134502","DOI":"10.1109\/ICASSP43922.2022.9747534"},{"key":"1572_CR51","unstructured":"Tian Y, Krishnan D, Isola P (2020) Contrastive representation distillation. In: International conference on learning representations"},{"key":"1572_CR52","doi-asserted-by":"crossref","unstructured":"Yuan L, Tay FE, Li G, Wang T, Feng J (2020) Revisiting knowledge distillation via label smoothing regularization. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3903\u20133911","DOI":"10.1109\/CVPR42600.2020.00396"}],"container-title":["Complex &amp; Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-024-01572-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40747-024-01572-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-024-01572-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,16]],"date-time":"2024-10-16T22:18:52Z","timestamp":1729117132000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40747-024-01572-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,14]]},"references-count":52,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["1572"],"URL":"https:\/\/doi.org\/10.1007\/s40747-024-01572-3","relation":{},"ISSN":["2199-4536","2198-6053"],"issn-type":[{"value":"2199-4536","type":"print"},{"value":"2198-6053","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8,14]]},"assertion":[{"value":"30 December 
2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 July 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 August 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}