{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T07:59:52Z","timestamp":1780473592587,"version":"3.54.1"},"reference-count":281,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"11","license":[{"start":{"date-parts":[[2022,11,1]],"date-time":"2022-11-01T00:00:00Z","timestamp":1667260800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,11,1]],"date-time":"2022-11-01T00:00:00Z","timestamp":1667260800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,11,1]],"date-time":"2022-11-01T00:00:00Z","timestamp":1667260800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100018537","name":"National Science and Technology Major Project","doi-asserted-by":"publisher","award":["2018AAA0100701"],"award-info":[{"award-number":["2018AAA0100701"]}],"id":[{"id":"10.13039\/501100018537","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61906106"],"award-info":[{"award-number":["61906106"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62022048"],"award-info":[{"award-number":["62022048"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004147","name":"Tsinghua University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004147","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2022,11,1]]},"DOI":"10.1109\/tpami.2021.3117837","type":"journal-article","created":{"date-parts":[[2021,10,7]],"date-time":"2021-10-07T03:21:46Z","timestamp":1633576906000},"page":"7436-7456","source":"Crossref","is-referenced-by-count":612,"title":["Dynamic Neural Networks: A Survey"],"prefix":"10.1109","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5706-8784","authenticated-orcid":false,"given":"Yizeng","family":"Han","sequence":"first","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7251-0988","authenticated-orcid":false,"given":"Gao","family":"Huang","sequence":"additional","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0858-1770","authenticated-orcid":false,"given":"Shiji","family":"Song","sequence":"additional","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8379-4915","authenticated-orcid":false,"given":"Le","family":"Yang","sequence":"additional","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8953-4428","authenticated-orcid":false,"given":"Honghui","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1363-0234","authenticated-orcid":false,"given":"Yulin","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.5555\/2999134.2999257"},{"key":"ref2","article-title":"Very deep convolutional networks for large-scale image recognition","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Simonyan"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.4324\/9781410605337-29"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"ref6","first-page":"6000","article-title":"Attention is all you need","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Vaswani"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/vl\/N19-142"},{"key":"ref8","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Brown"},{"key":"ref9","article-title":"Neural architecture search with reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zoph"},{"key":"ref10","article-title":"DARTS: Differentiable architecture search","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Liu"},{"key":"ref11","article-title":"Adaptive computation time for recurrent neural networks","author":"Graves","year":"2016"},{"key":"ref12","article-title":"Multi-scale dense networks for resource efficient image classification","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Huang"},{"key":"ref13","first-page":"1307","article-title":"CondConv: Conditionally parameterized convolutions for efficient inference","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Yang"},{"key":"ref14","first-page":"3859","article-title":"Dynamic routing between capsules","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Sabour"},{"key":"ref15","article-title":"Runtime neural pruning","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Lin"},{"key":"ref16","article-title":"Outrageously large neural networks: The sparsely-gated mixture-of-experts layer","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Shazeer"},{"key":"ref17","first-page":"523","article-title":"Learning feed-forward one-shot learners","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Bertinetto"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00193"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01104"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00745"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.554"},{"key":"ref23","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kingma"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/s13398-014-0173-7.2"},{"key":"ref25","first-page":"12635","article-title":"Implicit semantic data augmentation for deep networks","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Wang"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00020"},{"key":"ref27","article-title":"MobileNets: Efficient convolutional neural networks for mobile vision applications","author":"Howard","year":"2017"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00291"},{"key":"ref29","first-page":"2625","article-title":"Binarized neural networks","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Hubara"},{"key":"ref30","article-title":"Distilling the knowledge in a neural network","volume-title":"Proc. Conf. Neural Inf. Process. Syst. Workshop","author":"Hinton"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.5244\/c.28.88"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00244"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.194"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.684"},{"key":"ref35","article-title":"Universal transformers","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Dehghani"},{"key":"ref36","article-title":"Depth-adaptive transformer","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Elbayad"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1113\/jphysiol.1962.sp006837"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1152\/jn.2000.83.5.2580"},{"key":"ref39","article-title":"Glance and focus: A dynamic approach to reducing spatial redundancy in image classification","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Wang"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000013087.49260.fb"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1991.3.1.79"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(97)00011-7"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2003.820440"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.298"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2016.7900006"},{"key":"ref46","first-page":"527","article-title":"Adaptive neural networks for efficient inference","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Bolukbasi"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_25"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_1"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CODESISSS.2015.7331375"},{"key":"ref50","article-title":"Idk cascades: Fast deep learning by learning not to overthink","volume-title":"Proc. Conf. Assoc. Uncertainty Artif. Intell.","author":"Wang"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/s10115-017-1029-1"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/302"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3411973"},{"key":"ref54","first-page":"2363","article-title":"Deciding how to decide: Dynamic routing in artificial neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"McGill"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2959322"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00198"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.537"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.204"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.593"},{"key":"ref60","article-title":"BERT loses patience: Fast and robust inference with early exit","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Zhou"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/98"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00346"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098177"},{"key":"ref64","article-title":"Fast and accurate text classification: Skimming, rereading and early stopping","volume-title":"Proc. Int. Conf. Learn. Representations Workshop","author":"Yu"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2019.08.082"},{"key":"ref66","first-page":"1","article-title":"IamNN: Iterative and adaptive mobile neural network for efficient image classification","volume-title":"Proc. Int. Conf. Mach. Learn., Workshop","author":"Leroux"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00529"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17286"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00222"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6025"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00919"},{"key":"ref72","article-title":"Estimating or propagating gradients through stochastic neurons for conditional computation","author":"Bengio","year":"2013"},{"key":"ref73","article-title":"Exponentially increasing the capacity-to-computation ratio for conditional computation in deep learning","author":"Cho","year":"2014"},{"key":"ref74","article-title":"Conditional computation in neural networks for faster models","volume-title":"Proc. Int. Conf. Learn. Representations Workshop","author":"Bengio"},{"key":"ref75","article-title":"Low-rank approximations for conditional feedforward computation in deep neural networks","author":"Davis","year":"2013"},{"key":"ref76","article-title":"Learning factored representations in a deep mixture of experts","volume-title":"Proc. Int. Conf. Learn. Representations Workshop","author":"Eigen"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220007"},{"key":"ref78","first-page":"8080","article-title":"HydraNets: Specialized dynamic architectures for efficient inference","volume-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit.","author":"Mullapudi"},{"key":"ref79","first-page":"552","article-title":"Deep mixture of experts via shallow embedding","volume-title":"Proc. Uncertainty Artif. Intell. Conf.","author":"Wang"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00363"},{"key":"ref81","article-title":"Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity","author":"Fedus","year":"2021"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_11"},{"key":"ref84","first-page":"1886","article-title":"Channel gating neural networks","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Hua"},{"key":"ref85","article-title":"Dynamic channel pruning: Feature boosting and suppression","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Gao"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58583-9_15"},{"key":"ref87","article-title":"Batch-shaping for learning conditional channel gated networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Bejnordi"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-36708-4_15"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00850"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00939"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/416"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2020.2979669"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/TETC.2021.3056031"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58285-2_3"},{"key":"ref95","article-title":"Matrix capsules with EM routing","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hinton"},{"key":"ref96","article-title":"Changing model behavior at test-time using reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations Workshop","author":"Odena"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11630"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.18"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.172"},{"key":"ref100","article-title":"Distilling a neural network into a soft decision tree","author":"Frosst","year":"2017"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01237-6"},{"key":"ref102","first-page":"4138","article-title":"The tree ensemble layer: Differentiability meets conditional computation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Hazimeh"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.314"},{"key":"ref104","article-title":"Decision forests, convolutional networks and the models in-between","author":"Ioannou","year":"2016"},{"key":"ref105","first-page":"6166","article-title":"Adaptive neural trees","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Tanno"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5764"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00858"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.539"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01142"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.89"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00953"},{"key":"ref112","article-title":"Deformable kernels: Adapting effective receptive fields for object deformation.,","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Gao"},{"key":"ref113","first-page":"5047","article-title":"Meta-neighborhoods","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Shan"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1145\/3431920.3439295"},{"key":"ref115","article-title":"Predicting parameters in deep learning","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Denil"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1992.4.1.131"},{"key":"ref117","first-page":"667","article-title":"Dynamic filter networks","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Jia"},{"key":"ref118","article-title":"Hypernetworks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Ha"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58555-6_46"},{"key":"ref120","article-title":"LambdaNetworks: Modeling long-range interactions without attention","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Bello"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.11"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-020-01345-8"},{"key":"ref123","first-page":"6597","article-title":"Modulating early visual processing by language","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"de Vries"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11671"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00194"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01155"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/ICME46284.2020.9102906"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.683"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-00928-1_48"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.667"},{"key":"ref131","first-page":"9423","article-title":"Gather-Excite: Exploiting feature context in convolutional neural networks","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Hu"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58529-7_21"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58621-8_21"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00060"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.5244\/C.31.185"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00813"},{"key":"ref137","first-page":"6511","article-title":"Compact generalized non-local network","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Yue"},{"key":"ref138","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Dosovitskiy"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1145\/3465055"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00679"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1145\/3505244"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.319"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00908"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.205"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01147"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2019.00114"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00239"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_31"},{"key":"ref149","article-title":"Dynamic capacity networks,","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Almahairi"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00982"},{"key":"ref151","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2017.2752806"},{"key":"ref152","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01249-6_12"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00167"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00310"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00797"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00188"},{"key":"ref157","first-page":"2017","article-title":"Spatial transformer networks","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Jaderberg","year":"2015"},{"key":"ref158","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01240-3_4"},{"key":"ref159","first-page":"2204","article-title":"Recurrent models of visual attention","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Mnih"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2017.145"},{"key":"ref161","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-54193-8_17"},{"key":"ref162","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.476"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00238"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.207"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.1117\/12.2537799"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00236"},{"key":"ref167","article-title":"Skip RNN: Learning to skip state updates in recurrent neural networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Campos"},{"key":"ref168","article-title":"Neural Speed Reading with Structural-Jump-LSTM","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hansen"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1145\/3362743.3362965"},{"key":"ref170","article-title":"Variable computation in recurrent neural networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Jernite"},{"key":"ref171","first-page":"1","article-title":"Neural Speed Reading via Skim-RNN","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Seo"},{"key":"ref172","first-page":"1","article-title":"Hierarchical multiscale recurrent neural networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chung"},{"key":"ref173","first-page":"2554","article-title":"Focused hierarchical RNNs for conditional sequence processing","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ke"},{"key":"ref174","doi-asserted-by":"publisher","DOI":"10.1145\/3132847.3132947"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1172"},{"key":"ref176","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1474"},{"key":"ref177","first-page":"7780","article-title":"LiteEval: A coarse-to-fine framework for resource efficient video recognition","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Wu"},{"key":"ref178","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR48806.2021.9413153"},{"key":"ref179","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.293"},{"key":"ref180","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46478-7_48"},{"key":"ref181","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00137"},{"key":"ref182","first-page":"1","article-title":"AdaFuse: Adaptive temporal fusion network for efficient action recognition","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Meng"},{"key":"ref183","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00728"},{"key":"ref184","article-title":"HMS: Hierarchical modality selectionfor efficient video recognition","author":"Weng","year":"2021"},{"key":"ref185","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00748"},{"key":"ref186","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01535"},{"key":"ref187","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01240-3_16"},{"key":"ref188","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.424"},{"key":"ref189","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00558"},{"key":"ref190","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00632"},{"key":"ref191","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00633"},{"key":"ref192","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3007826"},{"key":"ref193","article-title":"Model Rubiks cube: Twisting resolution, depth and width for tinyNets","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Han"},{"key":"ref194","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58529-7_30"},{"key":"ref195","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00609"},{"key":"ref196","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58571-6_6"},{"key":"ref197","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01594"},{"key":"ref198","article-title":"VA-RED 2: Video adaptive redundancy reduction","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Pan"},{"key":"ref199","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00470"},{"key":"ref200","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00511"},{"key":"ref201","first-page":"1321","article-title":"On calibration of modern neural networks,","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Guo"},{"key":"ref202","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00013"},{"key":"ref203","doi-asserted-by":"publisher","DOI":"10.1109\/34.655647"},{"key":"ref204","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299170"},{"key":"ref205","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.446"},{"key":"ref206","doi-asserted-by":"publisher","DOI":"10.5244\/c.29.32"},{"key":"ref207","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.234"},{"key":"ref208","first-page":"3525","article-title":"Adaptive feeding: Achieving fast and accurate detections by adaptively combining object detectors","volume-title":"Proc. Int. Conf. Comput. Vis.","author":"H.-Yu Z."},{"key":"ref209","first-page":"318","article-title":"MetaAnchor: Learning to detect objects with customized anchors","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Yang"},{"key":"ref210","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2916104"},{"key":"ref211","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01288"},{"key":"ref212","article-title":"Deep multimodal fusion by channel exchanging","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Wang"},{"key":"ref213","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.67"},{"key":"ref214","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00841"},{"key":"ref215","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3018269"},{"key":"ref216","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00366"},{"key":"ref217","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00222"},{"key":"ref218","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/401"},{"key":"ref219","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2019.12.042"},{"key":"ref220","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01308"},{"key":"ref221","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_11"},{"key":"ref222","first-page":"570","article-title":"Learning to predict layout-to-image conditional convolutions for semantic image synthesis","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Liu"},{"key":"ref223","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00244"},{"key":"ref224","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00515"},{"key":"ref225","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_11"},{"key":"ref226","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298685"},{"key":"ref227","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.557"},{"key":"ref228","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2970248"},{"key":"ref229","article-title":"Multiple object recognition with visual attention","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Ba"},{"key":"ref230","first-page":"3233","article-title":"Attend, infer, repeat: Fast scene understanding with generative models","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Ali Eslami","year":"2016"},{"key":"ref231","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00629"},{"key":"ref232","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01047"},{"key":"ref233","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00686"},{"key":"ref234","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.37"},{"key":"ref235","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.244"},{"key":"ref236","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00340"},{"key":"ref237","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.435"},{"key":"ref238","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00257"},{"key":"ref239","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_26"},{"key":"ref240","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00651"},{"key":"ref241","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00341"},{"key":"ref242","doi-asserted-by":"publisher","DOI":"10.5555\/3045118.3045336"},{"key":"ref243","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.450"},{"key":"ref244","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00756"},{"key":"ref245","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_29"},{"key":"ref246","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/p18-1208"},{"key":"ref247","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.214"},{"key":"ref248","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-70139-4_54"},{"key":"ref249","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330662"},{"key":"ref250","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611976700.56"},{"key":"ref251","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1103"},{"key":"ref252","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290989"},{"key":"ref253","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357925"},{"key":"ref254","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2955567"},{"key":"ref255","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6376"},{"key":"ref256","doi-asserted-by":"publisher","DOI":"10.3390\/e22121336"},{"key":"ref257","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.668"},{"key":"ref258","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00029"},{"key":"ref259","volume-title":"Statistical Theory of Extreme Values and Some Practical Applications(NBS Applied Mathematics Series)","author":"Gumbel","year":"1954"},{"key":"ref260","article-title":"Categorical reparameterization with gumbel-softmax","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Jang"},{"key":"ref261","article-title":"Discrete autoencoders for sequence models","author":"Kaiser","year":"2018"},{"key":"ref262","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2195"},{"key":"ref263","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"ref264","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref265","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018401"},{"key":"ref266","first-page":"3301","article-title":"Shallow-deep networks: Understanding and mitigating network overthinking","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"97","author":"Kaya"},{"key":"ref267","article-title":"ELF: An early-exiting framework for long-tailed classification","author":"Duggal","year":"2020"},{"key":"ref268","first-page":"1","article-title":"Triple Wins: Boosting accuracy, robustness and efficiency together by enabling input-adaptive inference","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hu"},{"key":"ref269","article-title":"Routing networks: Adaptive selection of non-linear functions for multi-task learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Rosenbaum"},{"key":"ref270","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00494"},{"key":"ref271","article-title":"Not all images are worth 16x16 words: Dynamic vision transformers with adaptive sequence length","author":"Wang","year":"2021"},{"key":"ref272","article-title":"DynamicVit: Efficient vision transformers with dynamic token sparsification","author":"Rao","year":"2021"},{"key":"ref273","article-title":"Ia-red 2: Interpretability-aware redundancy reduction for vision transformers","author":"Pan","year":"2021"},{"key":"ref274","doi-asserted-by":"publisher","DOI":"10.1145\/3289602.3293902"},{"key":"ref275","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.11"},{"key":"ref276","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS.2017.8050797"},{"key":"ref277","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00061"},{"key":"ref278","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358283"},{"key":"ref279","doi-asserted-by":"publisher","DOI":"10.1109\/ICSCC.2019.8843626"},{"key":"ref280","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01427"},{"key":"ref281","article-title":"A panda? No, it\u2019s a sloth: Slowdown attacks on adaptive multi-exit neural network inference","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hong"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/9910240\/09560049.pdf?arnumber=9560049","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,11]],"date-time":"2024-01-11T22:25:43Z","timestamp":1705011943000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9560049\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,1]]},"references-count":281,"journal-issue":{"issue":"11"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2021.3117837","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,11,1]]}}}