{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:07:10Z","timestamp":1740100030895,"version":"3.37.3"},"reference-count":43,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,1,10]],"date-time":"2021-01-10T00:00:00Z","timestamp":1610236800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,1,10]],"date-time":"2021-01-10T00:00:00Z","timestamp":1610236800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,1,10]],"date-time":"2021-01-10T00:00:00Z","timestamp":1610236800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2020AAA0107600,2018AAA0100704"],"award-info":[{"award-number":["2020AAA0107600,2018AAA0100704"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61972250,U19B2035,U1609220"],"award-info":[{"award-number":["61972250,U19B2035,U1609220"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,1,10]]},"DOI":"10.1109\/icpr48806.2021.9412192","type":"proceedings-article","created":{"date-parts":[[2021,5,6]],"date-time":"2021-05-06T02:15:54Z","timestamp":1620267354000},"page":"2446-2453","source":"Crossref","is-referenced-by-count":1,"title":["Automatic Student Network Search for Knowledge Distillation"],"prefix":"10.1109","author":[{"given":"Zhexi","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junchi","family":"Yan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peng","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guotong","family":"Xie","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"journal-title":"Darts+ Improved differentiable architecture search with early stopping","year":"2019","author":"liang","key":"ref39"},{"journal-title":"Pc-darts Partial channel connections for memory-efficient differentiable architecture search","year":"2019","author":"xu","key":"ref38"},{"key":"ref33","first-page":"2016","article-title":"Neural architecture search with bayesian optimisation and optimal transport","author":"kandasamy","year":"2018","journal-title":"Advances in neural information processing systems"},{"journal-title":"Neural architecture search over a graph search space","year":"2018","author":"jastrzkebski","key":"ref32"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/424"},{"journal-title":"DARTS Differentiable Architecture Search","year":"2018","author":"liu","key":"ref30"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00138"},{"key":"ref36","first-page":"4095","article-title":"Efficient neural architecture search via parameters sharing","author":"pham","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1128"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00921"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"journal-title":"SNAS stochastic neural architecture search","year":"2018","author":"xie","key":"ref40"},{"journal-title":"Improving language understanding by generative pre-training","year":"0","author":"radford","key":"ref11"},{"key":"ref12","article-title":"Pre-training entity relation encoder with intra-span and inter-span information","author":"wang","year":"0","journal-title":"Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing"},{"journal-title":"RoBERTa A Robustly optimized BERT Pretraining Approach","year":"2019","author":"liu","key":"ref13"},{"key":"ref14","article-title":"Albert: A lite bert for self-supervised learning of language representations","author":"lan","year":"0","journal-title":"International Conference on Learning Representations"},{"journal-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","year":"2019","author":"raffel","key":"ref15"},{"key":"ref16","first-page":"9","article-title":"Distilling the knowledge in a neural network","volume":"1050","author":"hinton","year":"2015","journal-title":"Stat"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/1150402.1150464"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-614"},{"journal-title":"Very Deep Convolutional Networks for Large-scale Image Recognition","year":"2014","author":"simonyan","key":"ref19"},{"journal-title":"N2N learning Network to network compression via policy gradient reinforcement learning","year":"2017","author":"ashok","key":"ref28"},{"journal-title":"Improving multi-task deep neural networks via knowledge distillation for natural language understanding","year":"2019","author":"liu","key":"ref4"},{"journal-title":"From nodes to networks Evolving recurrent neural networks","year":"2018","author":"rawal","key":"ref27"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1445"},{"journal-title":"Distilling task-specific knowledge from BERT into simple neural networks","year":"2019","author":"tang","key":"ref6"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014780"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1441"},{"key":"ref8","first-page":"5754","article-title":"Xlnet: Generalized autoregressive pretraining for language understanding","author":"yang","year":"2019","journal-title":"Advances in neural information processing systems"},{"journal-title":"Neural Architecture Search A Survey","year":"2019","author":"wistuba","key":"ref7"},{"key":"ref2","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"key":"ref1","first-page":"4171","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","volume":"1","author":"devlin","year":"0","journal-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics Human Language Technologies"},{"key":"ref20","first-page":"1995","article-title":"Convolutional networks for images, speech, and time series","volume":"3361","author":"lecun","year":"1995","journal-title":"The Handbook of Brain Theory and Neural Networks"},{"journal-title":"TinyBERT Distilling BERT for Natural Language Understanding","year":"2019","author":"jiao","key":"ref22"},{"journal-title":"Paying more attention to attention Improving the performance of convolutional neural networks via attention transfer","year":"2016","author":"zagoruyko","key":"ref21"},{"key":"ref42","article-title":"First quora dataset release: Question pairs","author":"iyer","year":"2017","journal-title":"data quora com"},{"journal-title":"Neural architecture search with reinforcement learning","year":"2016","author":"zoph","key":"ref24"},{"key":"ref41","first-page":"2742","article-title":"Information aggregation via dynamic routing for sequence encoding","author":"gong","year":"0","journal-title":"Proceedings of the 27th International Conference on Computational Linguistics"},{"journal-title":"Wide residual networks","year":"2016","author":"zagoruyko","key":"ref23"},{"key":"ref26","first-page":"5877","article-title":"The evolved transformer","author":"so","year":"0","journal-title":"International Conference on Machine Learning"},{"journal-title":"Adam A method for stochastic optimization","year":"2014","author":"kingma","key":"ref43"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00907"}],"event":{"name":"2020 25th International Conference on Pattern Recognition (ICPR)","start":{"date-parts":[[2021,1,10]]},"location":"Milan, Italy","end":{"date-parts":[[2021,1,15]]}},"container-title":["2020 25th International Conference on Pattern Recognition (ICPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9411940\/9411911\/09412192.pdf?arnumber=9412192","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:40:48Z","timestamp":1652197248000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9412192\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,10]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/icpr48806.2021.9412192","relation":{},"subject":[],"published":{"date-parts":[[2021,1,10]]}}}