{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,2]],"date-time":"2025-05-02T23:04:43Z","timestamp":1746227083496,"version":"3.28.0"},"reference-count":41,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,6,10]],"date-time":"2022-06-10T00:00:00Z","timestamp":1654819200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,6,10]],"date-time":"2022-06-10T00:00:00Z","timestamp":1654819200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,6,10]]},"DOI":"10.1109\/iwqos54832.2022.9812905","type":"proceedings-article","created":{"date-parts":[[2022,7,5]],"date-time":"2022-07-05T19:27:44Z","timestamp":1657049264000},"page":"1-10","source":"Crossref","is-referenced-by-count":4,"title":["AIQoSer: Building the efficient Inference-QoS for AI Services"],"prefix":"10.1109","author":[{"given":"Jianxin","family":"Li","sequence":"first","affiliation":[{"name":"Beihang University,Advanced Innovation Center for Big Data and Brain Computing,Beijing,China,100191"}]},{"given":"Tianchen","family":"Zhu","sequence":"additional","affiliation":[{"name":"Beihang University,Advanced Innovation Center for Big Data and Brain Computing,Beijing,China,100191"}]},{"given":"Haoyi","family":"Zhou","sequence":"additional","affiliation":[{"name":"Beihang University,Advanced Innovation Center for Big Data and Brain Computing,Beijing,China,100191"}]},{"given":"Qingyun","family":"Sun","sequence":"additional","affiliation":[{"name":"Beihang University,Advanced Innovation Center for Big Data and Brain Computing,Beijing,China,100191"}]},{"given":"Chunyang","family":"Jiang","sequence":"additional","affiliation":[{"name":"Beihang University,Advanced Innovation Center for Big Data and Brain Computing,Beijing,China,100191"}]},{"given":"Shuai","family":"Zhang","sequence":"additional","affiliation":[{"name":"Beihang University,Advanced Innovation Center for Big Data and Brain Computing,Beijing,China,100191"}]},{"given":"Chunming","family":"Hu","sequence":"additional","affiliation":[{"name":"Beihang University,Advanced Innovation Center for Big Data and Brain Computing,Beijing,China,100191"}]}],"member":"263","reference":[{"article-title":"An empirical evaluation of generic convolutional and recurrent networks for sequence modeling","year":"2018","author":"bai","key":"ref39"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/1111322.1111341"},{"key":"ref33","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"NeurIPS"},{"key":"ref32","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"dosovitskiy","year":"2021","journal-title":"ICLRE"},{"key":"ref31","article-title":"Adaptive input representations for neural language modeling","author":"baevski","year":"2019","journal-title":"ICLRE"},{"key":"ref30","first-page":"221","author":"veldhuizen","year":"1998","journal-title":"Evolutionary Computation and Convergence to a Pareto front"},{"key":"ref37","first-page":"1","article-title":"A network traffic flow prediction with deep learning approach for large-scale metropolitan area network","author":"wang","year":"2018","journal-title":"NOMS"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1176"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3474717.3483923"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3447987"},{"key":"ref10","article-title":"Rethinking the value of network pruning","author":"liu","year":"2019","journal-title":"ICLRE"},{"key":"ref40","first-page":"5243","article-title":"Enhancing the locality and breaking the memory bottleneck of transformer on time series forecasting","volume":"32","author":"li","year":"2019","journal-title":"NeurIPS"},{"key":"ref11","article-title":"Scalable methods for 8-bit training of neural networks","volume":"31","author":"banner","year":"2018","journal-title":"NeurIPS"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.211"},{"key":"ref13","article-title":"Exploiting linear structure within convolutional networks for efficient evaluation","volume":"27","author":"denton","year":"2014","journal-title":"NeurIPS"},{"article-title":"Binarized neural networks: Training deep neural networks with weights and activations constrained to+ 1 or-1","year":"2016","author":"courbariaux","key":"ref14"},{"key":"ref15","article-title":"Distilling the knowledge in a neural network","volume":"2","author":"hinton","year":"2015"},{"key":"ref16","article-title":"Learning both weights and connections for efficient neural network","volume":"28","author":"han","year":"2015","journal-title":"NeurIPS"},{"key":"ref17","first-page":"164","article-title":"Second order derivatives for network pruning: Optimal brain surgeon","author":"hassibi","year":"1992","journal-title":"NeurIPS"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_48"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001163"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/1071690.1064259"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2017.2707140"},{"article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","year":"2014","author":"chung","key":"ref27"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-015-9438-6"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IWQoS49365.2020.9213008"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ISCC47284.2019.8969631"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.3390\/app10186564"},{"article-title":"A survey of quantization methods for efficient neural network inference","year":"2021","author":"gholami","key":"ref8"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2021.3096928"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3152434.3152441"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"11 106","DOI":"10.1609\/aaai.v35i12.17325","article-title":"Informer: Beyond efficient transformer for long sequence time-series forecasting","volume":"35","author":"zhou","year":"2021","journal-title":"AAAI"},{"journal-title":"A survey on artificial intelligence for network routing problems","year":"2007","author":"bai","key":"ref1"},{"key":"ref20","first-page":"1","article-title":"Sparsity in deep learning: Pruning and growth for efficient inference and training in neural networks","volume":"22","author":"hoefler","year":"2021","journal-title":"Journal of Machine Learning Research"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s42045-018-0009-7"},{"key":"ref21","first-page":"1737","article-title":"Deep learning with limited numerical precision","author":"gupta","year":"2015","journal-title":"ICML"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ISKE.2017.8258815"},{"key":"ref41","article-title":"Decoupled weight decay regularization","author":"loshchilov","year":"2017","journal-title":"ICLRE"},{"key":"ref23","first-page":"1","article-title":"Neutm: A neural network-based framework for traffic matrix prediction in sdn","author":"azzouni","year":"2018","journal-title":"NOMS"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICTON.2018.8473978"}],"event":{"name":"2022 IEEE\/ACM 30th International Symposium on Quality of Service (IWQoS)","start":{"date-parts":[[2022,6,10]]},"location":"Oslo, Norway","end":{"date-parts":[[2022,6,12]]}},"container-title":["2022 IEEE\/ACM 30th International Symposium on Quality of Service (IWQoS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9812862\/9812863\/09812905.pdf?arnumber=9812905","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,10]],"date-time":"2023-02-10T22:51:09Z","timestamp":1676069469000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9812905\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,10]]},"references-count":41,"URL":"https:\/\/doi.org\/10.1109\/iwqos54832.2022.9812905","relation":{},"subject":[],"published":{"date-parts":[[2022,6,10]]}}}