{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T14:30:19Z","timestamp":1775745019986,"version":"3.50.1"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030801281","type":"print"},{"value":"9783030801298","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-80129-8_17","type":"book-chapter","created":{"date-parts":[[2021,10,4]],"date-time":"2021-10-04T21:51:43Z","timestamp":1633384303000},"page":"213-232","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["Neural Network Compression Framework for Fast Model Inference"],"prefix":"10.1007","author":[{"given":"Alexander","family":"Kozlov","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ivan","family":"Lazarevich","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vasily","family":"Shamporov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nikolay","family":"Lyalyushkin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yury","family":"Gorbachev","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"17_CR1","unstructured":"OpenVINO Toolkit. https:\/\/software.intel.com\/en-us\/openvino-toolkit"},{"key":"17_CR2","doi-asserted-by":"crossref","unstructured":"Avron, H., Toledo, S.: Randomized algorithms for estimating the trace of an implicit symmetric positive semi-definite matrix. J. ACM (JACM) 58(2), 1\u201334 (2011)","DOI":"10.1145\/1944345.1944349"},{"key":"17_CR3","unstructured":"Chen, K., et al.: MMDetection: open mmlab detection toolbox and benchmark. arXiv preprint arXiv:1906.07155 (2019)"},{"key":"17_CR4","unstructured":"Choi, J., Wang, Z., Venkataramani, S., Chuang, P.I.-J., Srinivasan, V., Gopalakrishnan, K.: Pact: parameterized clipping activation for quantized neural networks. arXiv preprint arXiv:1805.06085 (2018)"},{"key":"17_CR5","doi-asserted-by":"crossref","unstructured":"Dong, Z., Yao, Z., Cai, Y., Arfeen, D., Gholami, A., Mahoney, M.W., Keutzer, K.: Hawq-v2: Hessian aware trace-weighted quantization of neural networks. arXiv preprint arXiv:1911.03852 (2019)","DOI":"10.1109\/ICCV.2019.00038"},{"key":"17_CR6","unstructured":"Gale, T., Elsen, E., Hooker, S.: The state of sparsity in deep neural networks. arXiv preprint arXiv:1902.09574 (2019)"},{"key":"17_CR7","unstructured":"Gomez, A.N., Zhang, I., Swersky, K., Gal, Y., Hinton, G.E.: Learning sparse networks using targeted dropout. arXiv preprint arXiv:1905.13678 (2019)"},{"key":"17_CR8","unstructured":"Han, S., Pool, J., Tran, J., Dally, W.: Learning both weights and connections for efficient neural network. In: Advances in Neural Information Processing Systems, pp. 1135\u20131143 (2015)"},{"key":"17_CR9","doi-asserted-by":"crossref","unstructured":"He, Y., Liu, P., Wang, Z., Hu, Z., Yang, Y.: Filter pruning via geometric median for deep convolutional neural networks acceleration. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4340\u20134349 (2019)","DOI":"10.1109\/CVPR.2019.00447"},{"key":"17_CR10","unstructured":"Hubara, I., Courbariaux, M., Soudry, D., El-Yaniv, R., Bengio, Y.: Binarized neural networks. In: Advances in Neural Information Processing Systems, pp. 4107\u20134115 (2016)"},{"key":"17_CR11","unstructured":"Krishnamoorthi, R.: Quantizing deep convolutional networks for efficient inference: a whitepaper. arXiv preprint arXiv:1806.08342 (2018)"},{"key":"17_CR12","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: Pereira, F., Burges, C.J.C., Bottou, L., Weinberger, K.Q. (eds.) Advances in Neural Information Processing Systems, vol. 25, pp. 1097\u20131105. Curran Associates, Inc. (2012)"},{"key":"17_CR13","doi-asserted-by":"crossref","unstructured":"Liu, B., Wang, M., Foroosh, H., Tappen, M., Pensky, M.: Sparse convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 806\u2013814 (2015)","DOI":"10.1109\/CVPR.2015.7298681"},{"key":"17_CR14","doi-asserted-by":"crossref","unstructured":"Liu, C., et al.: Progressive neural architecture search. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 19\u201334 (2018)","DOI":"10.1007\/978-3-030-01246-5_2"},{"key":"17_CR15","doi-asserted-by":"crossref","unstructured":"Liu, Z., Li, J., Shen, Z., Huang, G., Yan, S., Zhang, C.: Learning efficient convolutional networks through network slimming. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2736\u20132744 (2017)","DOI":"10.1109\/ICCV.2017.298"},{"key":"17_CR16","unstructured":"Louizos, C., Welling, M., Kingma, D.P.: Learning sparse neural networks through $$ l\\_0 $$ regularization. arXiv preprint arXiv:1712.01312 (2017)"},{"key":"17_CR17","unstructured":"Molchanov, D., Ashukha, A., Vetrov, D.: Variational dropout sparsifies deep neural networks. In: Proceedings of the 34th International Conference on Machine Learning, vol. 70, pp. 2498\u20132507. JMLR.org (2017)"},{"key":"17_CR18","unstructured":"van den Oord, A., et al.: WaveNet: a generative model for raw audio. arXiv preprint arXiv:1609.03499 (2016)"},{"key":"17_CR19","unstructured":"Park, J., et al.: Faster CNNs with direct sparse convolutions and guided pruning. arXiv preprint arXiv:1608.01409 (2016)"},{"key":"17_CR20","doi-asserted-by":"publisher","unstructured":"Rastegari, M., Ordonez, V., Redmon, J., Farhadi, A.: XNOR-net: Imagenet classification using binary convolutional neural networks. In: European Conference on Computer Vision, pp. 525\u2013542. Springer, Heidelberg (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_32","DOI":"10.1007\/978-3-319-46493-0_32"},{"key":"17_CR21","unstructured":"Rodr\u00edguez, P., Gonzalez, J., Cucurull, G., Gonfaus, J.M., Roca, X.: Regularizing CNNs with locally constrained decorrelations. arXiv preprint arXiv:1611.01967 (2016)"},{"key":"17_CR22","unstructured":"Wu, M., Jain, S.R., Gural, A., Dick, C.H.: Trained quantization thresholds for accurate and efficient fixed-point inference of deep neural networks (2019)"},{"key":"17_CR23","unstructured":"Shang, W., Sohn, K., Almeida, D., Lee, H.: Understanding and improving convolutional neural networks via concatenated rectified linear units. In: International Conference on Machine Learning, pp. 2217\u20132225 (2016)"},{"key":"17_CR24","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition (2014)"},{"key":"17_CR25","doi-asserted-by":"crossref","unstructured":"Tan, M., et al.: MnasNet: platform-aware neural architecture search for mobile. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2820\u20132828 (2019)","DOI":"10.1109\/CVPR.2019.00293"},{"key":"17_CR26","unstructured":"Wen, W., Wu, C., Wang, Y., Chen, Y., Li, H.: Learning structured sparsity in deep neural networks. In: Advances in Neural Information Processing Systems, pp. 2074\u20132082 (2016)"},{"key":"17_CR27","unstructured":"Wolf, T., et al.: Huggingface\u2019s transformers: state-of-the-art natural language processing. ArXiv, arXiv-1910, (2019)"},{"key":"17_CR28","unstructured":"Wu, Y., et al.: Google\u2019s neural machine translation system: bridging the gap between human and machine translation. arXiv preprint arXiv:1609.08144 (2016)"},{"key":"17_CR29","doi-asserted-by":"publisher","unstructured":"Zeiler, M.D., Fergus, R.: Visualizing and understanding convolutional networks. In: European Conference on Computer Vision, pp. 818\u2013833. Springer, Heidelberg (2014). https:\/\/doi.org\/10.1007\/978-3-319-10590-1_53","DOI":"10.1007\/978-3-319-10590-1_53"},{"key":"17_CR30","unstructured":"Zhou, S., Wu, Y., Ni, Z., Zhou, X., Wen, H., Zou, Y.: Dorefa-net: training low bitwidth convolutional neural networks with low bitwidth gradients. arXiv preprint arXiv:1606.06160 (2016)"},{"key":"17_CR31","unstructured":"Zhu, M., Gupta, S.: To prune, or not to prune: exploring the efficacy of pruning for model compression. arXiv preprint arXiv:1710.01878 (2017)"},{"key":"17_CR32","unstructured":"Zmora, N., Jacob, G., Zlotnik, L., Elharar, B., Novik, G.: Neural network distiller, June 2018"},{"key":"17_CR33","doi-asserted-by":"crossref","unstructured":"Zoph, B., Vasudevan, V., Shlens, J., Le, Q.V.: Learning transferable architectures for scalable image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8697\u20138710 (2018)","DOI":"10.1109\/CVPR.2018.00907"}],"container-title":["Lecture Notes in Networks and Systems","Intelligent Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-80129-8_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T07:13:16Z","timestamp":1771657996000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-80129-8_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030801281","9783030801298"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-80129-8_17","relation":{},"ISSN":["2367-3370","2367-3389"],"issn-type":[{"value":"2367-3370","type":"print"},{"value":"2367-3389","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"6 July 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}