{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T14:38:34Z","timestamp":1740148714329,"version":"3.37.3"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2022,7,20]],"date-time":"2022-07-20T00:00:00Z","timestamp":1658275200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,7,20]],"date-time":"2022-07-20T00:00:00Z","timestamp":1658275200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100014553","name":"Samsung Advanced Institute of Technology","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100014553","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Institute for Information and Communications Technology Planning & evaluation"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Sign Process Syst"],"published-print":{"date-parts":[[2022,10]]},"DOI":"10.1007\/s11265-022-01782-3","type":"journal-article","created":{"date-parts":[[2022,7,20]],"date-time":"2022-07-20T22:02:27Z","timestamp":1658354547000},"page":"929-943","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Optimization of General Matrix Multiply Library for Ternary Weight for Fast DNN Inference"],"prefix":"10.1007","volume":"94","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6677-7646","authenticated-orcid":false,"given":"Seokhyeon","family":"Choi","sequence":"first","affiliation":[]},{"given":"Kyuhong","family":"Shim","sequence":"additional","affiliation":[]},{"given":"Jungwook","family":"Choi","sequence":"additional","affiliation":[]},{"given":"Wonyong","family":"Sung","sequence":"additional","affiliation":[]},{"given":"Byonghyo","family":"Shim","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,7,20]]},"reference":[{"key":"1782_CR1","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., & Chen, L.-C. (2018). Mobilenetv2: Inverted residuals and linear bottlenecks. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 4510\u20134520)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"1782_CR2","doi-asserted-by":"crossref","unstructured":"Tan, M., Chen, B., Pang, R., Vasudevan, V., Sandler, M., Howard, A., & Le, Q.\u00a0V. (2019). Mnasnet: Platform-aware neural architecture search for mobile. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (pp. 2820\u20132828)","DOI":"10.1109\/CVPR.2019.00293"},{"key":"1782_CR3","unstructured":"Tan, M., & Le, Q. (2019). Efficientnet: Rethinking model scaling for convolutional neural networks. In International Conference on Machine Learning (pp. 6105\u20136114). PMLR"},{"key":"1782_CR4","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1109\/72.248452","volume":"4","author":"R Reed","year":"1993","unstructured":"Reed, R. (1993). Pruning algorithms-a survey. IEEE transactions on Neural Networks, 4, 740\u2013747","journal-title":"IEEE transactions on Neural Networks"},{"key":"1782_CR5","first-page":"6869","volume":"18","author":"I Hubara","year":"2017","unstructured":"Hubara, I., Courbariaux, M., Soudry, D., El-Yaniv, R., & Bengio, Y. (2017). Quantized neural networks: Training neural networks with low precision weights and activations. The Journal of Machine Learning Research, 18, 6869\u20136898","journal-title":"The Journal of Machine Learning Research"},{"key":"1782_CR6","unstructured":"Sung, W., Shin, S., & Hwang, K. (2015). Resiliency of deep neural networks under quantization. arXiv preprint arXiv:1511.06488"},{"key":"1782_CR7","unstructured":"Choi, J., Wang, Z., Venkataramani, S., Chuang, P. I.-J., Srinivasan, V., & Gopalakrishnan, K. (2018). Pact: Parameterized clipping activation for quantized neural networks. arXiv preprint arXiv:1805.06085"},{"key":"1782_CR8","unstructured":"Esser, S.\u00a0K., McKinstry, J.\u00a0L., Bablani, D., Appuswamy, R., & Modha, D.\u00a0S. (2019). Learned step size quantization. arXiv preprint arXiv:1902.08153"},{"key":"1782_CR9","unstructured":"Jacob, B., & Warden, P. (2017). gemmlowp: A small self-contained low-precision gemm library"},{"key":"1782_CR10","doi-asserted-by":"crossref","unstructured":"Han, Q., Hu, Y., Yu, F., Yang, H., Liu, B., Hu, P., Gong, R., Wang, Y., Wang, R., Luan, Z., et\u00a0al. (2020). Extremely low-bit convolution optimization for quantized neural network on modern computer architectures. In 49th International Conference on Parallel Processing-ICPP (pp. 1\u201312)","DOI":"10.1145\/3404397.3404407"},{"key":"1782_CR11","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 770\u2013778)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1782_CR12","doi-asserted-by":"crossref","unstructured":"Hwang, K., & Sung, W. (2014). Fixed-point feedforward deep neural network design using weights+ 1, 0, and- 1. In 2014 IEEE Workshop on Signal Processing Systems (SiPS) (pp. 1\u20136). IEEE","DOI":"10.1109\/SiPS.2014.6986082"},{"key":"1782_CR13","doi-asserted-by":"crossref","unstructured":"Shin, S., Boo, Y., & Sung, W. (2017). Fixed-point optimization of deep neural networks with adaptive step size retraining. In 2017 IEEE International conference on acoustics, speech and signal processing (ICASSP) (pp. 1203\u20131207). IEEE","DOI":"10.1109\/ICASSP.2017.7952347"},{"key":"1782_CR14","unstructured":"Li, F., Zhang, B., & Liu, B. (2016). Ternary weight networks. arXiv preprint arXiv:1605.04711"},{"key":"1782_CR15","unstructured":"Zhu, C., Han, S., Mao, H., & Dally, W.\u00a0J. (2016). Trained ternary quantization. arXiv preprint arXiv:1612.01064"},{"key":"1782_CR16","unstructured":"Mellempudi, N., Kundu, A., Mudigere, D., Das, D., Kaul, B., & Dubey, P. (2017). Ternary neural networks with fine-grained quantization. arXiv preprint arXiv:1705.01462"},{"key":"1782_CR17","unstructured":"Mishra, A., Nurvitadhi, E., Cook, J.\u00a0J., & Marr, D. (2017). WRPN: Wide reduced-precision networks. arXiv preprint arXiv:1709.01134"},{"key":"1782_CR18","doi-asserted-by":"crossref","unstructured":"Shin, S., Park, J., Boo, Y., & Sung, W. (2020). Hlhlp: Quantized neural networks training for reaching flat minima in loss surface. In Proceedings of the AAAI Conference on Artificial Intelligence (pp. 5784\u20135791). volume\u00a034","DOI":"10.1609\/aaai.v34i04.6035"},{"key":"1782_CR19","doi-asserted-by":"crossref","unstructured":"Boo, Y., Shin, S., Choi, J., & Sung, W. (2021). Stochastic precision ensemble: self-knowledge distillation for quantized deep neural networks. In Proceedings of the AAAI Conference on Artificial Intelligence (pp. 6794\u20136802). volume\u00a035","DOI":"10.1609\/aaai.v35i8.16839"},{"key":"1782_CR20","doi-asserted-by":"crossref","unstructured":"Bhalgat, Y., Lee, J., Nagel, M., Blankevoort, T., & Kwak, N. (2020). Lsq+: Improving low-bit quantization through learnable offsets and better initialization. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (pp. 696\u2013697)","DOI":"10.1109\/CVPRW50498.2020.00356"},{"key":"1782_CR21","unstructured":"Chellapilla, K., Puri, S., & Simard, P. (2006). High performance convolutional neural networks for document processing. In Tenth International Workshop on Frontiers in Handwriting Recognition. Suvisoft"},{"key":"1782_CR22","unstructured":"Dukhan, M., Wu, Y., Lu, H., & Maher, B. (2018). Qnnpack"},{"key":"1782_CR23","unstructured":"Lee, J., Chirkov, N., Ignasheva, E., Pisarchyk, Y., Shieh, M., Riccardi, F., Sarokin, R., Kulik, A., & Grundmann, M. (2019). On-device neural net inference with mobile gpus. arXiv preprint arXiv:1907.01989"},{"key":"1782_CR24","unstructured":"Umuroglu, Y., & Jahre, M. (2017). Streamlined deployment for quantized neural networks. arXiv preprint arXiv:1709.04060"},{"key":"1782_CR25","doi-asserted-by":"crossref","unstructured":"Rastegari, M., Ordonez, V., Redmon, J., & Farhadi, A. (2016). Xnor-net: Imagenet classification using binary convolutional neural networks. In European conference on computer vision (pp. 525\u2013542). Springer","DOI":"10.1007\/978-3-319-46493-0_32"},{"key":"1782_CR26","unstructured":"Courbariaux, M., Hubara, I., Soudry, D., El-Yaniv, R., & Bengio, Y. (2016). Binarized neural networks: Training deep neural networks with weights and activations constrained to+ 1 or-1. arXiv preprint arXiv:1602.02830"},{"key":"1782_CR27","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., & Fei-Fei, L. (2009). Imagenet: A large-scale hierarchical image database. In 2009 IEEE conference on computer vision and pattern recognition (pp. 248\u2013255). IEEE","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1782_CR28","doi-asserted-by":"crossref","unstructured":"Jeon, Y., Park, B., Kwon, S.\u00a0J., Kim, B., Yun, J., & Lee, D. (2020). BiQGEMM: matrix multiplication with lookup table for binary-coding-based quantized dnns. arXiv preprint arXiv:2005.09904","DOI":"10.1109\/SC41405.2020.00099"},{"key":"1782_CR29","unstructured":"Bao, W., Chang, L.-W., Chen, Y., Deng, K., Agarwal, A., Barsoum, E., & Taha, A. (2019). NGEMM: Optimizing gemm for deep learning via compiler-based techniques. arXiv preprint arXiv:1910.00178"},{"key":"1782_CR30","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1145\/567806.567807","volume":"28","author":"LS Blackford","year":"2002","unstructured":"Blackford, L. S., Petitet, A., Pozo, R., Remington, K., Whaley, R. C., Demmel, J., et al. (2002). An updated set of basic linear algebra subprograms (blas). ACM Transactions on Mathematical Software, 28, 135\u2013151","journal-title":"ACM Transactions on Mathematical Software"},{"key":"1782_CR31","unstructured":"Jia, Y. (2014). Learning semantic image representations at a large scale. Ph.D. thesis UC Berkeley"},{"key":"1782_CR32","unstructured":"Intel. Intel intrinsics guide. https:\/\/software.intel.com\/sites\/landingpage\/IntrinsicsGuide\/. Online, Accessed: 2021-04-22"},{"key":"1782_CR33","unstructured":"Han, S., Mao, H., & Dally, W.\u00a0J. (2015). Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149."}],"container-title":["Journal of Signal Processing Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11265-022-01782-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11265-022-01782-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11265-022-01782-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,9]],"date-time":"2022-09-09T19:43:20Z","timestamp":1662752600000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11265-022-01782-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,20]]},"references-count":33,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2022,10]]}},"alternative-id":["1782"],"URL":"https:\/\/doi.org\/10.1007\/s11265-022-01782-3","relation":{},"ISSN":["1939-8018","1939-8115"],"issn-type":[{"type":"print","value":"1939-8018"},{"type":"electronic","value":"1939-8115"}],"subject":[],"published":{"date-parts":[[2022,7,20]]},"assertion":[{"value":"9 January 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 April 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 June 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 July 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}