{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T01:12:17Z","timestamp":1773277937475,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":102,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,27]],"date-time":"2024-04-27T00:00:00Z","timestamp":1714176000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,27]]},"DOI":"10.1145\/3620665.3640356","type":"proceedings-article","created":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T14:18:06Z","timestamp":1713795486000},"page":"85-102","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Atalanta: A Bit is Worth a \u201cThousand\u201d Tensor Values"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-2487-250X","authenticated-orcid":false,"given":"Alberto Delmas","family":"Lascorz","sequence":"first","affiliation":[{"name":"University of Toronto, Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8950-6221","authenticated-orcid":false,"given":"Mostafa","family":"Mahmoud","sequence":"additional","affiliation":[{"name":"University of Toronto, Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5823-2494","authenticated-orcid":false,"given":"Ali Hadi","family":"Zadeh","sequence":"additional","affiliation":[{"name":"University of Toronto, Toronto, Canada"},{"name":"1QBit Ltd, Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4168-0837","authenticated-orcid":false,"given":"Milos","family":"Nikolic","sequence":"additional","affiliation":[{"name":"University of Toronto, Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3951-0708","authenticated-orcid":false,"given":"Kareem","family":"Ibrahim","sequence":"additional","affiliation":[{"name":"University of Toronto, Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0162-4547","authenticated-orcid":false,"given":"Christina","family":"Giannoula","sequence":"additional","affiliation":[{"name":"University of Toronto, Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4683-8901","authenticated-orcid":false,"given":"Ameer","family":"Abdelhadi","sequence":"additional","affiliation":[{"name":"McMaster University, Hamilton, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7768-367X","authenticated-orcid":false,"given":"Andreas","family":"Moshovos","sequence":"additional","affiliation":[{"name":"University of Toronto, Toronto, Canada"},{"name":"Vector Institute, Toronto, Canada"}]}],"member":"320","published-online":{"date-parts":[[2024,4,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"\"Kaggle display advertising challenge.\" [Online]. Available: https:\/\/www.kaggle.com\/c\/criteo-display-ad-challenge\/data"},{"key":"e_1_3_2_1_2_1","unstructured":"\"LZMA SDK (Software Development Kit) --- 7-zip.org \" https:\/\/7-zip.org\/sdk.html [Accessed 10-08-2023]."},{"key":"e_1_3_2_1_3_1","unstructured":"\"Torchvision.\" [Online]. Available: https:\/\/pytorch.org\/vision\/stable\/index.html"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","unstructured":"\"Nlp architect by intel ai lab \" Nov. 2018. [Online]. 10.5281\/zenodo.1477518","DOI":"10.5281\/zenodo.1477518"},{"key":"e_1_3_2_1_5_1","unstructured":"2023. [Online]. Available: https:\/\/sparsezoo.neuralmagic.com\/models\/efficientnet_v2-s-imagenet-base_quantized"},{"key":"e_1_3_2_1_6_1","unstructured":"2023. [Online]. Available: https:\/\/huggingface.co\/decapoda-research\/llama-7b-hf"},{"key":"e_1_3_2_1_7_1","unstructured":"2023. [Online]. Available: https:\/\/sparsezoo.neuralmagic.com\/models\/yolov8-m-coco-pruned75_quantized"},{"key":"e_1_3_2_1_8_1","volume-title":"Information theory and coding","author":"Abramson N.","year":"1963","unstructured":"N. Abramson, Information theory and coding. McGraw-Hill Book Co, 1963."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2644615"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3444943"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2008.04.005"},{"key":"e_1_3_2_1_12_1","unstructured":"CAST Inc. \"CAST ZipAccel-C Intel GZIP\/ZLIB\/Deflate Data Compression Core \" 2022. [Online]. Available: https:\/\/www.cast-inc.com\/compression\/gzip-data-compression\/zipaccel-c"},{"key":"e_1_3_2_1_13_1","volume-title":"IEEE AICAS","author":"Cavigelli L.","year":"2018","unstructured":"L. Cavigelli and L. Benini, \"Extended Bit-Plane Compression for Convolutional Neural Network Accelerators,\" in Proc. IEEE AICAS, 2018."},{"key":"e_1_3_2_1_14_1","first-page":"262","volume-title":"ISSCC 2016","author":"Krishna Yu-Hsin","year":"2016","unstructured":"Chen, Yu-Hsin and Krishna, Tushar and Emer, Joel and Sze, Vivienne, \"Eyeriss: An Energy-Efficient Reconfigurable Accelerator for Deep Convolutional Neural Networks,\" in IEEE International Solid-State Circuits Conference, ISSCC 2016, Digest of Technical Papers, 2016, pp. 262--263."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_2_1_16_1","first-page":"06085","article-title":"PACT: parameterized clipping activation for quantized neural networks","volume":"1805","author":"Choi J.","year":"2018","unstructured":"J. Choi, Z. Wang, S. Venkataramani, P. I. Chuang, V. Srinivasan, and K. Gopalakrishnan, \"PACT: parameterized clipping activation for quantized neural networks,\" CoRR, vol. abs\/1805.06085, 2018. [Online]. Available: http:\/\/arxiv.org\/abs\/1805.06085","journal-title":"CoRR"},{"key":"e_1_3_2_1_17_1","first-page":"546","volume-title":"Compresso: Pragmatic main memory compression,\" in 2018 51st Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO)","author":"Choukse E.","year":"2018","unstructured":"E. Choukse, M. Erez, and A. R. Alameldeen, \"Compresso: Pragmatic main memory compression,\" in 2018 51st Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO), 2018, pp. 546--558."},{"key":"e_1_3_2_1_18_1","first-page":"926","volume-title":"Buddy compression: Enabling larger memory for deep learning and hpc workloads on gpus,\" in 2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA)","author":"Choukse E.","year":"2020","unstructured":"E. Choukse, M. B. Sullivan, M. O'Connor, M. Erez, J. Pool, D. Nellans, and S. W. Keckler, \"Buddy compression: Enabling larger memory for deep learning and hpc workloads on gpus,\" in 2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA), 2020, pp. 926--939."},{"key":"e_1_3_2_1_19_1","volume-title":"Nov.","author":"Courbariaux M.","year":"2015","unstructured":"M. Courbariaux, Y. Bengio, and J.-P. David, \"BinaryConnect: Training Deep Neural Networks with binary weights during propagations,\" ArXiv e-prints, Nov. 2015."},{"key":"e_1_3_2_1_20_1","first-page":"3123","article-title":"Binaryconnect: Training deep neural networks with binary weights during propagations","author":"Courbariaux M.","year":"2015","unstructured":"M. Courbariaux, Y. Bengio, and J.-P. David, \"Binaryconnect: Training deep neural networks with binary weights during propagations,\" in Advances in Neural Information Processing Systems, 2015, pp. 3123--3131.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_21_1","first-page":"878","volume-title":"May 2011","author":"Dally B.","unstructured":"B. Dally, \"Power, programmability, and granularity: The challenges of exascale computing,\" in 2011 IEEE International Parallel Distributed Processing Symposium, May 2011, pp. 878--878."},{"key":"e_1_3_2_1_22_1","volume-title":"RFC 1951","author":"Deutsch L. P.","year":"1996","unstructured":"L. P. Deutsch, \"DEFLATE Compressed Data Format Specification version 1.3,\" RFC 1951, May 1996. [Online]. Available: https:\/\/www.rfc-editor.org\/info\/rfc1951"},{"key":"e_1_3_2_1_23_1","first-page":"04805","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","volume":"1810","author":"Devlin J.","year":"2018","unstructured":"J. Devlin, M. Chang, K. Lee, and K. Toutanova, \"BERT: pre-training of deep bidirectional transformers for language understanding,\" CoRR, vol. abs\/1810.04805, 2018. [Online]. Available: http:\/\/arxiv.org\/abs\/1810.04805","journal-title":"CoRR"},{"key":"e_1_3_2_1_24_1","volume-title":"Nvidia developer blog","author":"Durant L.","year":"2017","unstructured":"L. Durant, O. Giroux, M. Harris, and N. Stam, \"Nvidia developer blog,\" May 2017. [Online]. Available: https:\/\/devblogs.nvidia.com\/inside-volta\/"},{"key":"e_1_3_2_1_25_1","volume-title":"Boveda: Building an on-chip deep learning memory hierarchy brick by brick","author":"Vivancos I. Edo","year":"2021","unstructured":"I. Edo Vivancos, S. Sharify, D. Ly-Ma, A. Abdelhadi, C. Bannon, M. Nikolic, M. Mahmoud, A. Delmas Lascorz, G. Pekhimenko, A. Moshovos, and et al., \"Boveda: Building an on-chip deep learning memory hierarchy brick by brick,\" Mar 2021. [Online]. Available: https:\/\/proceedings.mlsys.org\/paper\/2021\/hash\/013d407166ec4fa56eb1e1f8cbe183b9-Abstract.html"},{"key":"e_1_3_2_1_26_1","volume-title":"Gptq: Accurate post-training quantization for generative pre-trained transformers","author":"Frantar E.","year":"2023","unstructured":"E. Frantar, S. Ashkboos, T. Hoefler, and D. Alistarh, \"Gptq: Accurate post-training quantization for generative pre-trained transformers,\" 2023."},{"key":"e_1_3_2_1_27_1","first-page":"13630","article-title":"A survey of quantization methods for efficient neural network inference","volume":"2103","author":"Gholami A.","year":"2021","unstructured":"A. Gholami, S. Kim, Z. Dong, Z. Yao, M. W. Mahoney, and K. Keutzer, \"A survey of quantization methods for efficient neural network inference,\" CoRR, vol. abs\/2103.13630, 2021. [Online]. Available: https:\/\/arxiv.org\/abs\/2103.13630","journal-title":"CoRR"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579445"},{"key":"e_1_3_2_1_29_1","first-page":"243","volume-title":"ISCA 2016","author":"Han S.","year":"2016","unstructured":"S. Han, X. Liu, H. Mao, J. Pu, A. Pedram, M. A. Horowitz, and W. J. Dally, \"EIE: efficient inference engine on compressed deep neural network,\" in 43rd ACM\/IEEE Annual International Symposium on Computer Architecture, ISCA 2016, Seoul, South Korea, June 18-22, 2016, 2016, pp. 243--254."},{"key":"e_1_3_2_1_30_1","volume-title":"Oct.","author":"Han S.","year":"2015","unstructured":"S. Han, H. Mao, and W. J. Dally, \"Deep Compression: Compressing Deep Neural Networks with Pruning, Trained Quantization and Huffman Coding,\" arXiv:1510.00149 [cs], Oct. 2015, arXiv: 1510.00149. [Online]. Available: http:\/\/arxiv.org\/abs\/1510.00149"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/2827872"},{"key":"e_1_3_2_1_32_1","first-page":"03385","article-title":"Deep residual learning for image recognition","volume":"1512","author":"He K.","year":"2015","unstructured":"K. He, X. Zhang, S. Ren, and J. Sun, \"Deep residual learning for image recognition,\" CoRR, vol. abs\/1512.03385, 2015. [Online]. Available: http:\/\/arxiv.org\/abs\/1512.03385","journal-title":"CoRR"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3038912.3052569"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"M. Horowitz \"1.1 computing's energy problem (and what we can do about it) \" vol. 57 02 2014 pp. 10--14.","DOI":"10.1109\/ISSCC.2014.6757323"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00140"},{"key":"e_1_3_2_1_36_1","first-page":"04861","article-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications","volume":"1704","author":"Howard A. G.","year":"2017","unstructured":"A. G. Howard, M. Zhu, B. Chen, D. Kalenichenko, W. Wang, T. Weyand, M. Andreetto, and H. Adam, \"Mobilenets: Efficient convolutional neural networks for mobile vision applications,\" CoRR, vol. abs\/1704.04861, 2017. [Online]. Available: http:\/\/arxiv.org\/abs\/1704.04861","journal-title":"CoRR"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-3596-6_4"},{"key":"e_1_3_2_1_38_1","unstructured":"IntelAI \"Models\/benchmarks at master \u00b7 intelai\/models.\" [Online]. Available: https:\/\/github.com\/IntelAI\/models\/tree\/master\/benchmarks"},{"key":"e_1_3_2_1_39_1","first-page":"15","volume-title":"Sparsity-aware and re-configurable npu architecture for samsung flagship mobile soc,\" in 2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA)","author":"Jang J.-W.","year":"2021","unstructured":"J.-W. Jang, S. Lee, D. Kim, H. Park, A. S. Ardestani, Y. Choi, C. Kim, Y. Kim, H. Yu, H. Abdel-Aziz, J.-S. Park, H. Lee, D. Lee, M. W. Kim, H. Jung, H. Nam, D. Lim, S. Lee, J.-H. Song, S. Kwon, J. Hassoun, S. Lim, and C. Choi, \"Sparsity-aware and re-configurable npu architecture for samsung flagship mobile soc,\" in 2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA), 2021, pp. 15--28."},{"key":"e_1_3_2_1_40_1","first-page":"06826","article-title":"Dissecting the NVIDIA volta GPU architecture via microbenchmarking","volume":"1804","author":"Jia Z.","year":"2018","unstructured":"Z. Jia, M. Maggioni, B. Staiger, and D. P. Scarpazza, \"Dissecting the NVIDIA volta GPU architecture via microbenchmarking,\" CoRR, vol. abs\/1804.06826, 2018. [Online]. Available: http:\/\/arxiv.org\/abs\/1804.06826","journal-title":"CoRR"},{"key":"e_1_3_2_1_41_1","volume-title":"Ultralytics yolov8","author":"Jocher G.","year":"2023","unstructured":"G. Jocher, A. Chaurasia, and J. Qiu, \"Ultralytics yolov8,\" 2023. [Online]. Available: https:\/\/github.com\/ultralytics\/ultralytics"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/2925426.2926294"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.37"},{"key":"e_1_3_2_1_45_1","volume-title":"I-bert: Integer-only bert quantization,\" International Conference on Machine Learning (Accepted)","author":"Kim S.","year":"2021","unstructured":"S. Kim, A. Gholami, Z. Yao, M. W. Mahoney, and K. Keutzer, \"I-bert: Integer-only bert quantization,\" International Conference on Machine Learning (Accepted), 2021."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1147\/rd.282.0135"},{"key":"e_1_3_2_1_47_1","first-page":"08830","article-title":"Apack: Off-chip, lossless data compression for efficient deep learning inference","volume":"2201","author":"Lascorz A. D.","year":"2022","unstructured":"A. D. Lascorz, M. Mahmoud, and A. Moshovos, \"Apack: Off-chip, lossless data compression for efficient deep learning inference,\" CoRR, vol. abs\/2201.08830, 2022. [Online]. Available: https:\/\/arxiv.org\/abs\/2201.08830","journal-title":"CoRR"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358295"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2984191"},{"key":"e_1_3_2_1_50_1","first-page":"1","volume-title":"Design and evaluation of an fpga-based hardware accelerator for deflate data decompression,\" in 2019 IEEE Canadian Conference of Electrical and Computer Engineering (CCECE)","author":"Ledwon M.","year":"2019","unstructured":"M. Ledwon, B. F. Cockburn, and J. Han, \"Design and evaluation of an fpga-based hardware accelerator for deflate data decompression,\" in 2019 IEEE Canadian Conference of Electrical and Computer Engineering (CCECE), 2019, pp. 1--6."},{"key":"e_1_3_2_1_51_1","first-page":"09671","article-title":"Pruning and quantization for deep neural network acceleration: A survey","volume":"2101","author":"Liang T.","year":"2021","unstructured":"T. Liang, J. Glossner, L. Wang, and S. Shi, \"Pruning and quantization for deep neural network acceleration: A survey,\" CoRR, vol. abs\/2101.09671, 2021. [Online]. Available: https:\/\/arxiv.org\/abs\/2101.09671","journal-title":"CoRR"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-12087-4"},{"key":"e_1_3_2_1_53_1","first-page":"04275","article-title":"Pruning algorithms to accelerate convolutional neural networks for edge applications: A survey","volume":"2005","author":"Liu J.","year":"2020","unstructured":"J. Liu, S. Tripathi, U. Kurup, and M. Shah, \"Pruning algorithms to accelerate convolutional neural networks for edge applications: A survey,\" CoRR, vol. abs\/2005.04275, 2020. [Online]. Available: https:\/\/arxiv.org\/abs\/2005.04275","journal-title":"CoRR"},{"key":"e_1_3_2_1_54_1","first-page":"393","volume-title":"Cambricon: An instruction set architecture for neural networks,\" in 2016 ACM\/IEEE 43rd Annual International Symposium on Computer Architecture (ISCA)","author":"Liu S.","year":"2016","unstructured":"S. Liu, Z. Du, J. Tao, D. Han, T. Luo, Y. Xie, Y. Chen, and T. Chen, \"Cambricon: An instruction set architecture for neural networks,\" in 2016 ACM\/IEEE 43rd Annual International Symposium on Computer Architecture (ISCA), 2016, pp. 393--405."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"e_1_3_2_1_56_1","volume-title":"Roberta: A robustly optimized bert pretraining approach,\" arXiv preprint arXiv:1907.11692","author":"Liu Y.","year":"2019","unstructured":"Y. Liu, M. Ott, N. Goyal, J. Du, M. Joshi, D. Chen, O. Levy, M. Lewis, L. Zettlemoyer, and V. Stoyanov, \"Roberta: A robustly optimized bert pretraining approach,\" arXiv preprint arXiv:1907.11692, 2019."},{"key":"e_1_3_2_1_57_1","volume-title":"loup Gailly and M. Adler, \"Zlib: A massively spiffy yet delicately unobtrusive compression library","author":"J.","year":"2023","unstructured":"J. loup Gailly and M. Adler, \"Zlib: A massively spiffy yet delicately unobtrusive compression library,\" 2023. [Online]. Available: https:\/\/www.zlib.net\/"},{"key":"e_1_3_2_1_58_1","first-page":"89","volume-title":"Object detection based on ssd-resnet,\" in 2019 IEEE 6th International Conference on Cloud Computing and Intelligence Systems (CCIS)","author":"Lu X.","year":"2019","unstructured":"X. Lu, X. Kang, S. Nishide, and F. Ren, \"Object detection based on ssd-resnet,\" in 2019 IEEE 6th International Conference on Cloud Computing and Intelligence Systems (CCIS), 2019, pp. 89--92."},{"key":"e_1_3_2_1_59_1","volume-title":"Shufflenet v2: Practical guidelines for efficient cnn architecture design","author":"Ma N.","year":"2018","unstructured":"N. Ma, X. Zhang, H.-T. Zheng, and J. Sun, \"Shufflenet v2: Practical guidelines for efficient cnn architecture design,\" 2018."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00020"},{"key":"e_1_3_2_1_61_1","first-page":"07843","article-title":"Pointer sentinel mixture models","volume":"1609","author":"Merity S.","year":"2016","unstructured":"S. Merity, C. Xiong, J. Bradbury, and R. Socher, \"Pointer sentinel mixture models,\" CoRR, vol. abs\/1609.07843, 2016. [Online]. Available: http:\/\/arxiv.org\/abs\/1609.07843","journal-title":"CoRR"},{"key":"e_1_3_2_1_62_1","unstructured":"I. Micron Technology \"DDR4 power calculator 4.0 \" https:\/\/www.micron.com\/~\/media\/documents\/products\/power-calculator\/ddr4_power_calc.xlsm."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2015.2435788"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/3342555"},{"key":"e_1_3_2_1_65_1","article-title":"Arithmetic Coding + Statistical Modeling = Data Compression","volume":"1991","author":"Nelson M.","unstructured":"M. Nelson, \"Arithmetic Coding + Statistical Modeling = Data Compression,\" Dr. Dobb's Journal, vol. February, 1991.","journal-title":"Dr. Dobb's Journal"},{"key":"e_1_3_2_1_66_1","first-page":"82","volume-title":"Design, and Verification (ICDV)","author":"Nguyen Q.-L.","year":"2017","unstructured":"Q.-L. Nguyen, D.-L. Tran, D.-H. Bui, D.-T. Mai, and X.-T. Tran, \"Efficient binary arithmetic encoder for hevc with multiple bypass bin processing,\" in 2017 7th International Conference on Integrated Circuits, Design, and Verification (ICDV), 2017, pp. 82--87."},{"key":"e_1_3_2_1_67_1","volume-title":"Bitpruning: Learning bitlengths for aggressive and accurate quantization","author":"Nikoli\u0107 M.","year":"2020","unstructured":"M. Nikoli\u0107, G. B. Hacene, C. Bannon, A. D. Lascorz, M. Courbariaux, Y. Bengio, V. Gripon, and A. Moshovos, \"Bitpruning: Learning bitlengths for aggressive and accurate quantization,\" 2020."},{"key":"e_1_3_2_1_68_1","first-page":"165","volume-title":"Characterizing sources of ineffectual computations in deep learning networks,\" in 2019 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)","author":"Nikoli\u0107 M.","year":"2019","unstructured":"M. Nikoli\u0107, M. Mahmoud, A. Moshovos, Y. Zhao, and R. Mullins, \"Characterizing sources of ineffectual computations in deep learning networks,\" in 2019 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), 2019, pp. 165--176."},{"key":"e_1_3_2_1_69_1","volume-title":"Schr\u00f6dinger's fp: Dynamic adaptation of floating-point containers for deep learning training","author":"Nikoli\u0107 M.","year":"2022","unstructured":"M. Nikoli\u0107, E. T. Sanchez, J. Wang, A. H. Zadeh, M. Mahmoud, A. Abdelhadi, and A. Moshovos, \"Schr\u00f6dinger's fp: Dynamic adaptation of floating-point containers for deep learning training,\" 2022. [Online]. Available: https:\/\/arxiv.org\/abs\/2204.13666"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080254"},{"key":"e_1_3_2_1_71_1","first-page":"688","volume-title":"IEEE Computer Society","author":"Park E.","year":"2018","unstructured":"E. Park, D. Kim, and S. Yoo, \"Energy-efficient neural network accelerator based on outlier-aware low-precision computation,\" in ISCA. IEEE Computer Society, 2018, pp. 688--698."},{"key":"e_1_3_2_1_72_1","unstructured":"G. G. Pekhimenko \"Practical Data Compression for Modern Memory Hierarchies \" 2016."},{"key":"e_1_3_2_1_73_1","volume-title":"Language models are unsupervised multitask learners","author":"Radford A.","year":"2019","unstructured":"A. Radford, J. Wu, R. Child, D. Luan, D. Amodei, and I. Sutskever, \"Language models are unsupervised multitask learners,\" 2019."},{"key":"e_1_3_2_1_74_1","first-page":"2383","article-title":"Squad: 100,000+ questions for machine comprehension of text,\" in Proceedings of the 2016 Conference on Empirical Methods","author":"Rajpurkar P.","year":"2016","unstructured":"P. Rajpurkar, J. Zhang, K. Lopyrev, and P. Liang, \"Squad: 100,000+ questions for machine comprehension of text,\" in Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing, 2016, pp. 2383--2392.","journal-title":"Natural Language Processing"},{"key":"e_1_3_2_1_75_1","first-page":"139","volume-title":"CSLDAMT '10","author":"Rashtchian C.","year":"2010","unstructured":"C. Rashtchian, P. Young, M. Hodosh, and J. Hockenmaier, \"Collecting image annotations using amazon's mechanical turk,\" in Proceedings of the NAACL HLT 2010 Workshop on Creating Speech and Language Data with Amazon's Mechanical Turk, ser. CSLDAMT '10. Stroudsburg, PA, USA: Association for Computational Linguistics, 2010, pp. 139--147. [Online]. Available: http:\/\/dl.acm.org\/citation.cfm?id=1866696.1866717"},{"key":"e_1_3_2_1_76_1","volume-title":"Mlperf inference benchmark","author":"Reddi V. J.","year":"2019","unstructured":"V. J. Reddi, C. Cheng, D. Kanter, P. Mattson, G. Schmuelling, C.-J. Wu, B. Anderson, M. Breughe, M. Charlebois, W. Chou, R. Chukka, C. Coleman, S. Davis, P. Deng, G. Diamos, J. Duke, D. Fick, J. S. Gardner, I. Hubara, S. Idgunji, T. B. Jablin, J. Jiao, T. S. John, P. Kanwar, D. Lee, J. Liao, A. Lokhmotov, F. Massa, P. Meng, P. Micikevicius, C. Osborne, G. Pekhimenko, A. T. R. Rajan, D. Sequeira, A. Sirasao, F. Sun, H. Tang, M. Thomson, F. Wei, E. Wu, L. Xu, K. Yamada, B. Yu, G. Yuan, A. Zhong, P. Zhang, and Y. Zhou, \"Mlperf inference benchmark,\" 2019."},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00017"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1147\/rd.232.0149"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1147\/rd.203.0198"},{"key":"e_1_3_2_1_80_1","volume-title":"Sep.","author":"Russakovsky O.","year":"2014","unstructured":"O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh, S. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein, A. C. Berg, and L. Fei-Fei, \"ImageNet Large Scale Visual Recognition Challenge,\" arXiv:1409.0575 [cs], Sep. 2014, arXiv: 1409.0575."},{"key":"e_1_3_2_1_81_1","volume-title":"Improving the speed of lz77 compression by hashing and suffix sorting,\" IEICE transactions on fundamentals of electronics, communications and computer sciences","author":"Sadakane K.","unstructured":"K. Sadakane and H. Imai, \"Improving the speed of lz77 compression by hashing and suffix sorting,\" IEICE transactions on fundamentals of electronics, communications and computer sciences, vol. 83, no. 12, pp. 2689--2698, 2000."},{"key":"e_1_3_2_1_82_1","first-page":"04381","article-title":"Inverted residuals and linear bottlenecks: Mobile networks for classification, detection and segmentation","volume":"1801","author":"Sandler M.","year":"2018","unstructured":"M. Sandler, A. G. Howard, M. Zhu, A. Zhmoginov, and L. Chen, \"Inverted residuals and linear bottlenecks: Mobile networks for classification, detection and segmentation,\" CoRR, vol. abs\/1801.04381, 2018. [Online]. Available: http:\/\/arxiv.org\/abs\/1801.04381","journal-title":"CoRR"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1109\/HCS49909.2020.9220508"},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.5555\/2987777"},{"key":"e_1_3_2_1_85_1","volume-title":"Memory requirements for convolutional neural network hardware accelerators,\" in IEEE International Symposium on Workload Characterization","author":"Siu K.","year":"2018","unstructured":"K. Siu, D. M. Stuart, M. Mahmoud, and A. Moshovos, \"Memory requirements for convolutional neural network hardware accelerators,\" in IEEE International Symposium on Workload Characterization, 2018."},{"key":"e_1_3_2_1_86_1","volume-title":"Inception-v4, inception-resnet and the impact of residual connections on learning","author":"Szegedy C.","year":"2016","unstructured":"C. Szegedy, S. Ioffe, V. Vanhoucke, and A. Alemi, \"Inception-v4, inception-resnet and the impact of residual connections on learning,\" 2016."},{"key":"e_1_3_2_1_87_1","first-page":"1","volume-title":"Going deeper with convolutions,\" in 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Szegedy C.","year":"2015","unstructured":"C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich, \"Going deeper with convolutions,\" in 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2015, pp. 1--9."},{"key":"e_1_3_2_1_88_1","volume-title":"Rethinking the inception architecture for computer vision","author":"Szegedy C.","year":"2015","unstructured":"C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna, \"Rethinking the inception architecture for computer vision,\" 2015."},{"key":"e_1_3_2_1_89_1","first-page":"00298","article-title":"Efficientnetv2: Smaller models and faster training","volume":"2104","author":"Tan M.","year":"2021","unstructured":"M. Tan and Q. V. Le, \"Efficientnetv2: Smaller models and faster training,\" CoRR, vol. abs\/2104.00298, 2021. [Online]. Available: https:\/\/arxiv.org\/abs\/2104.00298","journal-title":"CoRR"},{"key":"e_1_3_2_1_90_1","volume-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron H.","year":"2023","unstructured":"H. Touvron, L. Martin, K. Stone, P. Albert, A. Almahairi, Y. Babaei, N. Bashlykov, S. Batra, P. Bhargava, S. Bhosale, D. Bikel, L. Blecher, C. C. Ferrer, M. Chen, G. Cucurull, D. Esiobu, J. Fernandes, J. Fu, W. Fu, B. Fuller, C. Gao, V. Goswami, N. Goyal, A. Hartshorn, S. Hosseini, R. Hou, H. Inan, M. Kardas, V. Kerkez, M. Khabsa, I. Kloumann, A. Korenev, P. S. Koura, M.-A. Lachaux, T. Lavril, J. Lee, D. Liskovich, Y. Lu, Y. Mao, X. Martinet, T. Mihaylov, P. Mishra, I. Molybog, Y. Nie, A. Poulton, J. Reizenstein, R. Rungta, K. Saladi, A. Schelten, R. Silva, E. M. Smith, R. Subramanian, X. E. Tan, B. Tang, R. Taylor, A. Williams, J. X. Kuan, P. Xu, Z. Yan, I. Zarov, Y. Zhang, A. Fan, M. Kambadur, S. Narang, A. Rodriguez, R. Stojnic, S. Edunov, and T. Scialom, \"Llama 2: Open foundation and fine-tuned chat models,\" 2023."},{"key":"e_1_3_2_1_91_1","first-page":"07461","article-title":"GLUE: A multi-task benchmark and analysis platform for natural language understanding","volume":"1804","author":"Wang A.","year":"2018","unstructured":"A. Wang, A. Singh, J. Michael, F. Hill, O. Levy, and S. R. Bowman, \"GLUE: A multi-task benchmark and analysis platform for natural language understanding,\" CoRR, vol. abs\/1804.07461, 2018. [Online]. Available: http:\/\/arxiv.org\/abs\/1804.07461","journal-title":"CoRR"},{"key":"e_1_3_2_1_92_1","first-page":"988","volume-title":"ACM","author":"Wang C.","year":"2016","unstructured":"C. Wang, H. Yang, C. Bartz, and C. Meinel, \"Image captioning with deep bidirectional lstms,\" in Proceedings of the 2016 ACM on Multimedia Conference. ACM, 2016, pp. 988--997."},{"key":"e_1_3_2_1_93_1","first-page":"08318","article-title":"Deepcabac: Context-adaptive binary arithmetic coding for deep neural network compression","volume":"1905","author":"Wiedemann S.","year":"2019","unstructured":"S. Wiedemann, H. Kirchhoffer, S. Matlage, P. Haase, A. Marb\u00e1n, T. Marinc, D. Neumann, A. Osman, D. Marpe, H. Schwarz, T. Wiegand, and W. Samek, \"Deepcabac: Context-adaptive binary arithmetic coding for deep neural network compression,\" CoRR, vol. abs\/1905.08318, 2019. [Online]. Available: http:\/\/arxiv.org\/abs\/1905.08318","journal-title":"CoRR"},{"key":"e_1_3_2_1_94_1","volume-title":"the free encyclopedia,\" http:\/\/en.wikipedia.org\/w\/index.php?title=Lempel%E2%80%93Ziv%E2%80%93Markov%20chain%20algorithm&oldid=1160826469","author":"Wikipedia Ziv-Markov","year":"2023","unstructured":"Wikipedia, \"Lempel-Ziv-Markov chain algorithm --- Wikipedia, the free encyclopedia,\" http:\/\/en.wikipedia.org\/w\/index.php?title=Lempel%E2%80%93Ziv%E2%80%93Markov%20chain%20algorithm&oldid=1160826469, 2023, [Online; accessed 18-September-2023]."},{"key":"e_1_3_2_1_95_1","first-page":"03771","article-title":"Huggingface's transformers: State-of-the-art natural language processing","volume":"1910","author":"Wolf T.","year":"2019","unstructured":"T. Wolf, L. Debut, V. Sanh, J. Chaumond, C. Delangue, A. Moi, P. Cistac, T. Rault, R. Louf, M. Funtowicz, and J. Brew, \"Huggingface's transformers: State-of-the-art natural language processing,\" ArXiv, vol. abs\/1910.03771, 2019.","journal-title":"ArXiv"},{"key":"e_1_3_2_1_96_1","volume-title":"Aggregated residual transformations for deep neural networks,\" arXiv preprint arXiv:1611.05431","author":"Xie S.","year":"2016","unstructured":"S. Xie, R. Girshick, P. Doll\u00e1r, Z. Tu, and K. He, \"Aggregated residual transformations for deep neural networks,\" arXiv preprint arXiv:1611.05431, 2016."},{"key":"e_1_3_2_1_97_1","volume-title":"IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Chen Tien-Ju","year":"2017","unstructured":"Yang, Tien-Ju and Chen, Yu-Hsin and Sze, Vivienne, \"Designing Energy-Efficient Convolutional Neural Networks using Energy-Aware Pruning,\" in IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2017."},{"key":"e_1_3_2_1_98_1","first-page":"477","volume-title":"Embedded image-domain compression using context models,\" in Proceedings 1999 International Conference on Image Processing (Cat. 99CH36348)","author":"Yoo Y.","year":"1999","unstructured":"Y. Yoo, Y. G. Kwon, and A. Ortega, \"Embedded image-domain compression using context models,\" in Proceedings 1999 International Conference on Image Processing (Cat. 99CH36348), vol. 1, 1999, pp. 477--481 vol.1."},{"key":"e_1_3_2_1_99_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00071"},{"key":"e_1_3_2_1_100_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527438"},{"key":"e_1_3_2_1_101_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1977.1055714"},{"key":"e_1_3_2_1_102_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.1297430"}],"event":{"name":"ASPLOS '24: 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","location":"La Jolla CA USA","acronym":"ASPLOS '24","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620665.3640356","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3620665.3640356","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T22:29:27Z","timestamp":1750285767000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620665.3640356"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,27]]},"references-count":102,"alternative-id":["10.1145\/3620665.3640356","10.1145\/3620665"],"URL":"https:\/\/doi.org\/10.1145\/3620665.3640356","relation":{},"subject":[],"published":{"date-parts":[[2024,4,27]]},"assertion":[{"value":"2024-04-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}