{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T00:09:05Z","timestamp":1779322145840,"version":"3.51.4"},"publisher-location":"Cham","reference-count":39,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032238702","type":"print"},{"value":"9783032238719","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-23871-9_11","type":"book-chapter","created":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T23:54:10Z","timestamp":1779321250000},"page":"132-144","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Training for\u00a0Mixed-Precision Integer Weights, Activations and\u00a0Embeddings in\u00a0BERT"],"prefix":"10.1007","author":[{"given":"C\u00e9dric","family":"Gernigon","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xavier","family":"Pillet","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anastasia","family":"Volkova","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Richard","family":"Dufour","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,5,1]]},"reference":[{"key":"11_CR1","unstructured":"Bai, H., et al.: Binarybert: pushing the limit of Bert quantization. arXiv preprint arXiv:2012.15701 (2020)"},{"key":"11_CR2","unstructured":"Banner, R., Nahshan, Y., Soudry, D.: Post training 4-bit quantization of convolutional networks for rapid-deployment. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"11_CR3","unstructured":"Bengio, Y., L\u00e9onard, N., Courville, A.: Estimating or propagating gradients through stochastic neurons for conditional computation. arXiv preprint arXiv:1308.3432 (2013)"},{"key":"11_CR4","doi-asserted-by":"crossref","unstructured":"Bhalgat, Y., Lee, J., Nagel, M., Blankevoort, T., Kwak, N.: LSQ+: improving low-bit quantization through learnable offsets and better initialization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, pp. 696\u2013697 (2020)","DOI":"10.1109\/CVPRW50498.2020.00356"},{"key":"11_CR5","unstructured":"Choi, J., Wang, Z., Venkataramani, S., Chuang, P.I.J., Srinivasan, V., Gopalakrishnan, K.: PACT: parameterized clipping activation for quantized neural networks. arXiv preprint arXiv:1805.06085 (2018)"},{"key":"11_CR6","unstructured":"Courbariaux, M., Bengio, Y., David, J.P.: BinaryConnect: training deep neural networks with binary weights during propagations. In: Advances in Neural Information Processing Systems 28 \u2013 NIPS 2015. NIPS\u201915, vol.\u00a02, pp. 3123\u20133131. MIT Press (2015)"},{"key":"11_CR7","unstructured":"Courbariaux, M., Hubara, I., Soudry, D., El-Yaniv, R., Bengio, Y.: Binarized neural networks: training deep neural networks with weights and activations constrained to $$+1$$ or $$-1$$. arXiv preprint arXiv:1602.02830 (2016)"},{"key":"11_CR8","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NAACL-HLT. vol.\u00a01, p.\u00a02 (2019)"},{"key":"11_CR9","doi-asserted-by":"crossref","unstructured":"Dong, Z., Yao, Z., Gholami, A., Mahoney, M.W., Keutzer, K.: HAWQ: hessian aware quantization of neural networks with mixed-precision. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 293\u2013302 (2019)","DOI":"10.1109\/ICCV.2019.00038"},{"key":"11_CR10","unstructured":"Esser, S.K., McKinstry, J.L., Bablani, D., Appuswamy, R., Modha, D.S.: Learned step size quantization. arXiv preprint arXiv:1902.08153 (2019)"},{"key":"11_CR11","doi-asserted-by":"crossref","unstructured":"Gernigon, C., Filip, S.I., Sentieys, O., Coggiola, C., Bruno, M.: Adaqat: adaptive bit-width quantization-aware training. In: 2024 IEEE 6th International Conference on AI Circuits and Systems (AICAS), pp. 442\u2013446. IEEE (2024)","DOI":"10.1109\/AICAS59952.2024.10595895"},{"key":"11_CR12","doi-asserted-by":"crossref","unstructured":"Gordon, M.A., Duh, K., Andrews, N.: Compressing Bert: studying the effects of weight pruning on transfer learning. arXiv preprint arXiv:2002.08307 (2020)","DOI":"10.18653\/v1\/2020.repl4nlp-1.18"},{"key":"11_CR13","unstructured":"Huang, X., et al.: SDQ: stochastic differentiable quantization with mixed precision. In: International Conference on Machine Learning, pp. 9295\u20139309. PMLR (2022)"},{"key":"11_CR14","doi-asserted-by":"crossref","unstructured":"Jiao, X., et al.: Tinybert: distilling Bert for natural language understanding. arXiv preprint arXiv:1909.10351 (2019)","DOI":"10.18653\/v1\/2020.findings-emnlp.372"},{"key":"11_CR15","first-page":"14651","volume":"35","author":"A Kuzmin","year":"2022","unstructured":"Kuzmin, A., Van Baalen, M., Ren, Y., Nagel, M., Peters, J., Blankevoort, T.: Fp8 quantization: the power of the exponent. Adv. Neural. Inf. Process. Syst. 35, 14651\u201314662 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"11_CR16","unstructured":"Liu, J., Cai, J., Zhuang, B.: Sharpness-aware quantization for deep neural networks. arXiv preprint arXiv:2111.12273 (2021)"},{"key":"11_CR17","unstructured":"Liu, S.Y., Liu, Z., Cheng, K.T.: Oscillation-free quantization for low-bit vision transformers. In: International Conference on Machine Learning, pp. 21813\u201321824. PMLR (2023)"},{"key":"11_CR18","unstructured":"Ma, Y., et al.: OMPQ: orthogonal mixed precision quantization. arXiv preprint arXiv:2109.07865 (2021)"},{"key":"11_CR19","unstructured":"Nagel, M., Fournarakis, M., Bondarenko, Y., Blankevoort, T.: Overcoming oscillations in quantization-aware training. In: International Conference on Machine Learning, pp. 16318\u201316330. PMLR (2022)"},{"key":"11_CR20","unstructured":"Nikoli\u0107, M., et al.: BitPruning: learning bitlengths for aggressive and accurate quantization. arXiv preprint arXiv:2002.03090 (2020)"},{"key":"11_CR21","unstructured":"Qin, H., et al.: Bibert: accurate fully binarized Bert. arXiv preprint arXiv:2203.06390 (2022)"},{"key":"11_CR22","doi-asserted-by":"crossref","unstructured":"Rajpurkar, P.: Squad: 100,000+ questions for machine comprehension of text. arXiv preprint arXiv:1606.05250 (2016)","DOI":"10.18653\/v1\/D16-1264"},{"key":"11_CR23","doi-asserted-by":"crossref","unstructured":"Rajpurkar, P., Jia, R., Liang, P.: Know what you don\u2019t know: unanswerable questions for squad. arXiv preprint arXiv:1806.03822 (2018)","DOI":"10.18653\/v1\/P18-2124"},{"key":"11_CR24","unstructured":"Sanh, V., Debut, L., Chaumond, J., Wolf, T.: Distilbert, a distilled version of Bert: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108 (2019)"},{"key":"11_CR25","doi-asserted-by":"crossref","unstructured":"Shen, S., et al.: Q-Bert: hessian based ultra low precision quantization of Bert. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a034, pp. 8815\u20138821 (2020)","DOI":"10.1609\/aaai.v34i05.6409"},{"key":"11_CR26","unstructured":"Tang, H., Zhang, X., Liu, K., Zhu, J., Kang, Z.: MKQ-Bert: quantized Bert with 4-bits weights and activations. arXiv preprint arXiv:2203.13483 (2022)"},{"key":"11_CR27","doi-asserted-by":"publisher","first-page":"475","DOI":"10.1023\/A:1017501703105","volume":"109","author":"P Tseng","year":"2001","unstructured":"Tseng, P.: Convergence of a block coordinate descent method for nondifferentiable minimization. J. Optim. Theory Appl. 109, 475\u2013494 (2001)","journal-title":"J. Optim. Theory Appl."},{"key":"11_CR28","doi-asserted-by":"crossref","unstructured":"Wang, A.: Glue: a multi-task benchmark and analysis platform for natural language understanding. arXiv preprint arXiv:1804.07461 (2018)","DOI":"10.18653\/v1\/W18-5446"},{"key":"11_CR29","doi-asserted-by":"publisher","unstructured":"Wang, K., Liu, Z., Lin, Y., Lin, J., Han, S.: HAQ: hardware-aware automated quantization with mixed precision. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00881","DOI":"10.1109\/CVPR.2019.00881"},{"key":"11_CR30","unstructured":"Wang, Z., Li, J.B., Qu, S., Metze, F., Strubell, E.: Squat: sharpness-and quantization-aware training for Bert. arXiv preprint arXiv:2210.07171 (2022)"},{"key":"11_CR31","unstructured":"Wu, B., Wang, Y., Zhang, P., Tian, Y., Vajda, P., Keutzer, K.: Mixed precision quantization of convnets via differentiable neural architecture search. arXiv preprint arXiv:1812.00090 (2018)"},{"issue":"2","key":"11_CR32","doi-asserted-by":"publisher","first-page":"700","DOI":"10.1007\/s10915-017-0376-0","volume":"72","author":"Y Xu","year":"2017","unstructured":"Xu, Y., Yin, W.: A globally convergent algorithm for nonconvex optimization based on block coordinate update. J. Sci. Comput. 72(2), 700\u2013734 (2017)","journal-title":"J. Sci. Comput."},{"key":"11_CR33","doi-asserted-by":"crossref","unstructured":"Yang, L., Jin, Q.: FracBits: mixed precision quantization via fractional bit-widths. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 10612\u201310620 (2021)","DOI":"10.1609\/aaai.v35i12.17269"},{"key":"11_CR34","unstructured":"Yao, Z., et\u00a0al.: HAWQ-V3: dyadic neural network quantization. In: International Conference on Machine Learning, pp. 11875\u201311886. PMLR (2021)"},{"key":"11_CR35","doi-asserted-by":"crossref","unstructured":"Zadeh, A.H., Edo, I., Awad, O.M., Moshovos, A.: Gobo: quantizing attention-based NLP models for low latency and energy efficient inference. In: 2020 53rd Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO), pp. 811\u2013824. IEEE (2020)","DOI":"10.1109\/MICRO50266.2020.00071"},{"key":"11_CR36","doi-asserted-by":"crossref","unstructured":"Zafrir, O., Boudoukh, G., Izsak, P., Wasserblat, M.: Q8bert: quantized 8bit Bert. In: 2019 Fifth Workshop on Energy Efficient Machine Learning and Cognitive Computing-NeurIPS Edition (EMC2-NIPS), pp. 36\u201339. IEEE (2019)","DOI":"10.1109\/EMC2-NIPS53020.2019.00016"},{"key":"11_CR37","doi-asserted-by":"crossref","unstructured":"Zhang, W., et al.: Ternarybert: distillation-aware ultra-low bit bert. arXiv preprint arXiv:2009.12812 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.37"},{"key":"11_CR38","doi-asserted-by":"crossref","unstructured":"Zhao, C., Hua, T., Shen, Y., Lou, Q., Jin, H.: Automatic mixed-precision quantization search of Bert. arXiv preprint arXiv:2112.14938 (2021)","DOI":"10.24963\/ijcai.2021\/472"},{"key":"11_CR39","unstructured":"Zhou, S., Wu, Y., Ni, Z., Zhou, X., Wen, H., Zou, Y.: DoReFa-Net: training low bitwidth convolutional neural networks with low bitwidth gradients. arXiv:1606.06160 (2016)"}],"container-title":["Lecture Notes in Computer Science","Design and Architecture for Signal and Image Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-23871-9_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T23:54:16Z","timestamp":1779321256000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-23871-9_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032238702","9783032238719"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-23871-9_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"1 May 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DASIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Design and Architectures for Signal and Image Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Krakow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Poland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2026","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 January 2026","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 January 2026","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dasip2026","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/dasip-2026.github.io\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}