{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T06:08:10Z","timestamp":1769580490561,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":41,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819620630","type":"print"},{"value":"9789819620647","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,12,28]],"date-time":"2024-12-28T00:00:00Z","timestamp":1735344000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,28]],"date-time":"2024-12-28T00:00:00Z","timestamp":1735344000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-2064-7_20","type":"book-chapter","created":{"date-parts":[[2024,12,27]],"date-time":"2024-12-27T19:25:40Z","timestamp":1735327540000},"page":"270-284","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Quantized-ViT Efficient Training via\u00a0Fisher Matrix Regularization"],"prefix":"10.1007","author":[{"given":"Yuzhang","family":"Shang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gaowen","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ramana","family":"Kompella","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yan","family":"Yan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,28]]},"reference":[{"key":"20_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1007\/978-3-030-58517-4_9","volume-title":"Computer Vision \u2013 ECCV 2020","author":"S Agarwal","year":"2020","unstructured":"Agarwal, S., Arora, H., Anand, S., Arora, C.: Contextual diversity for active learning. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12361, pp. 137\u2013153. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58517-4_9"},{"key":"20_CR2","unstructured":"Bengio, Y., L\u00e9onard, N., Courville, A.: Estimating or propagating gradients through stochastic neurons for conditional computation. arXiv:1308.3432 (2013)"},{"key":"20_CR3","unstructured":"Chen, Y., Welling, M., Smola, A.: Super-samples from kernel herding. In: UAI (2012)"},{"key":"20_CR4","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"20_CR5","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv:2010.11929 (2020)"},{"key":"20_CR6","unstructured":"Esser, S.K., McKinstry, J.L., Bablani, D., Appuswamy, R., Modha, D.S.: Learned step size quantization. arXiv:1902.08153 (2019)"},{"key":"20_CR7","doi-asserted-by":"crossref","unstructured":"Ganesh, P., et al.: Compressing large-scale transformer-based models: a case study on BERT. TACL (2021)","DOI":"10.1162\/tacl_a_00413"},{"key":"20_CR8","doi-asserted-by":"crossref","unstructured":"Gholami, A., Kim, S., Dong, Z., Yao, Z., Mahoney, M.W., Keutzer, K.: A survey of quantization methods for efficient neural network inference. In: Low-Power Computer Vision (2022)","DOI":"10.1201\/9781003162810-13"},{"key":"20_CR9","doi-asserted-by":"crossref","unstructured":"Guo, C., Zhao, B., Bai, Y.: Deepcore: a comprehensive library for coreset selection in deep learning. In: DEXA (2022)","DOI":"10.1007\/978-3-031-12423-5_14"},{"key":"20_CR10","unstructured":"Han, K., et\u00a0al.: A survey on vision transformer. TPAMI (2022)"},{"key":"20_CR11","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-84858-7","volume-title":"The Elements of Statistical Learning: Data Mining, Inference, and Prediction","author":"T Hastie","year":"2009","unstructured":"Hastie, T., Tibshirani, R., Friedman, J.H., Friedman, J.H.: The Elements of Statistical Learning: Data Mining, Inference, and Prediction. Springer, New York (2009). https:\/\/doi.org\/10.1007\/978-0-387-84858-7"},{"key":"20_CR12","unstructured":"Hubara, I., Courbariaux, M., Soudry, D., El-Yaniv, R., Bengio, Y.: Binarized neural networks. In: NeurIPS (2016)"},{"key":"20_CR13","unstructured":"Kao, T.C., Jensen, K., van\u00a0de Ven, G., Bernacchia, A., Hennequin, G.: Natural continual learning: success is a journey, not (just) a destination. In: NeurIPS (2021)"},{"key":"20_CR14","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv:1412.6980 (2014)"},{"key":"20_CR15","doi-asserted-by":"crossref","unstructured":"Kirkpatrick, J., et\u00a0al.: Overcoming catastrophic forgetting in neural networks. PNAS (2017)","DOI":"10.1073\/pnas.1611835114"},{"key":"20_CR16","unstructured":"Krizhevsky, A., Hinton, G., et\u00a0al.: Learning multiple layers of features from tiny images (2009)"},{"key":"20_CR17","unstructured":"Li, Y., Xu, S., Zhang, B., Cao, X., Gao, P., Guo, G.: Q-ViT: accurate and fully quantized low-bit vision transformer. In: NeurIPS (2022)"},{"key":"20_CR18","unstructured":"Li, Y., et al.: BRECQ: pushing the limit of post-training quantization by block reconstruction. In: ICLR (2021)"},{"key":"20_CR19","doi-asserted-by":"crossref","unstructured":"Liu, J., Niu, L., Yuan, Z., Yang, D., Wang, X., Liu, W.: PD-quant: post-training quantization based on prediction difference metric. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.02340"},{"key":"20_CR20","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"20_CR21","unstructured":"Nagel, M., Amjad, R.A., Van\u00a0Baalen, M., Louizos, C., Blankevoort, T.: Up or down? adaptive rounding for post-training quantization. In: ICML (2020)"},{"key":"20_CR22","doi-asserted-by":"crossref","unstructured":"Nagel, M., Baalen, M.v., Blankevoort, T., Welling, M.: Data-free quantization through weight equalization and bias correction. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00141"},{"key":"20_CR23","unstructured":"Nagel, M., Fournarakis, M., Amjad, R.A., Bondarenko, Y., van Baalen, M., Blankevoort, T.: A white paper on neural network quantization. CoRR (2021)"},{"key":"20_CR24","unstructured":"Paszke, A., et\u00a0al.: Pytorch: an imperative style, high-performance deep learning library. In: NeurIPS (2019)"},{"key":"20_CR25","unstructured":"Ritter, H., Botev, A., Barber, D.: Online structured laplace approximations for overcoming catastrophic forgetting. In: NeurIPS (2018)"},{"key":"20_CR26","unstructured":"Sener, O., Savarese, S.: Active learning for convolutional neural networks: a core-set approach. In: ICLR (2018)"},{"key":"20_CR27","doi-asserted-by":"crossref","unstructured":"Shang, Y., Liu, G., Kompella, R.R., Yan, Y.: Enhancing post-training quantization calibration through contrastive learning. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01507"},{"key":"20_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"603","DOI":"10.1007\/978-3-031-20083-0_36","volume-title":"Computer Vision - ECCV 2022","author":"Y Shang","year":"2022","unstructured":"Shang, Y., Xu, D., Duan, B., Zong, Z., Nie, L., Yan, Y.: Lipschitz continuity retained binary neural network. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13671, pp. 603\u2013619. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20083-0_36"},{"key":"20_CR29","doi-asserted-by":"crossref","unstructured":"Shang, Y., Xu, D., Liu, G., Kompella, R.R., Yan, Y.: Efficient multitask dense predictor via binarization. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01505"},{"key":"20_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"586","DOI":"10.1007\/978-3-031-20083-0_35","volume-title":"Computer Vision - ECCV 2022","author":"Y Shang","year":"2022","unstructured":"Shang, Y., Xu, D., Zong, Z., Nie, L., Yan, Y.: Network binarization via contrastive learning. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13671, pp. 586\u2013602. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20083-0_35"},{"key":"20_CR31","doi-asserted-by":"crossref","unstructured":"Shang, Y., Yuan, Z., Xie, B., Wu, B., Yan, Y.: Post-training quantization on diffusion models. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00196"},{"key":"20_CR32","unstructured":"Sinha, S., Zhang, H., Goyal, A., Bengio, Y., Larochelle, H., Odena, A.: Small-GAN: speeding up GAN training using core-sets. In: ICML (2020)"},{"key":"20_CR33","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9gou, H.: Training data-efficient image transformers & distillation through attention. In: ICML (2021)"},{"key":"20_CR34","unstructured":"Wang, L., et al.: AFEC: active forgetting of negative transfer in continual learning. In: NeurIPS (2021)"},{"key":"20_CR35","unstructured":"Wang, L., Zhang, X., Su, H., Zhu, J.: A comprehensive survey of continual learning: theory, method and application. arXiv:2302.00487 (2023)"},{"key":"20_CR36","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"20_CR37","unstructured":"Wei, X., Gong, R., Li, Y., Liu, X., Yu, F.: QDrop: randomly dropping quantization for extremely low-bit post-training quantization. In: ICLR (2022)"},{"key":"20_CR38","unstructured":"Yu, S., et al.: Unified visual transformer compression. arXiv:2203.08243 (2022)"},{"key":"20_CR39","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1007\/978-3-031-19775-8_12","volume-title":"Computer Vision - ECCV 2022","author":"Z Yuan","year":"2022","unstructured":"Yuan, Z., Xue, C., Chen, Y., Wu, Q., Sun, G.: Ptq4vit: Post-training quantization for vision transformers with twin uniform quantization. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13672, pp. 191\u2013207. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19775-8_12"},{"key":"20_CR40","unstructured":"Zhou, S., Wu, Y., Ni, Z., Zhou, X., Wen, H., Zou, Y.: DoReFa-net: training low bitwidth convolutional neural networks with low bitwidth gradients. arXiv:1606.06160 (2016)"},{"key":"20_CR41","unstructured":"Zhu, C., Han, S., Mao, H., Dally, W.J.: Trained ternary quantization. arXiv:1612.01064 (2016)"}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-2064-7_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,27]],"date-time":"2024-12-27T20:04:40Z","timestamp":1735329880000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-2064-7_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,28]]},"ISBN":["9789819620630","9789819620647"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-2064-7_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,28]]},"assertion":[{"value":"28 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MMM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Multimedia Modeling","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Nara","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 January 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 January 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmm2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/mmm2025.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}