{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T19:53:01Z","timestamp":1742932381543,"version":"3.40.3"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031545337"},{"type":"electronic","value":"9783031545344"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-54534-4_3","type":"book-chapter","created":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T10:39:18Z","timestamp":1710844758000},"page":"36-48","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Transformers Compression: A Study of\u00a0Matrix Decomposition Methods Using Fisher Information"],"prefix":"10.1007","author":[{"given":"Sergey","family":"Pletenev","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniil","family":"Moskovskiy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Viktoriia","family":"Chekalina","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mikhail","family":"Seleznyov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sergey","family":"Zagoruyko","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexander","family":"Panchenko","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,3,12]]},"reference":[{"issue":"4","key":"3_CR1","doi-asserted-by":"publisher","DOI":"10.1117\/1.2819119","volume":"16","author":"CM Bishop","year":"2007","unstructured":"Bishop, C.M., Nasrabadi, N.M.: Pattern recognition and machine learning. J. Electron. Imaging 16(4), 049901 (2007)","journal-title":"J. Electron. Imaging"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Cer, D.M., Diab, M.T., Agirre, E., Lopez-Gazpio, I., Specia, L.: Semeval-2017 task 1: semantic textual similarity - multilingual and cross-lingual focused evaluation. CoRR abs\/1708.00055 (2017). https:\/\/arxiv.org\/abs\/1708.00055","DOI":"10.18653\/v1\/S17-2001"},{"key":"3_CR3","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Burstein, J., Doran, C., Solorio, T. (eds.) Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019, Minneapolis, MN, USA, 2\u20137 June 2019, Volume 1 (Long and Short Papers), pp. 4171\u20134186. Association for Computational Linguistics (2019). https:\/\/doi.org\/10.18653\/v1\/n19-1423","DOI":"10.18653\/v1\/n19-1423"},{"key":"3_CR4","unstructured":"Dolan, W.B., Brockett, C.: Automatically constructing a corpus of sentential paraphrases. In: Proceedings of the Third International Workshop on Paraphrasing, IWP@IJCNLP 2005, Jeju Island, Korea, October 2005, 2005. Asian Federation of Natural Language Processing (2005). https:\/\/aclanthology.org\/I05-5002\/"},{"key":"3_CR5","unstructured":"Garipov, T., Podoprikhin, D., Novikov, A., Vetrov, D.P.: Ultimate tensorization: compressing convolutional and FC layers alike. CoRR abs\/1611.03214 (2016). https:\/\/arxiv.org\/abs\/1611.03214"},{"key":"3_CR6","doi-asserted-by":"publisher","DOI":"10.3389\/frai.2021.676564","volume":"4","author":"B Hawks","year":"2021","unstructured":"Hawks, B., Duarte, J.M., Fraser, N.J., Pappalardo, A., Tran, N., Umuroglu, Y.: PS and QS: quantization-aware pruning for efficient low latency neural network inference. Front. Artif. Intell. 4, 676564 (2021)","journal-title":"Front. Artif. Intell."},{"key":"3_CR7","doi-asserted-by":"publisher","unstructured":"He, Y., Zhang, X., Sun, J.: Channel pruning for accelerating very deep neural networks. In: IEEE International Conference on Computer Vision, ICCV 2017, Venice, Italy, 22\u201329 October 2017, pp. 1398\u20131406. IEEE Computer Society (2017). https:\/\/doi.org\/10.1109\/ICCV.2017.155","DOI":"10.1109\/ICCV.2017.155"},{"key":"3_CR8","unstructured":"Hinton, G.E., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. CoRR abs\/1503.02531 (2015). https:\/\/arxiv.org\/abs\/1503.02531"},{"key":"3_CR9","doi-asserted-by":"publisher","unstructured":"Hrinchuk, O., Khrulkov, V., Mirvakhabova, L., Orlova, E.D., Oseledets, I.V.: Tensorized embedding layers. In: Cohn, T., He, Y., Liu, Y. (eds.) Findings of the Association for Computational Linguistics: EMNLP 2020, Online Event, 16\u201320 November 2020. Findings of ACL, vol. EMNLP 2020, pp. 4847\u20134860. Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.436","DOI":"10.18653\/v1\/2020.findings-emnlp.436"},{"key":"3_CR10","unstructured":"Hsu, Y., Hua, T., Chang, S., Lou, Q., Shen, Y., Jin, H.: Language model compression with weighted low-rank factorization (2022). https:\/\/openreview.net\/forum?id=uPv9Y3gmAI5"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"Hu, P., Peng, X., Zhu, H., Aly, M.M.S., Lin, J.: OPQ: compressing deep neural networks with one-shot pruning-quantization. In: Thirty-Fifth AAAI Conference on Artificial Intelligence, AAAI 2021, Thirty-Third Conference on Innovative Applications of Artificial Intelligence, IAAI 2021, The Eleventh Symposium on Educational Advances in Artificial Intelligence, EAAI 2021, Virtual Event, 2\u20139 February 2021, pp. 7780\u20137788. AAAI Press (2021). https:\/\/ojs.aaai.org\/index.php\/AAAI\/article\/view\/16950","DOI":"10.1609\/aaai.v35i9.16950"},{"key":"3_CR12","doi-asserted-by":"publisher","unstructured":"Jiao, X., et al.: Tinybert: distilling BERT for natural language understanding. In: Cohn, T., He, Y., Liu, Y. (eds.) Findings of the Association for Computational Linguistics: EMNLP 2020, Online Event, 16\u201320 November 2020. Findings of ACL, vol. EMNLP 2020, pp. 4163\u20134174. Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.372","DOI":"10.18653\/v1\/2020.findings-emnlp.372"},{"key":"3_CR13","doi-asserted-by":"publisher","unstructured":"Lagunas, F., Charlaix, E., Sanh, V., Rush, A.M.: Block pruning for faster transformers. In: Moens, M., Huang, X., Specia, L., Yih, S.W. (eds.) Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, EMNLP 2021, Virtual Event\/Punta Cana, Dominican Republic, 7\u201311 November 2021, pp. 10619\u201310629. Association for Computational Linguistics (2021). https:\/\/doi.org\/10.18653\/v1\/2021.emnlp-main.829","DOI":"10.18653\/v1\/2021.emnlp-main.829"},{"key":"3_CR14","unstructured":"Lan, Z., Chen, M., Goodman, S., Gimpel, K., Sharma, P., Soricut, R.: ALBERT: a lite BERT for self-supervised learning of language representations. In: 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, 26\u201330 April 2020. OpenReview.net (2020). https:\/\/openreview.net\/forum?id=H1eA7AEtvS"},{"key":"3_CR15","doi-asserted-by":"publisher","unstructured":"Lewis, M., et al.: BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. In: Jurafsky, D., Chai, J., Schluter, N., Tetreault, J.R. (eds.) Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, ACL 2020, Online, 5\u201310 July 2020, pp. 7871\u20137880. Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.703","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"3_CR16","unstructured":"Li, H., Kadav, A., Durdanovic, I., Samet, H., Graf, H.P.: Pruning filters for efficient convnets. In: 5th International Conference on Learning Representations, ICLR 2017, Toulon, France, 24\u201326 April 2017, Conference Track Proceedings. OpenReview.net (2017). https:\/\/openreview.net\/forum?id=rJqFGTslg"},{"key":"3_CR17","doi-asserted-by":"publisher","unstructured":"Logacheva, V., et al.: Paradetox: detoxification with parallel data. In: Muresan, S., Nakov, P., Villavicencio, A. (eds.) Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), ACL 2022, Dublin, Ireland, 22\u201327 May 2022, pp. 6804\u20136818. Association for Computational Linguistics (2022). https:\/\/doi.org\/10.18653\/v1\/2022.acl-long.469","DOI":"10.18653\/v1\/2022.acl-long.469"},{"key":"3_CR18","unstructured":"Michel, P., Levy, O., Neubig, G.: Are sixteen heads really better than one? In: Wallach, H.M., Larochelle, H., Beygelzimer, A., d\u2019Alch\u00e9-Buc, F., Fox, E.B., Garnett, R. (eds.) Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, 8\u201314 December 2019, Vancouver, BC, Canada, pp. 14014\u201314024 (2019). https:\/\/proceedings.neurips.cc\/paper\/2019\/hash\/2c601ad9d2ff9bc8b282670cdd54f69f-Abstract.html"},{"key":"3_CR19","doi-asserted-by":"publisher","unstructured":"Minh, H.P., Xuan, N.N., Son, T.T.: TT-ViT: vision transformer compression using tensor-train decomposition. In: Nguyen, N.T., Manolopoulos, Y., Chbeir, R., Kozierkiewicz, A., Trawinski, B. (eds.) ICCCI 2022. LNCS, vol. 13501, pp. 755\u2013767. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-16014-1_59","DOI":"10.1007\/978-3-031-16014-1_59"},{"key":"3_CR20","doi-asserted-by":"crossref","unstructured":"Oseledets, I.V.: Tensor-train decomposition. SIAM J. Sci. Comput. 33, 2295\u20132317 (2011)","DOI":"10.1137\/090752286"},{"key":"3_CR21","unstructured":"Paszke, A., et al.: Pytorch: an imperative style, high-performance deep learning library. In: Wallach, H.M., Larochelle, H., Beygelzimer, A., d\u2019Alch\u00e9-Buc, F., Fox, E.B., Garnett, R. (eds.) Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, 8\u201314 December 2019, Vancouver, BC, Canada, pp. 8024\u20138035 (2019). https:\/\/proceedings.neurips.cc\/paper\/2019\/hash\/bdbca288fee7f92f2bfa9f7012727740-Abstract.html"},{"key":"3_CR22","unstructured":"Rahman, A., Ng, V.: Resolving complex cases of definite pronouns: the winograd schema challenge. In: Tsujii, J., Henderson, J., Pasca, M. (eds.) Proceedings of the 2012 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning, EMNLP-CoNLL 2012, 12\u201314 July 2012, Jeju Island, Korea, pp. 777\u2013789. ACL (2012). https:\/\/aclanthology.org\/D12-1071\/"},{"key":"3_CR23","unstructured":"Sanh, V., Debut, L., Chaumond, J., Wolf, T.: Distilbert, a distilled version of BERT: smaller, faster, cheaper and lighter. CoRR abs\/1910.01108 (2019). https:\/\/arxiv.org\/abs\/1910.01108"},{"key":"3_CR24","unstructured":"Sanh, V., Wolf, T., Rush, A.M.: Movement pruning: adaptive sparsity by fine-tuning. In: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M., Lin, H. (eds.) Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, 6\u201312 December 2020, virtual (2020). https:\/\/proceedings.neurips.cc\/paper\/2020\/hash\/eae15aabaa768ae4a5993a8a4f4fa6e4-Abstract.html"},{"key":"3_CR25","unstructured":"Socher, R., et al.: Recursive deep models for semantic compositionality over a sentiment treebank. In: Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing, EMNLP 2013, 18\u201321 October 2013, Grand Hyatt Seattle, Seattle, Washington, USA, A meeting of SIGDAT, a Special Interest Group of the ACL, pp. 1631\u20131642. ACL (2013). https:\/\/aclanthology.org\/D13-1170\/"},{"key":"3_CR26","doi-asserted-by":"publisher","unstructured":"Wang, A., Singh, A., Michael, J., Hill, F., Levy, O., Bowman, S.R.: GLUE: a multi-task benchmark and analysis platform for natural language understanding. In: Linzen, T., Chrupala, G., Alishahi, A. (eds.) Proceedings of the Workshop: Analyzing and Interpreting Neural Networks for NLP, BlackboxNLP@EMNLP 2018, Brussels, Belgium, 1 November 2018, pp. 353\u2013355. Association for Computational Linguistics (2018). https:\/\/doi.org\/10.18653\/v1\/w18-5446","DOI":"10.18653\/v1\/w18-5446"},{"key":"3_CR27","doi-asserted-by":"publisher","unstructured":"Wang, Z., Li, J.B., Qu, S., Metze, F., Strubell, E.: Squat: sharpness- and quantization-aware training for BERT. CoRR abs\/2210.07171 (2022). https:\/\/doi.org\/10.48550\/arXiv.2210.07171","DOI":"10.48550\/arXiv.2210.07171"},{"key":"3_CR28","doi-asserted-by":"publisher","first-page":"625","DOI":"10.1162\/tacl_a_00290","volume":"7","author":"A Warstadt","year":"2019","unstructured":"Warstadt, A., Singh, A., Bowman, S.R.: Neural network acceptability judgments. Trans. Assoc. Comput. Linguist. 7, 625\u2013641 (2019)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"3_CR29","doi-asserted-by":"publisher","unstructured":"Williams, A., Nangia, N., Bowman, S.R.: A broad-coverage challenge corpus for sentence understanding through inference. In: Walker, M.A., Ji, H., Stent, A. (eds.) Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2018, New Orleans, Louisiana, USA, 1\u20136 June 2018, Volume 1 (Long Papers), pp. 1112\u20131122. Association for Computational Linguistics (2018). https:\/\/doi.org\/10.18653\/v1\/n18-1101","DOI":"10.18653\/v1\/n18-1101"},{"key":"3_CR30","unstructured":"Wolf, T., et al.: Huggingface\u2019s transformers: state-of-the-art natural language processing. CoRR abs\/1910.03771 (2019). https:\/\/arxiv.org\/abs\/1910.03771"},{"key":"3_CR31","unstructured":"Yang, Z., Dai, Z., Salakhutdinov, R., Cohen, W.W.: Breaking the softmax bottleneck: a high-rank RNN language model. In: 6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, 30 April\u20133 May 2018, Conference Track Proceedings. OpenReview.net (2018). https:\/\/openreview.net\/forum?id=HkwZSG-CZ"}],"container-title":["Lecture Notes in Computer Science","Analysis of Images, Social Networks and Texts"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-54534-4_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T10:41:21Z","timestamp":1710844881000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-54534-4_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031545337","9783031545344"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-54534-4_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"12 March 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AIST","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Analysis of Images, Social Networks and Texts","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Yerevan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Armenia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aist2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/aistconf.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easy Chair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"93","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"26% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1.62","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Out of the 93 submission, 17 were rejected before being sent to peer review.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}