{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T08:25:10Z","timestamp":1743063910752,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":16,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819947515"},{"type":"electronic","value":"9789819947522"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-981-99-4752-2_48","type":"book-chapter","created":{"date-parts":[[2023,7,30]],"date-time":"2023-07-30T16:02:10Z","timestamp":1690732930000},"page":"587-596","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["RA-KD: Random Attention Map Projection for Knowledge Distillation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3128-8812","authenticated-orcid":false,"given":"Linna","family":"Zhang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5265-903X","authenticated-orcid":false,"given":"Yuehui","family":"Chen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1973-5010","authenticated-orcid":false,"given":"Yi","family":"Cao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4134-2352","authenticated-orcid":false,"given":"Yaou","family":"Zhao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,7,31]]},"reference":[{"key":"48_CR1","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.-W., Lee, K., Toutanova, K.: BERT: pre-training of Deep Bidirectional Transformers for Language Understanding (2019). http:\/\/arxiv.org\/abs\/1810.04805, https:\/\/doi.org\/10.48550\/arXiv.1810.04805","DOI":"10.48550\/arXiv.1810.04805"},{"key":"48_CR2","first-page":"24","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I.: Language models are unsupervised multitask learners. OpenAI Blog. 1, 24 (2019)","journal-title":"OpenAI Blog."},{"key":"48_CR3","doi-asserted-by":"publisher","unstructured":"Peters, M.E., et al.: Deep contextualized word representations. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers), New Orleans, Louisiana, pp. 2227\u20132237. Association for Computational Linguistics (2018). https:\/\/doi.org\/10.18653\/v1\/N18-1202","DOI":"10.18653\/v1\/N18-1202"},{"key":"48_CR4","doi-asserted-by":"publisher","unstructured":"Lin, Z., Liu, J.Z., Yang, Z., Hua, N., Roth, D.: Pruning redundant mappings in transformer models via spectral-normalized identity prior (2020). http:\/\/arxiv.org\/abs\/2010.01791, https:\/\/doi.org\/10.48550\/arXiv.2010.01791","DOI":"10.48550\/arXiv.2010.01791"},{"key":"48_CR5","unstructured":"Fan, A., et al.: Training with Quantization Noise for Extreme Model Compression"},{"key":"48_CR6","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531. 2 (2015)"},{"key":"48_CR7","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space (2013). http:\/\/arxiv.org\/abs\/1301.3781"},{"key":"48_CR8","doi-asserted-by":"publisher","unstructured":"Pennington, J., Socher, R., Manning, C.: Glove: Global Vectors for Word Representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), Doha, Qatar, pp. 1532\u20131543. Association for Computational Linguistics (2014). https:\/\/doi.org\/10.3115\/v1\/D14-1162","DOI":"10.3115\/v1\/D14-1162"},{"key":"48_CR9","doi-asserted-by":"crossref","unstructured":"Joulin, A., Grave, E., Bojanowski, P., Mikolov, T.: bag of tricks for efficient text classification. In: Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers, Valencia, Spain, pp. 427\u2013431. Association for Computational Linguistics (2017)","DOI":"10.18653\/v1\/E17-2068"},{"key":"48_CR10","unstructured":"Wu, Y., et al.: Google\u2019s neural machine translation system: bridging the gap between human and machine translation. arXiv preprint arXiv:1609.08144 (2016)"},{"key":"48_CR11","doi-asserted-by":"publisher","unstructured":"Sanh, V., Debut, L., Chaumond, J., Wolf, T.: DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter (2019). https:\/\/doi.org\/10.48550\/arXiv.1910.01108","DOI":"10.48550\/arXiv.1910.01108"},{"key":"48_CR12","doi-asserted-by":"publisher","unstructured":"Sun, Z., Yu, H., Song, X., Liu, R., Yang, Y., Zhou, D.: MobileBERT: a Compact task-agnostic BERT for resource-limited devices. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 2158\u20132170. Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.195","DOI":"10.18653\/v1\/2020.acl-main.195"},{"key":"48_CR13","doi-asserted-by":"publisher","unstructured":"Jiao, X., et al.: TinyBERT: distilling BERT for natural language understanding. In: Findings of the Association for Computational Linguistics: EMNLP 2020, pp. 4163\u20134174. Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.372","DOI":"10.18653\/v1\/2020.findings-emnlp.372"},{"key":"48_CR14","doi-asserted-by":"crossref","unstructured":"Xu, C., Zhou, W., Ge, T., Wei, F., Zhou, M.: BERT-of-Theseus: compressing BERT by progressive module replacing (2020). http:\/\/arxiv.org\/abs\/2002.02925","DOI":"10.18653\/v1\/2020.emnlp-main.633"},{"key":"48_CR15","doi-asserted-by":"crossref","unstructured":"Sun, S., Cheng, Y., Gan, Z., Liu, J.: Patient knowledge distillation for BERT model compression (2019). http:\/\/arxiv.org\/abs\/1908.09355","DOI":"10.18653\/v1\/D19-1441"},{"key":"48_CR16","doi-asserted-by":"publisher","unstructured":"Wang, W., Wei, F., Dong, L., Bao, H., Yang, N., Zhou, M.: MiniLM: deep self-attention distillation for task-agnostic compression of pre-trained transformers (2020). https:\/\/doi.org\/10.48550\/arXiv.2002.10957","DOI":"10.48550\/arXiv.2002.10957"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-4752-2_48","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T23:15:03Z","timestamp":1690931703000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-4752-2_48"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9789819947515","9789819947522"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-4752-2_48","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"31 July 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Zhengzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 August 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 August 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2023a","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/2023\/index.htm","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}