{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:40:59Z","timestamp":1740102059552,"version":"3.37.3"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62006201"],"award-info":[{"award-number":["62006201"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,18]]},"DOI":"10.1109\/ijcnn54540.2023.10191351","type":"proceedings-article","created":{"date-parts":[[2023,8,2]],"date-time":"2023-08-02T17:30:03Z","timestamp":1690997403000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["GamMa: Efficient Fine-Tuning of Pre-Trained Language Models Using Gradient Activation Mapping Masking"],"prefix":"10.1109","author":[{"given":"Anchun","family":"Gui","sequence":"first","affiliation":[{"name":"School of Informatics, Xiamen University,Xiamen,China"}]},{"given":"Jinqiang","family":"Ye","sequence":"additional","affiliation":[{"name":"School of Informatics, Xiamen University,Xiamen,China"}]},{"given":"Han","family":"Xiao","sequence":"additional","affiliation":[{"name":"School of Informatics, Xiamen University,Xiamen,China"}]}],"member":"263","reference":[{"year":"2019","author":"liu","journal-title":"RoBERTa A Robustly optimized BERT Pretraining Approach","key":"ref13"},{"doi-asserted-by":"publisher","key":"ref35","DOI":"10.18653\/v1\/N19-1112"},{"key":"ref12","article-title":"Attention is All you Need","volume":"30","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref34","article-title":"On Identifiability in Transformers","author":"brunner","year":"2020","journal-title":"International Conference on Learning Representations"},{"key":"ref15","first-page":"1","article-title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","volume":"21","author":"raffel","year":"2020","journal-title":"Journal of Machine Learning Research"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.18653\/v1\/2020.acl-main.703"},{"doi-asserted-by":"publisher","key":"ref31","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"doi-asserted-by":"publisher","key":"ref30","DOI":"10.18653\/v1\/2020.emnlp-main.174"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1609\/aaai.v35i14.17533"},{"doi-asserted-by":"publisher","key":"ref33","DOI":"10.18653\/v1\/D19-1445"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.18653\/v1\/W18-5446"},{"key":"ref32","doi-asserted-by":"crossref","first-page":"37","DOI":"10.18653\/v1\/P19-3007","article-title":"A Multiscale Visualization of Attention in the Transformer Model","author":"vig","year":"2019","journal-title":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics System Demonstrations"},{"key":"ref2","first-page":"1877","article-title":"Language models are few-shot 
learners","volume":"33","author":"brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref1","first-page":"4171","article-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding","author":"devlin","year":"2019","journal-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics Human Language Technologies Volume 1 (Long and Short Papers)"},{"key":"ref17","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"radford","year":"2019","journal-title":"OpenAIRE blog"},{"key":"ref16","article-title":"Improving language understanding by generative pre-training","author":"radford","year":"2018","journal-title":"OpenAIRE blog"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"ref18","first-page":"5986","article-title":"BERT and PALs: Projected Attention Layers for Efficient Adaptation in Multi-Task Learning","author":"stickland","year":"2019","journal-title":"Proceedings of the 36th International Conference on Machine Learning"},{"year":"2022","author":"ding","journal-title":"Delta Tuning A Comprehensive Study of Parameter Efficient Methods for Pre-trained Language Models","key":"ref24"},{"key":"ref23","article-title":"Towards a Unified View of Parameter-Efficient Transfer Learning","author":"he","year":"2021","journal-title":"International Conference on Learning Representations"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.18653\/v1\/2021.emnlp-main.749"},{"year":"2019","author":"lee","journal-title":"What would elsa do? freezing layers during transformer fine-tuning","key":"ref25"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.18653\/v1\/2021.acl-long.353"},{"year":"2022","author":"fu","journal-title":"AdapterBias Parameter-efficient Token-dependent Representation Shift for Adapters in NLP Tasks","key":"ref22"},{"key":"ref21","article-title":"Compacter: Efficient Low-Rank Hypercomplex Adapter Layers","author":"mahabadi","year":"2021","journal-title":"Advances in neural information processing systems"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.18653\/v1\/2022.acl-long.125"},{"key":"ref27","article-title":"Mixout: Effective Regularization to Finetune Large-scale Pretrained Language Models","author":"lee","year":"2020","journal-title":"International Conference on Learning Representations"},{"key":"ref29","first-page":"2435","article-title":"How fine can fine-tuning be? 
Learning efficient language models","author":"radiya-dixit","year":"2020","journal-title":"Proceedings of the Twenty Third International Conference on Artificial Intelligence and Statistics"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1109\/CVPR.2016.319"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.18653\/v1\/2022.acl-short.1"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1109\/ICCV.2017.74"},{"key":"ref4","first-page":"2790","article-title":"Parameter-Efficient Transfer Learning for NLP","author":"houlsby","year":"2019","journal-title":"Proceedings of the 36th International Conference on Machine Learning"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.18653\/v1\/2021.findings-acl.121"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.18653\/v1\/2021.acl-long.378"},{"year":"2021","author":"hu","journal-title":"LoRA Low-Rank Adaptation of Large Language Models","key":"ref5"}],"event":{"name":"2023 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2023,6,18]]},"location":"Gold Coast, Australia","end":{"date-parts":[[2023,6,23]]}},"container-title":["2023 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10190990\/10190992\/10191351.pdf?arnumber=10191351","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T17:47:05Z","timestamp":1692640025000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10191351\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,18]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/ijcnn54540.2023.10191351","relation":{},"subject":[],"published":{"date-parts":[[2023,6,18]]}}}
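The record above is a standard Crossref "work" message. As a minimal sketch (assuming network access to the public Crossref REST API at api.crossref.org; the DOI and field names are taken directly from the record above), the following Python retrieves the same record and reads a few of its fields:

# Minimal sketch: fetch this Crossref work record and print selected fields.
# Assumes network access; DOI and field names come from the record above.
import json
import urllib.request

DOI = "10.1109/ijcnn54540.2023.10191351"
url = f"https://api.crossref.org/works/{DOI}"

with urllib.request.urlopen(url) as resp:
    record = json.load(resp)          # top-level object with "status" and "message"

msg = record["message"]               # the work metadata shown above
print(msg["title"][0])                # paper title
print(", ".join(f'{a["given"]} {a["family"]}' for a in msg["author"]))
print("references:", msg["references-count"])
print("published:", msg["published"]["date-parts"][0])

Expected output is the title, author list, reference count, and publication date contained in the record, e.g. "GamMa: Efficient Fine-Tuning of Pre-Trained Language Models Using Gradient Activation Mapping Masking" and the date parts [2023, 6, 18].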