{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,31]],"date-time":"2025-12-31T12:21:17Z","timestamp":1767183677132,"version":"3.41.0"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,18]]},"DOI":"10.1109\/ijcnn54540.2023.10191206","type":"proceedings-article","created":{"date-parts":[[2023,8,2]],"date-time":"2023-08-02T17:30:03Z","timestamp":1690997403000},"page":"1-8","source":"Crossref","is-referenced-by-count":2,"title":["Gradient Sparsification For Masked Fine-Tuning of Transformers"],"prefix":"10.1109","author":[{"given":"James","family":"O'Neill","sequence":"first","affiliation":[{"name":"Huawei Ireland Research Center,Dublin,Ireland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sourav","family":"Dutta","sequence":"additional","affiliation":[{"name":"Huawei Ireland Research Center,Dublin,Ireland"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","article-title":"Adapterdrop: On the efficiency of adapters in transformers","author":"r\u00fcckl\u00e9","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-demos.7"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1071"},{"key":"ref14","article-title":"Reducing transformer depth on demand with structured dropout","author":"fan","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref31","article-title":"Universal dependencies v2: An evergrowing multilingual treebank collection","author":"nivre","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref30","article-title":"Introduction to the conll-2003 shared task: Language-independent named entity recognition","author":"sang","year":"2003","journal-title":"ArXiv Preprint"},{"key":"ref11","article-title":"Adapter-fusion: Non-destructive task composition for transfer learning","author":"pfeiffer","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref10","first-page":"2790","article-title":"Parameter-efficient transfer learning for NLP","author":"houlsby","year":"2019","journal-title":"Proceedings of the 36th International Conference on Machine Learning ICML 2019"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.653"},{"key":"ref2","first-page":"4171","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"0","journal-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics Human Language Technologies NAACL-HLT 2019 Minneapolis MN USA June 2&#x2013;7 2019 Volume 1 (Long and Short Papers)"},{"key":"ref1","article-title":"Improving language understanding by generative pre-training","author":"radford","year":"2018","journal-title":"OpenAir"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1572"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.189"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-2012"},{"key":"ref18","article-title":"Quasi-recurrent neural networks","author":"bradbury","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref24","first-page":"1929","article-title":"Dropout: a simple way to prevent neural networks from overfitting","volume":"15","author":"srivastava","year":"2014","journal-title":"The Journal of Machine Learning Research"},{"key":"ref23","first-page":"2039","article-title":"Just pick a sign: Optimizing deep multitask models with gradient sign dropout","volume":"33","author":"chen","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.484"},{"key":"ref25","article-title":"XTREME: A massively multilingual multi-task benchmark for evaluating cross-lingual generalization","author":"hu","year":"0","journal-title":"Proceedings of the 37th International Conference on Machine Learning ICML 2020 12&#x2013;18 July 2020 Virtual Conference"},{"key":"ref20","article-title":"Filter: An enhanced fusion method for cross-lingual language understanding","author":"fang","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref22","article-title":"Autofreeze: Automati-cally freezing model blocks to accelerate fine-tuning","author":"liu","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.174"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1252"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1031"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1382"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.747"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1031"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.617"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.747"},{"key":"ref3","article-title":"Cross-lingual language model pretraining","author":"lample","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.463"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W19-4302"}],"event":{"name":"2023 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2023,6,18]]},"location":"Gold Coast, Australia","end":{"date-parts":[[2023,6,23]]}},"container-title":["2023 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10190990\/10190992\/10191206.pdf?arnumber=10191206","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,22]],"date-time":"2025-05-22T17:49:14Z","timestamp":1747936154000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10191206\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,18]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/ijcnn54540.2023.10191206","relation":{},"subject":[],"published":{"date-parts":[[2023,6,18]]}}}