{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T20:21:15Z","timestamp":1742934075164,"version":"3.40.3"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031723490"},{"type":"electronic","value":"9783031723506"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-72350-6_28","type":"book-chapter","created":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T12:14:50Z","timestamp":1726661690000},"page":"410-422","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Efficient Fine-Tuning for\u00a0Low-Resource Tibetan Pre-trained Language Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-8351-4689","authenticated-orcid":false,"given":"Mingjun","family":"Zhou","sequence":"first","affiliation":[]},{"given":"Zhuoma","family":"Daiqing","sequence":"additional","affiliation":[]},{"given":"Nuo","family":"Qun","sequence":"additional","affiliation":[]},{"given":"Tashi","family":"Nyima","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,17]]},"reference":[{"unstructured":"Kenton, J., Devlin, M.-W.C., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NAACL-HLT (2019)","key":"28_CR1"},{"unstructured":"Liu, Y., et al.: RoBERTa: a robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692 (2019)","key":"28_CR2"},{"doi-asserted-by":"crossref","unstructured":"Conneau, A., et al.: Unsupervised cross-lingual representation learning at scale. arXiv preprint arXiv:1911.02116 (2019)","key":"28_CR3","DOI":"10.18653\/v1\/2020.acl-main.747"},{"issue":"8","key":"28_CR4","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997). https:\/\/doi.org\/10.1162\/neco.1997.9.8.1735","journal-title":"Neural Comput."},{"unstructured":"Chung, J., et al.: Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555 (2014)","key":"28_CR5"},{"issue":"140","key":"28_CR6","first-page":"1","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(140), 1\u201367 (2020)","journal-title":"J. Mach. Learn. Res."},{"unstructured":"Radford, A., et al.: Improving language understanding by generative pre-training (2018)","key":"28_CR7"},{"unstructured":"Radford, A., et al.: Language models are unsupervised multitask learners. 
OpenAI Blog 1(8), 9 (2019)","key":"28_CR8"},{"key":"28_CR9","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"unstructured":"Hu, E.J., et al.: LoRA: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)","key":"28_CR10"},{"doi-asserted-by":"crossref","unstructured":"Schick, T., Sch\u00fctze, H.: Exploiting cloze questions for few shot text classification and natural language inference. arXiv preprint arXiv:2001.07676 (2020)","key":"28_CR11","DOI":"10.18653\/v1\/2021.eacl-main.20"},{"doi-asserted-by":"crossref","unstructured":"Schick, T., Schmid, H., Sch\u00fctze, H.: Automatically identifying words that can serve as labels for few-shot text classification. arXiv preprint arXiv:2010.13641 (2020)","key":"28_CR12","DOI":"10.18653\/v1\/2020.coling-main.488"},{"key":"28_CR13","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"472","DOI":"10.1007\/978-3-319-69005-6_39","volume-title":"Chinese Computational Linguistics and Natural Language Processing Based on Naturally Annotated Big Data","author":"N Qun","year":"2017","unstructured":"Qun, N., Li, X., Qiu, X., Huang, X.: End-to-end neural text classification for Tibetan. In: Sun, M., Wang, X., Chang, B., Xiong, D. (eds.) CCL\/NLP-NABD -2017. LNCS (LNAI), vol. 10565, pp. 472\u2013480. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-69005-6_39"},{"doi-asserted-by":"crossref","unstructured":"Gao, T., Fisch, A., Chen, D.: Making pre-trained language models better few-shot learners. arXiv preprint arXiv:2012.15723 (2020)","key":"28_CR14","DOI":"10.18653\/v1\/2021.acl-long.295"},{"doi-asserted-by":"crossref","unstructured":"Shin, T., et al.: AutoPrompt: eliciting knowledge from language models with automatically generated prompts. arXiv preprint arXiv:2010.15980 (2020)","key":"28_CR15","DOI":"10.18653\/v1\/2020.emnlp-main.346"},{"key":"28_CR16","doi-asserted-by":"publisher","first-page":"414","DOI":"10.1162\/tacl_a_00468","volume":"10","author":"E Ben-David","year":"2022","unstructured":"Ben-David, E., Oved, N., Reichart, R.: PADA: example-based prompt learning for on-the-fly adaptation to unseen domains. Trans. Assoc. Comput. Linguist. 10, 414\u2013433 (2022)","journal-title":"Trans. Assoc. Comput. Linguist."},{"doi-asserted-by":"crossref","unstructured":"Li, X.L., Liang, P.: Prefix-tuning: optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190 (2021)","key":"28_CR17","DOI":"10.18653\/v1\/2021.acl-long.353"},{"doi-asserted-by":"crossref","unstructured":"Lester, B., Al-Rfou, R., Constant, N.: The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691 (2021)","key":"28_CR18","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"doi-asserted-by":"crossref","unstructured":"Vu, T., et al.: SPoT: better frozen model adaptation through soft prompt transfer. arXiv preprint arXiv:2110.07904 (2021)","key":"28_CR19","DOI":"10.18653\/v1\/2022.acl-long.346"},{"doi-asserted-by":"crossref","unstructured":"Hu, S., et al.: Knowledgeable prompt-tuning: incorporating knowledge into prompt verbalizer for text classification. 
arXiv preprint arXiv:2108.02035 (2021)","key":"28_CR20","DOI":"10.18653\/v1\/2022.acl-long.158"},{"unstructured":"Zhang, X., Zhao, J., LeCun, Y.: Character-level convolutional networks for text classification. In: Advances in Neural Information Processing Systems, vol. 28 (2015)","key":"28_CR21"},{"doi-asserted-by":"crossref","unstructured":"Jin, F., et al.: Instance-aware prompt learning for language understanding and generation. In: ACM Transactions on Asian and Low-Resource Language Information Processing, vol. 22, issue 7, pp. 1\u201318 (2023)","key":"28_CR22","DOI":"10.1145\/3604613"},{"doi-asserted-by":"crossref","unstructured":"Wu, Z., et al.: IDPG: an instance-dependent prompt generation method. arXiv preprint arXiv:2204.04497 (2022)","key":"28_CR23","DOI":"10.18653\/v1\/2022.naacl-main.403"},{"doi-asserted-by":"crossref","unstructured":"Chen, Y., et al.: Exploring lottery prompts for pre-trained language models. arXiv preprint arXiv:2305.19500 (2023)","key":"28_CR24","DOI":"10.18653\/v1\/2023.acl-long.860"},{"unstructured":"Li, C., et al.: Measuring the intrinsic dimension of objective landscapes. arXiv preprint arXiv:1804.08838 (2018)","key":"28_CR25"},{"doi-asserted-by":"crossref","unstructured":"Aghajanyan, A., Gupta, S., Zettlemoyer, L.: Intrinsic dimensionality explains the effectiveness of language model fine-tuning. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers) (2021)","key":"28_CR26","DOI":"10.18653\/v1\/2021.acl-long.568"},{"unstructured":"Houlsby, N., et al.: Parameter-efficient transfer learning for NLP. In: International Conference on Machine Learning. PMLR (2019)","key":"28_CR27"},{"doi-asserted-by":"crossref","unstructured":"Lin, Z., Madotto, A., Fung, P.: Exploring versatile generative language model via parameter-efficient transfer learning. In: Findings of the Association for Computational Linguistics: EMNLP 2020, pp. 441\u2013459 (2020)","key":"28_CR28","DOI":"10.18653\/v1\/2020.findings-emnlp.41"},{"unstructured":"Touvron, H., et al.: LLaMA: open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)","key":"28_CR29"},{"unstructured":"Dettmers, T., et al.: QLoRA: efficient finetuning of quantized LLMs. In: Advances in Neural Information Processing Systems, vol. 36 (2024)","key":"28_CR30"},{"unstructured":"Chen, Y., et al.: LongLoRA: efficient fine-tuning of long-context large language models. arXiv preprint arXiv:2309.12307 (2023)","key":"28_CR31"},{"unstructured":"Yang, Z., Xu, Z., Cui, Y., et al.: CINO: A Chinese minority pre-trained language model. In: Proceedings of the 29th International Conference on Computational Linguistics, pp. 3937\u20133949 (2022)","key":"28_CR32"},{"doi-asserted-by":"crossref","unstructured":"Liu, S., Deng, J., Sun, Y., et al.: TiBERT: Tibetan pre-trained language model. In: 2022 IEEE International Conference on Systems, Man, and Cybernetics (SMC), pp. 2956\u20132961. IEEE (2022)","key":"28_CR33","DOI":"10.1109\/SMC53654.2022.9945074"},{"doi-asserted-by":"crossref","unstructured":"Zhang, J., Kazhuo, D., Gadeng, L., et al.: Research and application of Tibetan pre-training language model based on BERT. In: Proceedings of the 2022 2nd International Conference on Control and Intelligent Robotics, pp. 
519\u2013524 (2022)","key":"28_CR34","DOI":"10.1145\/3548608.3559255"},{"doi-asserted-by":"crossref","unstructured":"Ding, N., Hu, S., Zhao, W., et al.: OpenPrompt: an open-source framework for prompt-learning. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: System Demonstrations, pp. 105\u2013113 (2022)","key":"28_CR35","DOI":"10.18653\/v1\/2022.acl-demo.10"},{"doi-asserted-by":"crossref","unstructured":"Hu, S., Ding, N., Zhao, W., et al.: OpenDelta: a plug-and-play library for parameter-efficient adaptation of pre-trained models. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations), pp. 274\u2013281 (2023)","key":"28_CR36","DOI":"10.18653\/v1\/2023.acl-demo.26"},{"doi-asserted-by":"crossref","unstructured":"Liang, Y., Lv, H., Li, Y., et al.: Tibetan-BERT-wwm: a Tibetan pretrained model with whole word masking for text classification. In: IEEE Transactions on Computational Social Systems (2024)","key":"28_CR37","DOI":"10.1109\/TCSS.2024.3374633"},{"doi-asserted-by":"crossref","unstructured":"Zhou, M., Daiqing, Z., Qun, N., et al.: Tibetan punctuation is all you need in pretrained language model. In: 2023 International Conference on Intelligent Management and Software Engineering (IMSE), pp. 161\u2013168. IEEE (2023)","key":"28_CR38","DOI":"10.1109\/IMSE61332.2023.00041"}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks and Machine Learning \u2013 ICANN 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72350-6_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T12:19:51Z","timestamp":1726661991000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72350-6_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031723490","9783031723506"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72350-6_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"17 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICANN","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Neural Networks","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lugano","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Switzerland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 September 2024","order":8,"name":"conference_end_date","label":"Conference 
End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"33","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icann2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}
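The record above is a standard Crossref REST API "work" response, which can be retrieved directly from the public endpoint https://api.crossref.org/works/{DOI}. Below is a minimal sketch of fetching and reading it in Python; the field names are taken from the record itself, and the contact address in the User-Agent is a placeholder (Crossref's "polite pool" etiquette, not required for the request to succeed).

```python
# Minimal sketch: fetch this chapter's Crossref record and summarize it.
import requests

DOI = "10.1007/978-3-031-72350-6_28"

resp = requests.get(
    f"https://api.crossref.org/works/{DOI}",
    # Placeholder contact per Crossref's polite-usage guidance.
    headers={"User-Agent": "example-client/0.1 (mailto:you@example.org)"},
    timeout=30,
)
resp.raise_for_status()
record = resp.json()
assert record["status"] == "ok" and record["message-type"] == "work"

msg = record["message"]
print(msg["title"][0])  # chapter title
print(", ".join(f'{a["given"]} {a["family"]}' for a in msg["author"]))
print(f'DOI: {msg["DOI"]}, pages {msg["page"]}, {msg["references-count"]} references')

# Reference entries are heterogeneous: some carry only an "unstructured"
# citation string, others structured fields plus a resolved DOI.
for ref in msg["reference"]:
    label = ref.get("DOI") or ref.get("unstructured", "")[:60]
    print(f'{ref["key"]}: {label}')
```

Note that bibliographic fields such as "author", "page", or "reference" are optional in Crossref records generally, so code meant for arbitrary DOIs should guard each access with `.get()` rather than assuming, as this sketch does, that every field in the record above is present.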