{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T21:14:13Z","timestamp":1740172453764,"version":"3.37.3"},"reference-count":86,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61976180","61925601","62276152"],"award-info":[{"award-number":["61976180","61925601","62276152"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Key R&amp;D Program of China","award":["2022ZD0160502"],"award-info":[{"award-number":["2022ZD0160502"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/taslp.2023.3331096","type":"journal-article","created":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T19:08:12Z","timestamp":1699643292000},"page":"486-496","source":"Crossref","is-referenced-by-count":0,"title":["Gradual Syntactic Label Replacement for Language Model Pre-Training"],"prefix":"10.1109","volume":"32","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8705-9598","authenticated-orcid":false,"given":"Yile","family":"Wang","sequence":"first","affiliation":[{"name":"Institute for AI Industry Research, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5214-2268","authenticated-orcid":false,"given":"Yue","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Engineering, Westlake University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1374-5979","authenticated-orcid":false,"given":"Peng","family":"Li","sequence":"additional","affiliation":[{"name":"Institute for AI Industry Research, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3087-242X","authenticated-orcid":false,"given":"Yang","family":"Liu","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"issue":"8","key":"ref2","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI blog"},{"key":"ref3","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. 34th Int. Conf. Neural Inf. Process. Syst.","author":"Brown","year":"2020"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.703"},{"issue":"140","key":"ref6","first-page":"5485","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref7","first-page":"1341","article-title":"FRAGE: Frequency-agnostic word representation","volume-title":"Proc. 32nd Int. Conf. Neural Inf. Process. Syst.","author":"Gong","year":"2018"},{"key":"ref8","article-title":"Representation degeneration problem in training natural language generation models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Gao","year":"2019"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.3"},{"journal-title":"Human Behavior and the Principle of Least Effort: An Introduction to Human Ecology","year":"2016","author":"Zipf","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1310.4546"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1162"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6403"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1207\/s15516709cog1402_1"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21236\/ADA273556"},{"article-title":"Luminosoinsight\/wordfreq: v2.2","year":"2018","author":"Speer","key":"ref18"},{"article-title":"Googles neural machine translation system: Bridging the gap between human and machine translation","year":"2016","author":"Wu","key":"ref19"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.11"},{"key":"ref21","article-title":"Decoupled weight decay regularization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Loshchilov","year":"2019"},{"article-title":"Roberta: A robustly optimized BERT pretraining approach","year":"2019","author":"Liu","key":"ref22"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.831"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1249"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/w18-5446"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1101"},{"article-title":"Quora question pairs","year":"2018","author":"Chen","key":"ref27"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1264"},{"key":"ref29","first-page":"1631","article-title":"Recursive deep models for semantic compositionality over a Sentiment Treebank","volume-title":"Proc. Conf. Empirical Methods Natural Lang. Process.","author":"Socher","year":"2013"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00290"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/s17-2001"},{"key":"ref32","article-title":"Automatically constructing a corpus of sentential paraphrases","volume-title":"Proc. 3rd Int. Workshop Paraphrasing","author":"Dolan","year":"2005"},{"key":"ref33","first-page":"8","article-title":"The fifth PASCAL recognizing textual entailment challenge","volume-title":"Proc. Text Anal. Conf.","author":"Bentivogli","year":"2009"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.3115\/1119176.1119195"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-2124"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1144"},{"key":"ref37","article-title":"Pointer sentinel mixture models","volume-title":"Proc. 5th Int. Conf. Learn. Representations","author":"Merity","year":"2017"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/d18-1009"},{"key":"ref39","article-title":"ALBERT: A lite BERT for self-supervised learning of language representations","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lan","year":"2020"},{"article-title":"A framework for few-shot language model evaluation","year":"2021","author":"Gao","key":"ref40"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.26615\/978-954-452-072-4_112"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.502"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/K19-1058"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.181"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/p18-1198"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1112"},{"key":"ref47","first-page":"218","article-title":"News-Please: A generic news crawler and extractor","volume-title":"Proc. 15th Int. Symp. Inf. Sci.","author":"Hamborg","year":"2017"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.eacl-main.217"},{"key":"ref49","first-page":"1189","article-title":"Self-paced learning for latent variable models","volume-title":"Proc. 23rd Int. Conf. Neural Inf. Process. Syst.","author":"Kumar","year":"2010"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v29i1.9608"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.96"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/d19-1006"},{"key":"ref53","article-title":"Improving neural language generation with spectrum control","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang","year":"2019"},{"key":"ref54","article-title":"Isotropy in the contextual embedding space: Clusters and manifolds","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Cai","year":"2020"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.403"},{"key":"ref56","article-title":"All-but-the-top: Simple and effective postprocessing for word representations","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Mu","year":"2018"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-short.73"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00106"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1419"},{"key":"ref60","article-title":"What do you learn from context? Probing for sentence structure in contextualized word representations","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Tenney","year":"2019"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.118"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.coling-main.118"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.228"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.420"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.423"},{"article-title":"LERT: A linguistically-motivated pre-trained language model","year":"2022","author":"Cui","key":"ref66"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3069908"},{"article-title":"Curriculum learning for language modeling","year":"2021","author":"Campos","key":"ref68"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS51385.2021.00025"},{"article-title":"Curriculum learning: A regularization method for efficient and stable billion-scale GPT model pre-training","year":"2021","author":"Li","key":"ref70"},{"article-title":"MLOps: From model-centric to data-centric AI","year":"2021","author":"Ng","key":"ref71"},{"article-title":"Data-centric AI requires rethinking data notion","year":"2021","author":"Hajij","key":"ref72"},{"article-title":"Dataclue: A benchmark suite for data-centric NLP","year":"2021","author":"Xu","key":"ref73"},{"article-title":"Active-learning-as-a-service: An efficient MLOps system for data-centric AI","year":"2022","author":"Huang","key":"ref74"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1145\/3533028.3533310"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-007-0090-8"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.14778\/2994509.2994514"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1670"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.427"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.sustainlp-1.15"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.577"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.20"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.295"},{"key":"ref84","article-title":"Multitask prompted training enables zero-shot task generalization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Sanh","year":"2022"},{"key":"ref85","article-title":"Finetuned language models are zero-shot learners","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wei","year":"2022"},{"article-title":"reStructured Pre-training","year":"2022","author":"Yuan","key":"ref86"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/10304349\/10315017.pdf?arnumber=10315017","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,2]],"date-time":"2024-03-02T17:27:19Z","timestamp":1709400439000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10315017\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":86,"URL":"https:\/\/doi.org\/10.1109\/taslp.2023.3331096","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"type":"print","value":"2329-9290"},{"type":"electronic","value":"2329-9304"}],"subject":[],"published":{"date-parts":[[2024]]}}}