{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T16:18:20Z","timestamp":1759335500072,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":34,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819772315"},{"type":"electronic","value":"9789819772322"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-7232-2_22","type":"book-chapter","created":{"date-parts":[[2024,8,27]],"date-time":"2024-08-27T16:02:47Z","timestamp":1724774567000},"page":"327-342","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Smaller Can Be Better: Efficient Data Selection for\u00a0Pre-training Models"],"prefix":"10.1007","author":[{"given":"Guang","family":"Fang","sequence":"first","affiliation":[]},{"given":"Shihui","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Mingxin","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yulan","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Hao","family":"Huang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,28]]},"reference":[{"unstructured":"Axelrod, A., He, X., Gao, J.: Domain adaptation via pseudo in-domain data selection. In: Proceedings of the 2011 Conference on Empirical Methods in Natural Language Processing (EMNLP 2011), pp. 355\u2013362 (2011)","key":"22_CR1"},{"doi-asserted-by":"crossref","unstructured":"Bapna, A., Arivazhagan, N., Firat, O.: Simple, scalable adaptation for neural machine translation. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP 2019), pp. 1538\u20131548 (2019)","key":"22_CR2","DOI":"10.18653\/v1\/D19-1165"},{"doi-asserted-by":"crossref","unstructured":"Bengio, Y., Louradour, J., Collobert, R., Weston, J.: Curriculum learning. In: Proceedings of the 26th International Conference on Machine Learning (ICML 2009), pp. 41\u201348 (2009)","key":"22_CR3","DOI":"10.1145\/1553374.1553380"},{"doi-asserted-by":"crossref","unstructured":"Chen, B., Huang, F.: Semi-supervised convolutional networks for translation adaptation with tiny amount of in-domain data. In: Proceedings of the 20th SIGNLL Conference on Computational Natural Language Learning (CoNLL 2016), pp. 314\u2013323 (2016)","key":"22_CR4","DOI":"10.18653\/v1\/K16-1031"},{"key":"22_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"352","DOI":"10.1007\/978-3-030-59410-7_25","volume-title":"Database Systems for Advanced Applications","author":"Y Deng","year":"2020","unstructured":"Deng, Y., et al.: From code to natural language: type-aware sketch-based Seq2Seq learning. In: Nah, Y., Cui, B., Lee, S.-W., Yu, J.X., Moon, Y.-S., Whang, S.E. (eds.) DASFAA 2020. LNCS, vol. 12112, pp. 352\u2013368. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-59410-7_25"},{"unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)","key":"22_CR6"},{"key":"22_CR7","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1007\/s10590-015-9176-1","volume":"29","author":"S Eetemadi","year":"2015","unstructured":"Eetemadi, S., Lewis, W., Toutanova, K., Radha, H.: Survey of data-selection methods in statistical machine translation. Mach. Transl. 29, 189\u2013223 (2015)","journal-title":"Mach. Transl."},{"doi-asserted-by":"crossref","unstructured":"Feng, Y., Xia, P., Van\u00a0Durme, B., Sedoc, J.: Automatic document selection for efficient encoder pretraining. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing (EMNLP 2022), pp. 9522\u20139530 (2022)","key":"22_CR8","DOI":"10.18653\/v1\/2022.emnlp-main.647"},{"doi-asserted-by":"crossref","unstructured":"Killamsetty, K., Sivasubramanian, D., Ramakrishnan, G., Iyer, R.: Glister: Generalization based data subset selection for efficient and robust learning. In: Proceedings of the 35th AAAI Conference on Artificial Intelligence (AAAI 2021), pp. 8110\u20138118 (2021)","key":"22_CR9","DOI":"10.1609\/aaai.v35i9.16988"},{"doi-asserted-by":"crossref","unstructured":"Li, S., Zhao, Z., Hu, R., Li, W., Liu, T., Du, X.: Analogical reasoning on chinese morphological and semantic relations. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (ACL 2018), pp. 138\u2013143 (2018)","key":"22_CR10","DOI":"10.18653\/v1\/P18-2023"},{"unstructured":"Liu, Y., et al.: Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)","key":"22_CR11"},{"unstructured":"Liu, Z., Sun, M., Zhou, T., Huang, G., Darrell, T.: Rethinking the value of network pruning. In: International Conference on Learning Representations (ICLR 2018) (2018)","key":"22_CR12"},{"doi-asserted-by":"crossref","unstructured":"Qiu, Y., Li, H., Li, S., Jiang, Y., Hu, R., Yang, L.: Revisiting correlations between intrinsic and extrinsic evaluations of word embeddings. In: China National Conference on Chinese Computational Linguistics (CCL 2018), pp. 209\u2013221 (2018)","key":"22_CR13","DOI":"10.1007\/978-3-030-01716-3_18"},{"unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I., et\u00a0al.: Improving language understanding by generative pre-training. OpenAI blog (2018)","key":"22_CR14"},{"issue":"8","key":"22_CR15","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I., et al.: Language models are unsupervised multitask learners. OpenAI blog 1(8), 9 (2019)","journal-title":"OpenAI blog"},{"issue":"1","key":"22_CR16","first-page":"5485","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(1), 5485\u20135551 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"22_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.psychres.2021.114135","volume":"304","author":"J Sarzynska-Wawer","year":"2021","unstructured":"Sarzynska-Wawer, J., et al.: Detecting formal thought disorder by deep contextualized word representations. Psychiatry Res. 304, 114135 (2021)","journal-title":"Psychiatry Res."},{"key":"22_CR18","doi-asserted-by":"publisher","first-page":"351","DOI":"10.1613\/jair.1.13566","volume":"75","author":"D Saunders","year":"2022","unstructured":"Saunders, D.: Domain adaptation and multi-domain adaptation for neural machine translation: A survey. J. Artif. Intell. Res. 75, 351\u2013424 (2022)","journal-title":"J. Artif. Intell. Res."},{"doi-asserted-by":"crossref","unstructured":"Sun, C., Qiu, X., Xu, Y., Huang, X.: How to fine-tune bert for text classification? In: China National Conference on Chinese Computational Linguistics (CCL 2019), pp. 194\u2013206 (2019)","key":"22_CR19","DOI":"10.1007\/978-3-030-32381-3_16"},{"doi-asserted-by":"crossref","unstructured":"Trung, N., Phung, D., Nguyen, T.: Unsupervised domain adaptation for event detection using domain-specific adapters. In: Findings of the Association for Computational Linguistics (ACL-IJCNLP 2021), pp. 4015\u20134025 (2021)","key":"22_CR20","DOI":"10.18653\/v1\/2021.findings-acl.351"},{"unstructured":"Turc, I., Chang, M., Lee, K., Toutanova, K.: Well-read students learn better: On the importance of pre-training compact models. arXiv preprint p. arXiv:1908.08962 (2019)","key":"22_CR21"},{"unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems 30 (NeurIPS 2017), pp. 6000\u20136010 (2017)","key":"22_CR22"},{"doi-asserted-by":"crossref","unstructured":"Wang, A., Singh, A., Michael, J., Hill, F., Levy, O., Bowman, S.: GLUE: A multi-task benchmark and analysis platform for natural language understanding. In: Proceedings of the 2018 EMNLP Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP (EMNLP 2018), pp. 353\u2013355 (2018)","key":"22_CR23","DOI":"10.18653\/v1\/W18-5446"},{"doi-asserted-by":"crossref","unstructured":"Wang, W., Caswell, I., Chelba, C.: Dynamically composing domain-data selection with clean-data selection by \u201cco-curricular learning\" for neural machine translation. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (ACL 2019), pp. 1282\u20131292 (2019)","key":"22_CR24","DOI":"10.18653\/v1\/P19-1123"},{"issue":"9","key":"22_CR25","first-page":"4555","volume":"44","author":"X Wang","year":"2021","unstructured":"Wang, X., Chen, Y., Zhu, W.: A survey on curriculum learning. IEEE Trans. Pattern Anal. Mach. Intell. 44(9), 4555\u20134576 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"unstructured":"Xie, Q., Dai, Z., Hovy, E., Luong, T., Le, Q.: Unsupervised data augmentation for consistency training. In: Advances in Neural Information Processing Systems 33 (NeurIPS 2020), pp. 6256\u20136268 (2020)","key":"22_CR26"},{"unstructured":"Xu, B.: Nlp chinese corpus: Large scale chinese corpus for nlp. Zenodo (2019)","key":"22_CR27"},{"doi-asserted-by":"crossref","unstructured":"Xu, C., et al.: Dynamic curriculum learning for low-resource neural machine translation. In: Proceedings of the 28th International Conference on Computational Linguistics (COLING 2020) pp. 3977\u20133989 (2020)","key":"22_CR28","DOI":"10.18653\/v1\/2020.coling-main.352"},{"doi-asserted-by":"crossref","unstructured":"Xu, L., et\u00a0al.: CLUE: A chinese language understanding evaluation benchmark. In: Proceedings of the 28th International Conference on Computational Linguistics (COLING 2020), pp. 4762\u20134772 (2020)","key":"22_CR29","DOI":"10.18653\/v1\/2020.coling-main.419"},{"unstructured":"Yang, Z., Dai, Z., Yang, Y., Carbonell, J., Salakhutdinov, R., Le, Q.V.: XLNet: Generalized autoregressive pretraining for language understanding. In: Advances in Neural Information Processing Systems 32 (NeurIPS 2019), pp. 5753\u20135763 (2019)","key":"22_CR30"},{"doi-asserted-by":"crossref","unstructured":"Zhan, R., Liu, X., Wong, D.F., Chao, L.S.: Meta-curriculum learning for domain adaptation in neural machine translation. In: Proceedings of the 35th AAAI Conference on Artificial Intelligence (AAAI 2021), pp. 14310\u201314318 (2021)","key":"22_CR31","DOI":"10.1609\/aaai.v35i16.17683"},{"unstructured":"Zhang, X., LeCun, Y.: Which encoding is the best for text classification in chinese, english, japanese and korean? arXiv preprint p. arXiv:1708.02657 (2017)","key":"22_CR32"},{"unstructured":"Zhang, X., Zhao, J., LeCun, Y.: Character-level convolutional networks for text classification. In: Advances in Neural Information Processing Systems 28 (NeurIPS 2015), pp. 649\u2013657 (2015)","key":"22_CR33"},{"issue":"06","key":"22_CR34","doi-asserted-by":"publisher","first-page":"2103","DOI":"10.1109\/TMC.2020.3036390","volume":"21","author":"R Zhou","year":"2022","unstructured":"Zhou, R., et al.: Online task offloading for 5G small cell networks. IEEE Trans. Mob. Comput. 21(06), 2103\u20132115 (2022)","journal-title":"IEEE Trans. Mob. Comput."}],"container-title":["Lecture Notes in Computer Science","Web and Big Data"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-7232-2_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T09:39:02Z","timestamp":1732700342000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-7232-2_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819772315","9789819772322"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-7232-2_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"28 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"APWeb-WAIM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asia-Pacific Web (APWeb) and Web-Age Information Management (WAIM) Joint International Conference on Web and Big Data","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Jinhua","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"apwebwaim2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/apweb2024.zjnu.edu.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}