{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T08:30:55Z","timestamp":1743064255106,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":49,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819783663"},{"type":"electronic","value":"9789819783670"}],"license":[{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8367-0_5","type":"book-chapter","created":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T11:55:20Z","timestamp":1732794920000},"page":"75-90","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["$$E^3$$: Optimizing Language Model Training for\u00a0Translation via\u00a0Enhancing Efficiency and\u00a0Effectiveness"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8178-6636","authenticated-orcid":false,"given":"Linqing","family":"Chen","sequence":"first","affiliation":[]},{"given":"Weilei","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Dongyang","family":"Hu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,29]]},"reference":[{"key":"5_CR1","unstructured":"Rasooli, M.S., Tetreault, J.: Yara Parser: A Fast and Accurate Dependency Parser. Computing Research Repository, arXiv:1503.06733 (2015). 
http:\/\/arxiv.org\/abs\/1503.06733"},{"key":"5_CR2","unstructured":"Ando, R.K., Zhang, T., Bartlett, P.: A framework for learning predictive structures from multiple tasks and unlabeled data. J. Mach. Learn. Res. vol. 6, pp. 1817\u20131853 (2005). https:\/\/www.jmlr.org\/papers\/volume6\/ando05a\/ando05a.pdf"},{"key":"5_CR3","doi-asserted-by":"crossref","unstructured":"Cooley, J.W., Tukey, J.W.: An algorithm for the machine calculation of complex Fourier series. Math. Comput. 19(90), 297\u2013301 (1965). https:\/\/www.ams.org\/journals\/mcom\/1965-19-090\/S0025-5718-1965-0178586-1\/S0025-5718-1965-0178586-1.pdf","DOI":"10.1090\/S0025-5718-1965-0178586-1"},{"key":"5_CR4","doi-asserted-by":"crossref","unstructured":"Zhang, J., et al.: Improving the transformer translation model with document-level context. In: Conference on Empirical Methods in Natural Language Processing (2018)","DOI":"10.18653\/v1\/D18-1049"},{"key":"5_CR5","unstructured":"Zhang, Z., Zhang, A., Li, M., Smola, A.: Automatic Chain of Thought Prompting in Large Language Models. arXiv preprint arXiv:2210.03493 (2022)"},{"key":"5_CR6","unstructured":"Vaswani, A.: Attention is All you Need. In: NIPS (2017)"},{"key":"5_CR7","unstructured":"Touvron, H., et al.: LLaMA: Open and Efficient Foundation Language Models. ArXiv, abs\/2302.13971 (2023)"},{"key":"5_CR8","doi-asserted-by":"crossref","unstructured":"Xue, L., et al.: mT5: a massively multilingual pre-trained text-to-text transformer. In: North American Chapter of the Association for Computational Linguistics (2020)","DOI":"10.18653\/v1\/2021.naacl-main.41"},{"key":"5_CR9","unstructured":"Raffel, C., et al.: Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. 
ArXiv, abs\/1910.10683 (2019)"},{"key":"5_CR10","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I.: Language models are unsupervised multitask learners (2019)"},{"key":"5_CR11","unstructured":"Kenton, J.D., Toutanova, L.K.: BERT: pre-training of deep bidirectional transformers for language understanding. ArXiv, abs\/1810.04805 (2019)"},{"key":"5_CR12","unstructured":"brightmart, T.: brightmart\/nlp_chinese_corpus: release version 1.0. Zenodo, v1.0 (2019). https:\/\/doi.org\/10.5281\/zenodo.3402023"},{"key":"5_CR13","unstructured":"Brants, T., Popat, A., Xu, P., Och, F.J., Dean, J.: Large language models in machine translation. In: Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP-CoNLL), pp. 858\u2013867. Association for Computational Linguistics, Prague, Czech Republic (2007). https:\/\/aclanthology.org\/D07-1090"},{"key":"5_CR14","doi-asserted-by":"crossref","unstructured":"Phan, L., et al.: Enriching biomedical knowledge for low-resource language through large-scale translation. In: Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics, pp. 3131\u20133142. Association for Computational Linguistics, Dubrovnik, Croatia (2023). https:\/\/aclanthology.org\/2023.eacl-main.228","DOI":"10.18653\/v1\/2023.eacl-main.228"},{"key":"5_CR15","unstructured":"Mohit, B., Liberato, F., Hwa, R.: Language model adaptation for difficult to translate phrases. In: Proceedings of the 13th Annual conference of the European Association for Machine Translation, EAMT 2009, Barcelona, Spain, May 14-15, 2009. European Association for Machine Translation (2009). https:\/\/aclanthology.org\/2009.eamt-1.22\/"},{"key":"5_CR16","doi-asserted-by":"crossref","unstructured":"Vaswani, A., Zhao, Y., Fossum, V., Chiang, D.: Decoding with large-scale neural language models improves translation. 
In: Conference on Empirical Methods in Natural Language Processing (2013)","DOI":"10.18653\/v1\/D13-1140"},{"key":"5_CR17","doi-asserted-by":"crossref","unstructured":"Zhu, W., et al.: Multilingual Machine Translation with Large Language Models: Empirical Results and Analysis. ArXiv, abs\/2304.04675 (2023)","DOI":"10.18653\/v1\/2024.findings-naacl.176"},{"key":"5_CR18","unstructured":"OpenAI: GPT-4 Technical Report. ArXiv, abs\/2303.08774 (2023)"},{"key":"5_CR19","unstructured":"Longpre, S., et al.: A Pretrainer\u2019s Guide to Training Data: Measuring the Effects of Data Age, Domain Coverage, Quality, & Toxicity. ArXiv, abs\/2305.13169 (2023)"},{"key":"5_CR20","doi-asserted-by":"crossref","unstructured":"Tan, Z., Zhang, X., Wang, S., Liu, Y.: MSP: multi-stage prompting for making pre-trained language models better translators. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 6131\u20136142. Association for Computational Linguistics, Dublin, Ireland (2022). https:\/\/aclanthology.org\/2022.acl-long.424","DOI":"10.18653\/v1\/2022.acl-long.424"},{"key":"5_CR21","unstructured":"Han, L., Erofeev, G., Sorokina, I., Gladkoff, S., Nenadic, G.: Examining large pre-trained language models for machine translation: what you don\u2019t know about it. In: Proceedings of the Seventh Conference on Machine Translation (WMT), pp. 908\u2013919. Association for Computational Linguistics, Abu Dhabi, United Arab Emirates (Hybrid) (2022). https:\/\/aclanthology.org\/2022.wmt-1.84"},{"key":"5_CR22","unstructured":"Lee, D.: Two-phase cross-lingual language model fine-tuning for machine translation quality estimation. In: Proceedings of the Fifth Conference on Machine Translation, pp. 1024\u20131028. Association for Computational Linguistics, Online (2020). 
https:\/\/aclanthology.org\/2020.wmt-1.118"},{"key":"5_CR23","doi-asserted-by":"crossref","unstructured":"Zheng, F., Reid, M., Marrese-Taylor, E., Matsuo, Y.: Low-resource machine translation using cross-lingual language model pretraining. In: Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas, pp. 234\u2013240. Association for Computational Linguistics, Online (2021). https:\/\/aclanthology.org\/2021.americasnlp-1.26","DOI":"10.18653\/v1\/2021.americasnlp-1.26"},{"key":"5_CR24","doi-asserted-by":"crossref","unstructured":"Agrawal, S., Zhou, C., Lewis, M., Zettlemoyer, L., Ghazvininejad, M.: In-context Examples Selection for Machine Translation. CoRR, abs\/2212.02437 (2022). https:\/\/doi.org\/10.48550\/arXiv.2212.02437","DOI":"10.18653\/v1\/2023.findings-acl.564"},{"key":"5_CR25","unstructured":"Brown, T., et al.: Language Models are Few-Shot Learners. In: Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual (2020). https:\/\/proceedings.neurips.cc\/paper\/2020\/hash\/1457c0d6bfcb4967418bfb8ac142f64a-Abstract.html"},{"key":"5_CR26","unstructured":"Chowdhery, A., et al.: PaLM: Scaling Language Modeling with Pathways. CoRR, abs\/2204.02311 (2022). https:\/\/doi.org\/10.48550\/arXiv.2204.02311"},{"key":"5_CR27","unstructured":"Costa-juss\u00e0, M.R., et al.: No Language Left Behind: Scaling Human-Centered Machine Translation. CoRR, abs\/2207.04672 (2022). https:\/\/doi.org\/10.48550\/arXiv.2207.04672"},{"key":"5_CR28","unstructured":"Dong, Q., et al.: A Survey for In-context Learning. CoRR, abs\/2301.00234 (2023). https:\/\/doi.org\/10.48550\/arXiv.2301.00234"},{"key":"5_CR29","unstructured":"Guerreiro, N.M., et al.: Hallucinations in Large Multilingual Translation Models. CoRR, abs\/2303.16104 (2023). 
https:\/\/doi.org\/10.48550\/arXiv.2303.16104"},{"key":"5_CR30","unstructured":"Kaplan, J., et al.: Scaling Laws for Neural Language Models. CoRR, abs\/2001.08361 (2020). https:\/\/arxiv.org\/abs\/2001.08361"},{"key":"5_CR31","unstructured":"Li, M., et al.: In-Context Learning with Many Demonstration Examples. CoRR, abs\/2302.04931 (2023). https:\/\/doi.org\/10.48550\/arXiv.2302.04931"},{"key":"5_CR32","unstructured":"Min, S., et al.: Rethinking the Role of Demonstrations: What Makes In-Context Learning Work?. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, EMNLP 2022, Abu Dhabi, United Arab Emirates, December 7-11, 2022, pp. 11048\u201311064. Association for Computational Linguistics (2022). https:\/\/aclanthology.org\/2022.emnlp-main.759"},{"key":"5_CR33","unstructured":"Moslem, Y., Haque, R., Kelleher, J.D., Way, A.: Adaptive Machine Translation with Large Language Models. CoRR, abs\/2301.13294 (2023). https:\/\/doi.org\/10.48550\/arXiv.2301.13294"},{"key":"5_CR34","unstructured":"Ren, X., et al.: PanGu-$$\\varSigma $$: Towards Trillion Parameter Language Model with Sparse Heterogeneous Computing. CoRR, abs\/2303.10845 (2023). https:\/\/doi.org\/10.48550\/arXiv.2303.10845"},{"key":"5_CR35","unstructured":"Vilar, D., et al.: Prompting PaLM for Translation: Assessing Strategies and Performance. CoRR, abs\/2211.09102 (2022). https:\/\/doi.org\/10.48550\/arXiv.2211.09102"},{"key":"5_CR36","unstructured":"Wei, J., et al.: Emergent abilities of large language models. Trans. Mach. Learn. Res. (2022). https:\/\/openreview.net\/forum?id=yzkSU5zdwD"},{"key":"5_CR37","unstructured":"Wei, J., et al.: Finetuned language models are zero-shot learners. In: The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, April 25-29, 2022. OpenReview.net (2022). 
https:\/\/openreview.net\/forum?id=gEZrGCozdqR"},{"key":"5_CR38","unstructured":"Wei, J., et al.: Larger language models do in-context learning differently. CoRR, abs\/2303.03846 (2023). https:\/\/doi.org\/10.48550\/arXiv.2303.03846"},{"key":"5_CR39","unstructured":"Wu, Z., et al.: OpenICL: An Open-Source Framework for In-context Learning. CoRR, abs\/2303.02913 (2023). https:\/\/doi.org\/10.48550\/arXiv.2303.02913"},{"key":"5_CR40","unstructured":"Zhang, B., Haddow, B., Birch, A.: Prompting Large Language Model for Machine Translation: A Case Study. CoRR, abs\/2301.07069 (2023). https:\/\/doi.org\/10.48550\/arXiv.2301.07069"},{"key":"5_CR41","doi-asserted-by":"crossref","unstructured":"Zhang, H., Zhang, Y., Zhang, R., Yang, D.: Robustness of demonstration-based learning under limited data scenario. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, EMNLP 2022, Abu Dhabi, United Arab Emirates, December 7-11, 2022, pp. 1769\u20131782. Association for Computational Linguistics (2022). https:\/\/aclanthology.org\/2022.emnlp-main.116","DOI":"10.18653\/v1\/2022.emnlp-main.116"},{"key":"5_CR42","unstructured":"Zhang, S., et al.: OPT: Open Pre-trained Transformer Language Models. CoRR, abs\/2205.01068 (2022). https:\/\/doi.org\/10.48550\/arXiv.2205.01068"},{"key":"5_CR43","unstructured":"Cui, Y., Yang, Z., Yao, X.: Efficient and Effective Text Encoding for Chinese LLaMA and Alpaca. arXiv preprint arXiv:2304.08177 (2023). https:\/\/arxiv.org\/abs\/2304.08177"},{"key":"5_CR44","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.J.: BLEU: a method for automatic evaluation of machine translation. In: Proceedings of ACL, pp. 311\u2013318 (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"5_CR45","unstructured":"Henighan, T., et al.: Scaling Laws for Autoregressive Generative Modeling. ArXiv, abs\/2010.14701 (2020). 
https:\/\/api.semanticscholar.org\/CorpusID:225094178"},{"key":"5_CR46","doi-asserted-by":"crossref","unstructured":"Yuan, S., et al.: WuDaoCorpora: A Super Large-scale Chinese Corpora for Pre-training Language Models. AI Open (2021)","DOI":"10.1016\/j.aiopen.2021.06.001"},{"key":"5_CR47","unstructured":"Touvron, H., et al.: Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)"},{"key":"5_CR48","unstructured":"Bawden, R., et al.: Findings of the WMT 2020 biomedical translation shared task: basque, italian and russian as new additional languages. In: Proceedings of the Fifth Conference on Machine Translation, pp. 660\u2013687. Association for Computational Linguistics, Online (2020). https:\/\/aclanthology.org\/2020.wmt-1.76"},{"key":"5_CR49","unstructured":"Loshchilov, I., Hutter. F.: Decoupled Weight Decay Regularization. In: International Conference on Learning Representations (2018)"}],"container-title":["Lecture Notes in Computer Science","Chinese Computational Linguistics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8367-0_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T12:04:23Z","timestamp":1732795463000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8367-0_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,29]]},"ISBN":["9789819783663","9789819783670"],"references-count":49,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8367-0_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,29]]},"assertion":[{"value":"29 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter 
History"}},{"value":"CCL","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China National Conference on Chinese Computational Linguistics","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taiyuan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 July 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 July 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cncl2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/cips-cl.org\/static\/CCL2024\/en\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}