{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,6]],"date-time":"2025-07-06T20:20:21Z","timestamp":1751833221694,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":48,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819981441"},{"type":"electronic","value":"9789819981458"}],"license":[{"start":{"date-parts":[[2023,11,27]],"date-time":"2023-11-27T00:00:00Z","timestamp":1701043200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,27]],"date-time":"2023-11-27T00:00:00Z","timestamp":1701043200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-8145-8_19","type":"book-chapter","created":{"date-parts":[[2023,11,26]],"date-time":"2023-11-26T23:02:21Z","timestamp":1701039741000},"page":"238-254","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Multi-Task Feature Self-Distillation for\u00a0Semi-Supervised Machine Translation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-9168-4125","authenticated-orcid":false,"given":"Yuxian","family":"Wan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenlin","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhen","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,11,27]]},"reference":[{"key":"19_CR1","doi-asserted-by":"publisher","first-page":"1798","DOI":"10.1109\/TPAMI.2013.50","volume":"35","author":"Y Bengio","year":"2012","unstructured":"Bengio, Y., Courville, A.C., Vincent, P.: Representation learning: a review and new perspectives. IEEE Trans. Pattern Anal. Mach. Intell. 35, 1798\u20131828 (2012)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"19_CR2","first-page":"263","volume":"19","author":"PF Brown","year":"1993","unstructured":"Brown, P.F., Pietra, S.D., Pietra, V.J.D., Mercer, R.L.: The mathematics of statistical machine translation: parameter estimation. Comput. Linguist. 19, 263\u2013311 (1993)","journal-title":"Comput. Linguist."},{"key":"19_CR3","doi-asserted-by":"crossref","unstructured":"Caron, M., et al.: Emerging properties in self-supervised vision transformers. In: 2021 IEEE\/CVF International Conference on Computer Vision, ICCV 2021, Montreal, QC, Canada, 10\u201317 October 2021, pp. 9630\u20139640 (2021)","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"19_CR4","unstructured":"Chen, T., Kornblith, S., Swersky, K., Norouzi, M., Hinton, G.E.: Big self-supervised models are strong semi-supervised learners. arXiv arXiv:2006.10029 (2020)"},{"key":"19_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Y., Gan, Z., Cheng, Y., Liu, J., Liu, J.: Distilling knowledge learned in BERT for text generation. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, ACL 2020, Online, 5\u201310 July 2020, pp. 7893\u20137905 (2020)","DOI":"10.18653\/v1\/2020.acl-main.705"},{"key":"19_CR6","unstructured":"Cheng, Q., Huang, J., Duan, Y.: Semantically consistent data augmentation for neural machine translation via conditional masked language model. In: International Conference on Computational Linguistics (2022)"},{"key":"19_CR7","unstructured":"Fang, Z., Wang, J., Wang, L., Zhang, L., Yang, Y., Liu, Z.: SEED: self-supervised distillation for visual representation. arXiv arXiv:2101.04731 (2021)"},{"key":"19_CR8","unstructured":"Grill, J.B., et al.: Bootstrap your own latent: a new approach to self-supervised learning. arXiv arXiv:2006.07733 (2020)"},{"key":"19_CR9","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.B.: Momentum contrast for unsupervised visual representation learning. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2020, Seattle, WA, USA, 13\u201319 June 2020, pp. 9726\u20139735 (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"19_CR10","unstructured":"Hinton, G.E., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. CoRR abs\/1503.02531 (2015)"},{"key":"19_CR11","doi-asserted-by":"crossref","unstructured":"Hoang, C.D.V., Koehn, P., Haffari, G., Cohn, T.: Iterative back-translation for neural machine translation. In: NMT@ACL (2018)","DOI":"10.18653\/v1\/W18-2703"},{"key":"19_CR12","doi-asserted-by":"crossref","unstructured":"Hu, J., Xia, M., Neubig, G., Carbonell, J.G.: Domain adaptation of neural machine translation by lexicon induction. In: Annual Meeting of the Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/P19-1286"},{"key":"19_CR13","unstructured":"Imankulova, A., Dabre, R., Fujita, A., Imamura, K.: Exploiting out-of-domain parallel data through multilingual transfer learning for low-resource neural machine translation. arXiv arXiv:1907.03060 (2019)"},{"key":"19_CR14","unstructured":"Imankulova, A., Sato, T., Komachi, M.: Improving low-resource neural machine translation with filtered pseudo-parallel corpus. In: WAT@IJCNLP (2017)"},{"key":"19_CR15","doi-asserted-by":"crossref","unstructured":"Jean, S., Cho, K., Memisevic, R., Bengio, Y.: On using very large target vocabulary for neural machine translation. arXiv arXiv:1412.2007 (2014)","DOI":"10.3115\/v1\/P15-1001"},{"key":"19_CR16","doi-asserted-by":"crossref","unstructured":"Jiao, W., Wang, X., Tu, Z., Shi, S., Lyu, M.R., King, I.: Self-training sampling with monolingual data uncertainty for neural machine translation. In: Annual Meeting of the Association for Computational Linguistics (2021)","DOI":"10.18653\/v1\/2021.acl-long.221"},{"key":"19_CR17","unstructured":"Jin, D., Jin, Z., Zhou, J.T., Szolovits, P.: A simple baseline to semi-supervised domain adaptation for machine translation. arXiv arXiv:2001.08140 (2020)"},{"key":"19_CR18","unstructured":"Khandelwal, U., Fan, A., Jurafsky, D., Zettlemoyer, L., Lewis, M.: Nearest neighbor machine translation. CoRR abs\/2010.00710 (2020)"},{"key":"19_CR19","unstructured":"Lample, G., Conneau, A.: Cross-lingual language model pretraining. In: Neural Information Processing Systems (2019)"},{"key":"19_CR20","unstructured":"Lample, G., Conneau, A., Denoyer, L., Ranzato, M.: Unsupervised machine translation using monolingual corpora only. In: 6th International Conference on Learning Representations, ICLR 2018, Conference Track Proceedings, Vancouver, BC, Canada, 30 April\u20133 May 2018 (2018)"},{"key":"19_CR21","unstructured":"Lee, K., Firat, O., Agarwal, A., Fannjiang, C., Sussillo, D.: Hallucinations in neural machine translation (2018)"},{"key":"19_CR22","doi-asserted-by":"crossref","unstructured":"Lewis, M., et al.: BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. In: Annual Meeting of the Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"19_CR23","doi-asserted-by":"publisher","first-page":"417","DOI":"10.1162\/0891201042544884","volume":"30","author":"FJ Och","year":"2004","unstructured":"Och, F.J., Ney, H.: The alignment template approach to statistical machine translation. Comput. Linguist. 30, 417\u2013449 (2004)","journal-title":"Comput. Linguist."},{"key":"19_CR24","doi-asserted-by":"crossref","unstructured":"Ott, M., et al.: fairseq: a fast, extensible toolkit for sequence modeling. In: Ammar, W., Louis, A., Mostafazadeh, N. (eds.) Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019, Demonstrations, Minneapolis, MN, USA, 2\u20137 June 2019, pp. 48\u201353 (2019)","DOI":"10.18653\/v1\/N19-4009"},{"key":"19_CR25","doi-asserted-by":"crossref","unstructured":"Pan, X., Wang, M., Wu, L., Li, L.: Contrastive learning for many-to-many multilingual neural machine translation. arXiv arXiv:2105.09501 (2021)","DOI":"10.18653\/v1\/2021.acl-long.21"},{"key":"19_CR26","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.: BLEU: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, 6\u201312 July 2002, Philadelphia, PA, USA, pp. 311\u2013318 (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"19_CR27","doi-asserted-by":"publisher","first-page":"838","DOI":"10.1137\/0330046","volume":"30","author":"B Polyak","year":"1992","unstructured":"Polyak, B., Juditsky, A.B.: Acceleration of stochastic approximation by averaging. SIAM J. Control. Optim. 30, 838\u2013855 (1992)","journal-title":"SIAM J. Control. Optim."},{"key":"19_CR28","doi-asserted-by":"crossref","unstructured":"Post, M.: A call for clarity in reporting BLEU scores. In: Bojar, O., et al. (eds.) Proceedings of the Third Conference on Machine Translation: Research Papers, WMT 2018, Belgium, Brussels, 31 October\u20131 November 2018, pp. 186\u2013191 (2018)","DOI":"10.18653\/v1\/W18-6319"},{"key":"19_CR29","doi-asserted-by":"crossref","unstructured":"Raina, R., Battle, A.J., Lee, H., Packer, B., Ng, A.Y.: Self-taught learning: transfer learning from unlabeled data. In: Ghahramani, Z. (ed.) Proceedings of the Twenty-Fourth International Conference on Machine Learning, ICML 2007, Corvallis, Oregon, USA, 20\u201324 June 2007, vol. 227, pp. 759\u2013766 (2007)","DOI":"10.1145\/1273496.1273592"},{"key":"19_CR30","doi-asserted-by":"crossref","unstructured":"Raunak, V., Menezes, A., Junczys-Dowmunt, M.: The curious case of hallucinations in neural machine translation. arXiv arXiv:2104.06683 (2021)","DOI":"10.18653\/v1\/2021.naacl-main.92"},{"key":"19_CR31","unstructured":"Ruiter, D., Klakow, D., van Genabith, J., Espa\u00f1a-Bonet, C.: Integrating unsupervised data generation into self-supervised neural machine translation for low-resource languages. arXiv arXiv:2107.08772 (2021)"},{"key":"19_CR32","unstructured":"Ruppert, D.: Efficient estimations from a slowly convergent Robbins-Monro process (1988)"},{"key":"19_CR33","doi-asserted-by":"crossref","unstructured":"Sennrich, R., Haddow, B., Birch, A.: Improving neural machine translation models with monolingual data. arXiv arXiv:1511.06709 (2015)","DOI":"10.18653\/v1\/P16-1009"},{"key":"19_CR34","doi-asserted-by":"crossref","unstructured":"Sennrich, R., Haddow, B., Birch, A.: Neural machine translation of rare words with subword units. arXiv arXiv:1508.07909 (2015)","DOI":"10.18653\/v1\/P16-1162"},{"key":"19_CR35","doi-asserted-by":"crossref","unstructured":"Siddhant, A., et al.: Leveraging monolingual data with self-supervision for multilingual neural machine translation. In: Annual Meeting of the Association for Computational Linguistics (2020)","DOI":"10.18653\/v1\/2020.acl-main.252"},{"key":"19_CR36","doi-asserted-by":"crossref","unstructured":"Siddhant, A., et al.: Leveraging monolingual data with self-supervision for multilingual neural machine translation. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, ACL 2020, Online, 5\u201310 July 2020, pp. 2827\u20132835 (2020)","DOI":"10.18653\/v1\/2020.acl-main.252"},{"key":"19_CR37","unstructured":"Sutskever, I., Vinyals, O., Le, Q.V.: Sequence to sequence learning with neural networks. In: Advances in Neural Information Processing Systems 27: Annual Conference on Neural Information Processing Systems 2014, 8\u201313 December 2014, Montreal, Quebec, Canada, pp. 3104\u20133112 (2014)"},{"key":"19_CR38","unstructured":"Tarvainen, A., Valpola, H.: Mean teachers are better role models: weight-averaged consistency targets improve semi-supervised deep learning results. In: 5th International Conference on Learning Representations, ICLR 2017, Workshop Track Proceedings, Toulon, France, 24\u201326 April 2017 (2017)"},{"key":"19_CR39","doi-asserted-by":"crossref","unstructured":"Taskar, B., Lacoste-Julien, S., Klein, D.: A discriminative matching approach to word alignment. In: Proceedings of the Conference on Human Language Technology Conference and Conference on Empirical Methods in Natural Language Processing, HLT\/EMNLP 2005, 6\u20138 October 2005, Vancouver, British Columbia, Canada, pp. 73\u201380 (2005)","DOI":"10.3115\/1220575.1220585"},{"key":"19_CR40","doi-asserted-by":"crossref","unstructured":"Tong, Y., Chen, Y., Zhang, G., Zheng, J., Zhu, H., Shi, X.: Generating diverse back-translations via constraint random decoding. In: CCMT (2021)","DOI":"10.1007\/978-981-16-7512-6_8"},{"key":"19_CR41","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, 4\u20139 December 2017, Long Beach, CA, USA, pp. 5998\u20136008 (2017)"},{"key":"19_CR42","doi-asserted-by":"crossref","unstructured":"Wang, R., Tan, X., Luo, R., Qin, T., Liu, T.Y.: A survey on low-resource neural machine translation. In: International Joint Conference on Artificial Intelligence (2021)","DOI":"10.24963\/ijcai.2021\/629"},{"key":"19_CR43","unstructured":"Wei, Y., et al.: Contrastive learning rivals masked image modeling in fine-tuning via feature distillation. arXiv arXiv:2205.14141 (2022)"},{"key":"19_CR44","doi-asserted-by":"crossref","unstructured":"Wu, J., Wang, X.E., Wang, W.Y.: Extract and edit: an alternative to back-translation for unsupervised neural machine translation. In: North American Chapter of the Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/N19-1120"},{"key":"19_CR45","doi-asserted-by":"crossref","unstructured":"Xie, Q., Hovy, E.H., Luong, M.T., Le, Q.V.: Self-training with noisy student improves ImageNet classification. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10684\u201310695 (2019)","DOI":"10.1109\/CVPR42600.2020.01070"},{"key":"19_CR46","doi-asserted-by":"crossref","unstructured":"Yang, Z., Sun, R., Wan, X.: Nearest neighbor knowledge distillation for neural machine translation. In: Carpuat, M., de Marneffe, M., Ru\u00edz, I.V.M. (eds.) NAACL, pp. 5546\u20135556 (2022)","DOI":"10.18653\/v1\/2022.naacl-main.406"},{"key":"19_CR47","unstructured":"Zhang, T., et al.: Frequency-aware contrastive learning for neural machine translation. In: AAAI Conference on Artificial Intelligence (2021)"},{"key":"19_CR48","doi-asserted-by":"crossref","unstructured":"Zheng, X., et al.: Adaptive nearest neighbor machine translation. In: Zong, C., Xia, F., Li, W., Navigli, R. (eds.) Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing, ACL\/IJCNLP 2021, (Volume 2: Short Papers), Virtual Event, 1\u20136 August 2021, pp. 368\u2013374 (2021)","DOI":"10.18653\/v1\/2021.acl-short.47"}],"container-title":["Communications in Computer and Information Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8145-8_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T18:56:35Z","timestamp":1710356195000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8145-8_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,27]]},"ISBN":["9789819981441","9789819981458"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8145-8_19","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2023,11,27]]},"assertion":[{"value":"27 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Changsha","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 November 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 November 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iconip2023.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1274","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"650","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"51% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.14","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.46","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}