{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T00:13:06Z","timestamp":1758845586892,"version":"3.37.3"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2022,8,8]],"date-time":"2022-08-08T00:00:00Z","timestamp":1659916800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,8,8]],"date-time":"2022-08-08T00:00:00Z","timestamp":1659916800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,4]]},"DOI":"10.1007\/s10489-022-03678-y","type":"journal-article","created":{"date-parts":[[2022,8,8]],"date-time":"2022-08-08T09:03:18Z","timestamp":1659949398000},"page":"9298-9320","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Reminding the incremental language model via data-free self-distillation"],"prefix":"10.1007","volume":"53","author":[{"given":"Han","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruiliu","family":"Fu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chengzhang","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuejun","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jun","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xing","family":"Bai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yonghong","family":"Yan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9272-2614","authenticated-orcid":false,"given":"Qingwei","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,8,8]]},"reference":[{"issue":"1","key":"3678_CR1","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1023\/A:1007331723572","volume":"28","author":"MB Ring","year":"1997","unstructured":"Ring MB (1997) CHILD: A first step towards continual learning. Mach Learn 28(1):77\u2013104. https:\/\/doi.org\/10.1023\/A:1007331723572","journal-title":"Mach Learn"},{"key":"3678_CR2","doi-asserted-by":"crossref","unstructured":"McCloskey M, Cohen NJ (1989) Catastrophic interference in connectionist networks: the sequential learning problem. In: Psychology of learning and motivation, vol 24, Elsevier, pp 109\u2013165","DOI":"10.1016\/S0079-7421(08)60536-8"},{"issue":"4","key":"3678_CR3","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1016\/S1364-6613(99)01294-2","volume":"3","author":"RM French","year":"1999","unstructured":"French RM (1999) Catastrophic forgetting in connectionist networks. Trends in Cognitive Sciences 3(4):128\u2013135","journal-title":"Trends in Cognitive Sciences"},{"issue":"4","key":"3678_CR4","doi-asserted-by":"publisher","first-page":"497","DOI":"10.1109\/5326.983933","volume":"31","author":"R Polikar","year":"2001","unstructured":"Polikar R, Upda L, Upda SS, Honavar VG (2001) Learn++: an incremental learning algorithm for supervised neural networks. IEEE Trans Syst Man Cybern Part C 31(4):497\u2013508. https:\/\/doi.org\/10.1109\/5326.983933","journal-title":"IEEE Trans Syst Man Cybern Part C"},{"key":"3678_CR5","doi-asserted-by":"publisher","unstructured":"Chen Z, Liu B (2018) Lifelong Machine Learning, Second Edition. Synthesis Lectures on Artificial Intelligence and Machine Learning Morgan & Claypool Publishers. https:\/\/doi.org\/10.2200\/S00832ED1V01Y201802AIM037","DOI":"10.2200\/S00832ED1V01Y201802AIM037"},{"issue":"12","key":"3678_CR6","doi-asserted-by":"publisher","first-page":"2935","DOI":"10.1109\/TPAMI.2017.2773081","volume":"40","author":"Z Li","year":"2017","unstructured":"Li Z, Hoiem D (2017) Learning without forgetting. IEEE Trans Pattern Anal Mach Intell 40 (12):2935\u20132947","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"3678_CR7","unstructured":"Lopez-Paz D, Ranzato M (2017) Gradient episodic memory for continual learning. In: Guyon I, von Luxburg U, Bengio S, Wallach HM, Fergus R, Vishwanathan SVN, Garnett R (eds) Advances in neural information processing systems 30: annual conference on neural information processing systems 2017, December 4-9, 2017, Long Beach, CA, USA, pp 6467\u20136476. https:\/\/proceedings.neurips.cc\/paper\/2017\/hash\/f87522788a2be2d171666752f97ddebb-Abstract.html"},{"key":"3678_CR8","unstructured":"Chaudhry A, Ranzato M, Rohrbach M, Elhoseiny M (2019) Efficient lifelong learning with A-GEM. In: 7th International conference on learning representations, ICLR 2019, New Orleans, LA, USA, May 6-9, 2019. OpenReview.net. https:\/\/openreview.net\/forum?id=Hkf2_sC5FX"},{"key":"3678_CR9","unstructured":"de Masson d\u2019Autume C, Ruder S, Kong L, Yogatama D (2019) Episodic memory in lifelong language learning. In: Wallach HM, Larochelle H, Beygelzimer A, d\u2019Alch\u00e9-Buc F, Fox EB, Garnett R (eds) Advances in neural information processing systems 32: annual conference on neural information processing systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada, pp 13122\u201313131. https:\/\/proceedings.neurips.cc\/paper\/2019\/hash\/f8d2e80c1458ea2501f98a2cafadb397-Abstract.html"},{"key":"3678_CR10","doi-asserted-by":"crossref","unstructured":"Wang Z, Mehta SV, P\u00f3czos B., Carbonell J (2020) Efficient meta lifelong-learning with limited memory. In: EMNLP","DOI":"10.18653\/v1\/2020.emnlp-main.39"},{"key":"3678_CR11","unstructured":"Sun FK, Ho CH, Lee HY (2019) Lamol: Language modeling for lifelong language learning. Proceedings of the ICLR 2020"},{"key":"3678_CR12","doi-asserted-by":"publisher","unstructured":"Chuang YS, Su SY, Chen YN (2020) Lifelong language knowledge distillation. In: Proceedings of the 2020 conference on empirical methods in natural language processing (EMNLP). https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.233. Association for Computational Linguistics, Online, pp 2914\u20132924","DOI":"10.18653\/v1\/2020.emnlp-main.233"},{"key":"3678_CR13","doi-asserted-by":"publisher","unstructured":"Sun J, Wang S, Zhang J, Zong C (2020) Distill and replay for continual language learning. In: Proceedings of the 28th international conference on computational linguistics. https:\/\/doi.org\/10.18653\/v1\/2020.coling-main.318. International Committee on Computational Linguistics, Barcelona, Spain (Online), pp 3569\u20133579","DOI":"10.18653\/v1\/2020.coling-main.318"},{"key":"3678_CR14","doi-asserted-by":"publisher","unstructured":"Kanwatchara K, Horsuwan T, Lertvittayakumjorn P, Kijsirikul B, Vateekul P (2021) Rational LAMOL: a rationale-based lifelong learning framework. In: Zong C, Xia F, Li W, Navigli R (eds) Proceedings of the 59th annual meeting of the association for computational linguistics and the 11th international joint conference on natural language processing, ACL\/IJCNLP 2021, (Volume 1: Long Papers), Virtual Event, August 1-6, 2021, Association for Computational Linguistics, pp 2942\u20132953. https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.229","DOI":"10.18653\/v1\/2021.acl-long.229"},{"issue":"1","key":"3678_CR15","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1007\/s10489-020-01786-1","volume":"51","author":"C Li","year":"2021","unstructured":"Li C, Li Y, Zhao Y, Peng P, Geng X (2021) SLER: Self-generated long-term experience replay for continual reinforcement learning. Appl Intell 51(1):185\u2013201. https:\/\/doi.org\/10.1007\/s10489-020-01786-1","journal-title":"Appl Intell"},{"key":"3678_CR16","unstructured":"McCann B, Keskar NS, Xiong C, Socher R (2018) The natural language decathlon:, Multitask learning as question answering. arXiv:1806.08730"},{"issue":"8","key":"3678_CR17","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford A, Wu J, Child R, Luan D, Amodei D, Sutskever I (2019) Language models are unsupervised multitask learners. OpenAI blog 1(8):9","journal-title":"OpenAI blog"},{"key":"3678_CR18","unstructured":"Furlanello T, Lipton Z, Tschannen M, Itti L, Anandkumar A (2018) Born again neural networks. In: International conference on machine learning, PMLR, pp 1607\u20131616"},{"key":"3678_CR19","unstructured":"Arazo E, Ortego D, Albert P, O\u2019Connor N, McGuinness K (2019) Unsupervised label noise modeling and loss correction. In: International conference on machine learning, PMLR, pp 312\u2013321"},{"issue":"2","key":"3678_CR20","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1023\/A:1026543900054","volume":"40","author":"Y Rubner","year":"2000","unstructured":"Rubner Y, Tomasi C, Guibas LJ (2000) The earth mover\u2019s distance as a metric for image retrieval. Int J Comput Vis 40(2):99\u2013121","journal-title":"Int J Comput Vis"},{"key":"3678_CR21","doi-asserted-by":"publisher","first-page":"54","DOI":"10.1016\/j.neunet.2019.01.012","volume":"113","author":"GI Parisi","year":"2019","unstructured":"Parisi GI, Kemker R, Part JL, Kanan C, Wermter S (2019) Continual lifelong learning with neural networks: a review. Neural Netw 113:54\u201371. https:\/\/doi.org\/10.1016\/j.neunet.2019.01.012","journal-title":"Neural Netw"},{"key":"3678_CR22","doi-asserted-by":"publisher","first-page":"52","DOI":"10.3389\/fnbot.2020.00052","volume":"14","author":"S Heinrich","year":"2020","unstructured":"Heinrich S, Yao Y, Hinz T, Liu Z, Hummel T, Kerzel M, Weber C, Wermter S (2020) Crossmodal language grounding in an embodied neurocognitive model. Frontiers Neurorobotics 14:52. https:\/\/doi.org\/10.3389\/fnbot.2020.00052","journal-title":"Frontiers Neurorobotics"},{"issue":"6","key":"3678_CR23","doi-asserted-by":"publisher","first-page":"3339","DOI":"10.1007\/s10489-020-01984-x","volume":"51","author":"N Capuano","year":"2021","unstructured":"Capuano N, Greco L, Ritrovato P, Vento M (2021) Sentiment analysis for customer relationship management: an incremental learning approach. Appl Intell 51(6):3339\u20133352. https:\/\/doi.org\/10.1007\/s10489-020-01984-x","journal-title":"Appl Intell"},{"key":"3678_CR24","doi-asserted-by":"publisher","first-page":"607","DOI":"10.1016\/j.neunet.2021.07.021","volume":"143","author":"A Cossu","year":"2021","unstructured":"Cossu A, Carta A, Lomonaco V, Bacciu D (2021) Continual learning for recurrent neural networks: an empirical evaluation. Neural Netw 143:607\u2013627. https:\/\/doi.org\/10.1016\/j.neunet.2021.07.021","journal-title":"Neural Netw"},{"key":"3678_CR25","unstructured":"Shin H, Lee JK, Kim J, Kim J (2017) Continual learning with deep generative replay. In: Guyon I, von Luxburg U, Bengio S, Wallach HM, Fergus R, Vishwanathan SVN, Garnett R (eds) Advances in neural information processing systems 30: annual conference on neural information processing systems 2017, December 4-9, 2017, Long Beach, CA, USA, pp 2990\u20132999. https:\/\/proceedings.neurips.cc\/paper\/2017\/hash\/0efbe98067c6c73dba1250d2beaa81f9-Abstract.html"},{"key":"3678_CR26","unstructured":"Kemker R, Kanan C (2018) Fearnet: Brain-inspired model for incremental learning. In: 6th International conference on learning representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings. OpenReview.net. https:\/\/openreview.net\/forum?id=SJ1Xmf-Rb"},{"key":"3678_CR27","unstructured":"Schwarz J, Czarnecki W, Luketina J, Grabska-Barwinska A, Teh YW, Pascanu R, Hadsell R (2018) Progress & compress: A scalable framework for continual learning. In: Dy JG, Krause A (eds) Proceedings of the 35th international conference on machine learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10-15, 2018, Proceedings of Machine Learning Research, vol 80, PMLR, pp 4535\u20134544. http:\/\/proceedings.mlr.press\/v80\/schwarz18a.html"},{"key":"3678_CR28","doi-asserted-by":"crossref","unstructured":"Zhai M, Chen L, Tung F, He J, Nawhal M, Mori G (2019) Lifelong gan: Continual learning for conditional image generation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2759\u20132768","DOI":"10.1109\/ICCV.2019.00285"},{"key":"3678_CR29","doi-asserted-by":"publisher","unstructured":"van de Ven GM, Siegelmann HT, Tolias AS (2020) Brain-inspired replay for continual learning with artificial neural networks. Nat Commun 11. https:\/\/doi.org\/10.1038\/s41467-020-17866-2","DOI":"10.1038\/s41467-020-17866-2"},{"issue":"13","key":"3678_CR30","doi-asserted-by":"publisher","first-page":"3521","DOI":"10.1073\/pnas.1611835114","volume":"114","author":"J Kirkpatrick","year":"2017","unstructured":"Kirkpatrick J, Pascanu R, Rabinowitz N, Veness J, Desjardins G, Rusu AA, Milan K, Quan J, Ramalho T, Grabska-Barwinska A et al (2017) Overcoming catastrophic forgetting in neural networks. Proceedings of the National Academy of Sciences 114(13):3521\u20133526","journal-title":"Proceedings of the National Academy of Sciences"},{"key":"3678_CR31","unstructured":"Zenke F, Poole B, Ganguli S (2017) Continual learning through synaptic intelligence. In: Precup D, Teh YW (eds) Proceedings of the 34th international conference on machine learning, ICML 2017, Sydney, NSW, Australia, 6-11 August 2017, Proceedings of Machine Learning Research, vol 70, PMLR, pp 3987\u20133995. http:\/\/proceedings.mlr.press\/v70\/zenke17a.html"},{"key":"3678_CR32","doi-asserted-by":"crossref","unstructured":"Aljundi R, Babiloni F, Elhoseiny M, Rohrbach M, Tuytelaars T (2018) Memory aware synapses: Learning what (not) to forget. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 139\u2013154","DOI":"10.1007\/978-3-030-01219-9_9"},{"key":"3678_CR33","unstructured":"Lee S, Kim J, Jun J, Ha J, Zhang B (2017) Overcoming catastrophic forgetting by incremental moment matching. In: Guyon I, von Luxburg U, Bengio S, Wallach HM, Fergus R, Vishwanathan SVN, Garnett R (eds) Advances in neural information processing systems 30: annual conference on neural information processing systems 2017, December 4-9, 2017, Long Beach, CA, USA, pp 4652\u20134662. https:\/\/proceedings.neurips.cc\/paper\/2017\/hash\/f708f064faaf32a43e4d3c784e6af9ea-Abstract.html"},{"key":"3678_CR34","unstructured":"nostalgebraist (2020) Interpreting gpt: the logit lens. https:\/\/www.lesswrong.com\/posts\/AcKRB8wDpdaN6v6ru\/interpreting-gpt-the-logit-lens. Accessed 31st Aug 2020"},{"key":"3678_CR35","unstructured":"Alammar J (2021) Finding the words to say: Hidden state visualizations for language models. https:\/\/jalammar.github.io\/hidden-states\/"},{"key":"3678_CR36","doi-asserted-by":"publisher","unstructured":"Li J, Liu X, Zhao H, Xu R, Yang M, Jin Y (2020) BERT-EMD: Many-to-many layer mapping for BERT compression with earth mover\u2019s distance. In: Proceedings of the 2020 conference on empirical methods in natural language processing (EMNLP), Association for Computational Linguistics, Online, pp 3009\u20133018. https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.242, https:\/\/www.aclweb.org\/anthology\/2020.emnlp-main.242","DOI":"10.18653\/v1\/2020.emnlp-main.242"},{"key":"3678_CR37","doi-asserted-by":"publisher","unstructured":"Devlin J, Chang M, Lee K, Toutanova K (2019) BERT: Pre-training of deep bidirectional transformers for language understanding. In: Burstein J, Doran C, Solorio T (eds) Proceedings of the 2019 conference of the north american chapter of the association for computational linguistics: human language technologies, NAACL-HLT 2019, Minneapolis, MN, USA, June 2-7, 2019, Volume 1 (Long and Short Papers), Association for Computational Linguistics, pp 4171\u20134186. https:\/\/doi.org\/10.18653\/v1\/n19-1423","DOI":"10.18653\/v1\/n19-1423"},{"key":"3678_CR38","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/3446776","volume":"64","author":"C Zhang","year":"2021","unstructured":"Zhang C, Bengio S, Hardt M, Recht B, Vinyals O (2021) Understanding deep learning (still) requires rethinking generalization. Commun ACM 64:107\u2013115","journal-title":"Commun ACM"},{"key":"3678_CR39","unstructured":"Reed SE, Lee H, Anguelov D, Szegedy C, Erhan D, Rabinovich A (2015) Training deep neural networks on noisy labels with bootstrapping. In: Bengio Y, LeCun Y (eds) 3rd international conference on learning representations, ICLR 2015, San Diego, CA, USA, May 7-9, 2015, Workshop Track Proceedings. 1412.6596"},{"key":"3678_CR40","doi-asserted-by":"publisher","unstructured":"Papineni K, Roukos S, Ward T, Zhu W (2002) Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th annual meeting of the association for computational linguistics, July 6-12, 2002, Philadelphia, PA, USA, ACL, pp 311\u2013318. https:\/\/doi.org\/10.3115\/1073083.1073135, https:\/\/aclanthology.org\/P02-1040\/","DOI":"10.3115\/1073083.1073135"},{"key":"3678_CR41","unstructured":"Zhang X, Zhao JJ, LeCun Y (2015) Character-level convolutional networks for text classification. In: Cortes C, Lawrence ND, Lee DD, Sugiyama M, Garnett R (eds) Advances in neural information processing systems 28: annual conference on neural information processing systems 2015, December 7-12, 2015, Montreal, Quebec, Canada, pp 649\u2013657 . https:\/\/proceedings.neurips.cc\/paper\/2015\/hash\/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03678-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-022-03678-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03678-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,4,30]],"date-time":"2023-04-30T09:23:11Z","timestamp":1682846591000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-022-03678-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,8]]},"references-count":41,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2023,4]]}},"alternative-id":["3678"],"URL":"https:\/\/doi.org\/10.1007\/s10489-022-03678-y","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"type":"print","value":"0924-669X"},{"type":"electronic","value":"1573-7497"}],"subject":[],"published":{"date-parts":[[2022,8,8]]},"assertion":[{"value":"22 April 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 August 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}