{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T08:54:51Z","timestamp":1743065691701,"version":"3.40.3"},"publisher-location":"Cham","reference-count":44,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031159367"},{"type":"electronic","value":"9783031159374"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-15937-4_42","type":"book-chapter","created":{"date-parts":[[2022,9,6]],"date-time":"2022-09-06T08:15:35Z","timestamp":1662452135000},"page":"496-507","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Using Multiple Heads to\u00a0Subsize Meta-memorization Problem"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2923-6047","authenticated-orcid":false,"given":"Lu","family":"Wang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0377-3810","authenticated-orcid":false,"given":"K. L.","family":"Eddie Law","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,9,7]]},"reference":[{"key":"42_CR1","unstructured":"Finn, C., Abbeel, P., Levine, S.: Model-agnostic meta-learning for fast adaptation of deep networks. In: ICML, vol. 70, pp. 1126\u20131135 (2017). http:\/\/proceedings.mlr.press\/v70\/finn17a.html"},{"key":"42_CR2","doi-asserted-by":"crossref","unstructured":"Hospedales, T.M., Antoniou, A., Micaelli, P., Storkey, A.J.: Meta-learning in neural networks: a survey. IEEE Trans. Pattern Anal. Mach. Intell. (2021). https:\/\/ieeexplore.ieee.org\/document\/9428530","DOI":"10.1109\/TPAMI.2021.3079209"},{"key":"42_CR3","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"7587","key":"42_CR4","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"key":"42_CR5","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: NAACL-HLT (2019)"},{"key":"42_CR6","unstructured":"Marcus, G.: Deep learning: a critical appraisal. CoRR abs\/1801.00631 (2018). https:\/\/arxiv.org\/abs\/1801.00631"},{"key":"42_CR7","unstructured":"Yin, M., Tucker, G., Zhou, M., Levine, S., Finn, C.: Meta-learning without memorization. In: ICLR (2020)"},{"key":"42_CR8","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1016\/j.patrec.2021.05.010","volume":"149","author":"S Doveh","year":"2021","unstructured":"Doveh, S., et al.: MetAdapt: meta-learned task-adaptive architecture for few-shot classification. Pattern Recognit. Lett. 149, 130\u2013136 (2021)","journal-title":"Pattern Recognit. Lett."},{"key":"42_CR9","unstructured":"Raghu, A., Raghu, M., Bengio, S., Vinyals, O.: Rapid learning or feature reuse? towards understanding the effectiveness of MAML. In: ICLR (2020)"},{"key":"42_CR10","unstructured":"Nichol, A., Achiam, J., Schulman, J.: On first-order meta-learning algorithms. CoRR abs\/1803.02999 (2018). https:\/\/arxiv.org\/abs\/1803.02999"},{"issue":"1","key":"42_CR11","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava, N., Hinton, G.E., Krizhevsky, A., Sutskever, I., Salakhutdinov, R.: Dropout: a simple way to prevent neural networks from overfitting. J. Mach. Learn. Res. 15(1), 1929\u20131958 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"42_CR12","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. In: ICML (2015)"},{"key":"42_CR13","unstructured":"Rajendran, J., Irpan, A., Jang, E.: Meta-learning requires meta-augmentation. In: NeurIPS (2020)"},{"key":"42_CR14","unstructured":"Pan, E., Rajak, P., Shrivastava, S.: Meta-regularization by enforcing mutual-exclusiveness. CoRR abs\/2101.09819 (2021)"},{"key":"42_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"675","DOI":"10.1007\/978-3-030-58529-7_40","volume-title":"Computer Vision \u2013 ECCV 2020","author":"H Tian","year":"2020","unstructured":"Tian, H., Liu, B., Yuan, X.-T., Liu, Q.: Meta-learning with network pruning. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12364, pp. 675\u2013700. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58529-7_40"},{"key":"42_CR16","unstructured":"Zintgraf, L.M., Shiarlis, K., Kurin, V., Hofmann, K., Whiteson, S.: Fast context adaptation via meta-learning. In: ICML (2019)"},{"key":"42_CR17","unstructured":"Havasi, M., et al.: Training independent subnetworks for robust prediction. In: ICLR (2021)"},{"issue":"10","key":"42_CR18","doi-asserted-by":"publisher","first-page":"1345","DOI":"10.1109\/TKDE.2009.191","volume":"22","author":"SJ Pan","year":"2010","unstructured":"Pan, S.J., Yang, Q.: A survey on transfer learning. IEEE Trans. Knowl. Data Eng. 22(10), 1345\u20131359 (2010)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"42_CR19","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21, 140:1\u2013140:67 (2020). https:\/\/jmlr.org\/papers\/v21\/20-074.html"},{"key":"42_CR20","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-1-4615-5529-2_1","volume-title":"Learning to Learn","author":"S Thrun","year":"1998","unstructured":"Thrun, S., Pratt, L.Y.: Learning to learn: introduction and overview. In: Thrun, S., Pratt, L.Y. (eds.) Learning to Learn, pp. 3\u201317. Springer, Boston (1998). https:\/\/doi.org\/10.1007\/978-1-4615-5529-2_1"},{"key":"42_CR21","unstructured":"Snell, J., Swersky, K., Zemel, R.S.: Prototypical networks for few-shot learning. In: NeurIPS (2017)"},{"key":"42_CR22","unstructured":"Metz, L., Maheswaranathan, N., Cheung, B., Sohl-Dickstein, J.: Meta-learning update rules for unsupervised representation learning. In: ICLR (2019)"},{"key":"42_CR23","unstructured":"Alet, F., Schneider, M.F., Lozano-P\u00e9rez, T., Kaelbling, L.P.: Meta-learning curiosity algorithms. In: ICLR (2020)"},{"key":"42_CR24","unstructured":"Franceschi, L., Frasconi, P., Salzo, S., Grazzi, R., Pontil, M.: Bilevel programming for hyperparameter optimization and meta-learning. In: ICML (2018)"},{"key":"42_CR25","doi-asserted-by":"crossref","unstructured":"Elsken, T., Staffler, B., Metzen, J.H., Hutter, F.: Meta-learning of neural architectures for few-shot learning. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.01238"},{"key":"42_CR26","unstructured":"Finn, C.: Learning to Learn with Gradients. Ph.D. thesis, University of California, Berkeley, USA (2018). https:\/\/escholarship.org\/uc\/item\/0987d4n3"},{"key":"42_CR27","unstructured":"Mishra, N., Rohaninejad, M., Chen, X., Abbeel, P.: A simple neural attentive meta-learner. In: ICLR (2018)"},{"key":"42_CR28","unstructured":"Yoon, J., Kim, T., Dia, O., Kim, S., Bengio, Y., Ahn, S.: Bayesian model-agnostic meta-learning. In: NeurIPS (2018)"},{"key":"42_CR29","doi-asserted-by":"crossref","unstructured":"Jamal, M.A., Qi, G.: Task agnostic meta-learning for few-shot learning. In: CVPR, pp. 11719\u201311727 (2019)","DOI":"10.1109\/CVPR.2019.01199"},{"key":"42_CR30","doi-asserted-by":"crossref","unstructured":"Lee, K., Maji, S., Ravichandran, A., Soatto, S.: Meta-learning with differentiable convex optimization. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01091"},{"key":"42_CR31","doi-asserted-by":"crossref","unstructured":"Tseng, H., Chen, Y., Tsai, Y., Liu, S., Lin, Y., Yang, M.: Regularizing meta-learning via gradient dropout. In: ACCV (2020)","DOI":"10.1007\/978-3-030-69538-5_14"},{"key":"42_CR32","unstructured":"Lee, H., Nam, T., Yang, E., Hwang, S.J.: Meta dropout: learning to perturb latent features for generalization. In: ICLR (2020)"},{"key":"42_CR33","unstructured":"Yao, H., et al.: Improving generalization in meta-learning via task augmentation. In: ICML (2021)"},{"key":"42_CR34","unstructured":"Han, Y., Huang, G., Song, S., Yang, L., Wang, H., Wang, Y.: Dynamic neural networks: a survey. CoRR abs\/2102.04906 (2021). https:\/\/arxiv.org\/abs\/2102.04906"},{"key":"42_CR35","doi-asserted-by":"crossref","unstructured":"Teerapittayanon, S., McDanel, B., Kung, H.T.: Branchynet: fast inference via early exiting from deep neural networks. In: ICPR (2016)","DOI":"10.1109\/ICPR.2016.7900006"},{"key":"42_CR36","unstructured":"Keskar, N.S., Mudigere, D., Nocedal, J., Smelyanskiy, M., Tang, P.T.P.: On large-batch training for deep learning: generalization gap and sharp minima. In: ICLR (2017)"},{"key":"42_CR37","unstructured":"Xie, Z., Sato, I., Sugiyama, M.: A diffusion theory for deep learning dynamics: Stochastic gradient descent exponentially favors flat minima. In: Keskar (2021)"},{"key":"42_CR38","unstructured":"Li, H., Xu, Z., Taylor, G., Studer, C., Goldstein, T.: Visualizing the loss landscape of neural nets. In: Bengio, S., Wallach, H.M., Larochelle, H., Grauman, K., Cesa-Bianchi, N., Garnett, R. (eds.) NeurIPS (2018)"},{"key":"42_CR39","unstructured":"De Bernardi, M.: Loss-landscapes. https:\/\/pypi.org\/project\/loss-landscapes\/3.0.6\/"},{"key":"42_CR40","unstructured":"Bertinetto, L., Henriques, J.F., Torr, P.H.S., Vedaldi, A.: Meta-learning with differentiable closed-form solvers. In: ICLR (2019)"},{"key":"42_CR41","unstructured":"Frankle, J., Carbin, M.: The lottery ticket hypothesis: finding sparse, trainable neural networks. In: ICLR (2019)"},{"key":"42_CR42","unstructured":"Krizhevsky, A.: Learning multiple layers of features from tiny images. Technical report, University of Toronto (2009)"},{"key":"42_CR43","unstructured":"Paszke, A., et al.: Pytorch: an imperative style, high-performance deep learning library. In: Wallach, H., Larochelle, H., Beygelzimer, A., d\u2019 Alch\u00e9-Buc, F., Fox, E., Garnett, R. (eds.) Advances in Neural Information Processing Systems 32, pp. 8024\u20138035. Curran Associates, Inc. (2019). https:\/\/proceedings.neurips.cc\/paper\/2019\/file\/bdbca288fee7f92f2bfa9f7012727740-Paper.pdf"},{"key":"42_CR44","unstructured":"Arnold, S.M.R., Mahajan, P., Datta, D., Bunner, I., Zarkias, K.S.: learn2learn: a library for meta-learning research. CoRR abs\/2008.12284 (2020). https:\/\/arxiv.org\/abs\/2008.12284"}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks and Machine Learning \u2013 ICANN 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-15937-4_42","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,3]],"date-time":"2024-10-03T08:33:11Z","timestamp":1727944391000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-15937-4_42"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031159367","9783031159374"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-15937-4_42","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"7 September 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICANN","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Neural Networks","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bristol","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icann2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/e-nns.org\/icann2022\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"561","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"255","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"45% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}