{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,22]],"date-time":"2025-12-22T04:38:39Z","timestamp":1766378319930,"version":"3.40.3"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031703676"},{"type":"electronic","value":"9783031703683"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70368-3_26","type":"book-chapter","created":{"date-parts":[[2024,8,30]],"date-time":"2024-08-30T23:03:11Z","timestamp":1725058991000},"page":"434-449","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Boosting Long-Tail Data Classification with\u00a0Sparse Prototypical Networks"],"prefix":"10.1007","author":[{"given":"Alexei","family":"Figueroa","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jens-Michalis","family":"Papaioannou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Conor","family":"Fallon","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexandra","family":"Bekiaridou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Keno","family":"Bressem","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stavros","family":"Zanos","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Felix","family":"Gers","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wolfgang","family":"Nejdl","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexander","family":"L\u00f6ser","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,8,22]]},"reference":[{"key":"26_CR1","doi-asserted-by":"crossref","unstructured":"van Aken, B., Papaioannou, J., Mayrdorfer, M., Budde, K., Gers, F.A., L\u00f6ser, A.: Clinical outcome prediction from admission notes using self-supervised knowledge integration. In: EACL, pp. 881\u2013893. Association for Computational Linguistics (2021)","DOI":"10.18653\/v1\/2021.eacl-main.75"},{"key":"26_CR2","unstructured":"van Aken, B., et al.: This patient looks like that patient: Prototypical networks for interpretable diagnosis prediction from clinical text. In: AACL\/IJCNLP (1), pp. 172\u2013184. Association for Computational Linguistics (2022)"},{"key":"26_CR3","doi-asserted-by":"crossref","unstructured":"Atanasova, P., Simonsen, J.G., Lioma, C., Augenstein, I.: A diagnostic study of explainability techniques for text classification. In: EMNLP (1), pp. 3256\u20133274. Association for Computational Linguistics (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.263"},{"key":"26_CR4","unstructured":"Bengio, Y., L\u00e9onard, N., Courville, A.C.: Estimating or propagating gradients through stochastic neurons for conditional computation. CoRR abs\/1308.3432 (2013)"},{"key":"26_CR5","doi-asserted-by":"crossref","unstructured":"Bergstra, J., Yamins, D., Cox, D.D.: Hyperopt: a python library for optimizing the hyperparameters of machine learning algorithms. In: SciPy, pp. 13\u201319. scipy.org (2013)","DOI":"10.25080\/Majora-8b375195-003"},{"key":"26_CR6","unstructured":"Bricken, T., Davies, X., Singh, D., Krotov, D., Kreiman, G.: Sparse distributed memory is a continual learner. In: ICLR. OpenReview.net (2023)"},{"key":"26_CR7","unstructured":"Chen, C., Li, O., Tao, D., Barnett, A., Rudin, C., Su, J.: This looks like that: deep learning for interpretable image recognition. In: NeurIPS, pp. 8928\u20138939 (2019)"},{"issue":"6364","key":"26_CR8","doi-asserted-by":"publisher","first-page":"793","DOI":"10.1126\/science.aam9868","volume":"358","author":"S Dasgupta","year":"2017","unstructured":"Dasgupta, S., Stevens, C.F., Navlakha, S.: A neural algorithm for a fundamental computing problem. Science 358(6364), 793\u2013796 (2017)","journal-title":"Science"},{"key":"26_CR9","doi-asserted-by":"crossref","unstructured":"Deuschel, J., et al.: Multi-prototype few-shot learning in histopathology. In: ICCVW, pp. 620\u2013628. IEEE (2021)","DOI":"10.1109\/ICCVW54120.2021.00075"},{"key":"26_CR10","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: NAACL-HLT (1), pp. 4171\u20134186. Association for Computational Linguistics (2019)"},{"key":"26_CR11","unstructured":"Du, N., et al.: Glam: efficient scaling of language models with mixture-of-experts. In: ICML. Proceedings of Machine Learning Research, vol.\u00a0162, pp. 5547\u20135569. PMLR (2022)"},{"key":"26_CR12","unstructured":"Fedus, W., Zoph, B., Shazeer, N.: Switch transformers: scaling to trillion parameter models with simple and efficient sparsity. J. Mach. Learn. Res. 23, 120:1\u2013120:39 (2022)"},{"key":"26_CR13","unstructured":"Grundmann, P., Oberhauser, T., Gers, F.A., L\u00f6ser, A.: Attention networks for augmenting clinical text with support sets for diagnosis prediction. In: COLING, pp. 4765\u20134775. International Committee on Computational Linguistics (2022)"},{"key":"26_CR14","doi-asserted-by":"crossref","unstructured":"Gururangan, S., Swayamdipta, S., Levy, O., Schwartz, R., Bowman, S.R., Smith, N.A.: Annotation artifacts in natural language inference data. In: NAACL-HLT (2), pp. 107\u2013112. Association for Computational Linguistics (2018)","DOI":"10.18653\/v1\/N18-2017"},{"key":"26_CR15","doi-asserted-by":"crossref","unstructured":"Hase, P., Chen, C., Li, O., Rudin, C.: Interpretable image recognition with hierarchical prototypes. In: HCOMP, pp. 32\u201340. AAAI Press (2019)","DOI":"10.1609\/hcomp.v7i1.5265"},{"key":"26_CR16","doi-asserted-by":"publisher","unstructured":"Johnson, A., Bulgarelli, L., Pollard, T., Horng, S., Celi, L.A., Mark, R.: MIMIC-IV (2021). https:\/\/doi.org\/10.13026\/s6n6-xd98. https:\/\/physionet.org\/content\/mimiciv\/1.0\/","DOI":"10.13026\/s6n6-xd98"},{"key":"26_CR17","unstructured":"Lepikhin, D., et al.: Gshard: scaling giant models with conditional computation and automatic sharding. In: ICLR. OpenReview.net (2021)"},{"key":"26_CR18","unstructured":"Li, X., Tian, T., Liu, Y., Yu, H., Cao, J., Ma, Z.: Adaptive multi-prototype relation network. In: 2020 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC), pp. 1707\u20131712 (2020)"},{"key":"26_CR19","unstructured":"Liang, Y., et al.: Can a fruit fly learn word embeddings? In: ICLR. OpenReview.net (2021)"},{"key":"26_CR20","unstructured":"Liaw, R., Liang, E., Nishihara, R., Moritz, P., Gonzalez, J.E., Stoica, I.: Tune: a research platform for distributed model selection and training. CoRR abs\/1807.05118 (2018)"},{"key":"26_CR21","doi-asserted-by":"publisher","first-page":"956","DOI":"10.1162\/tacl_a_00500","volume":"10","author":"A Naik","year":"2022","unstructured":"Naik, A., Lehman, J., Ros\u00e9, C.P.: Adapting to the long tail: a meta-analysis of transfer learning research for language understanding tasks. Trans. Assoc. Comput. Linguist. 10, 956\u2013980 (2022)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"26_CR22","doi-asserted-by":"crossref","unstructured":"Naik, A., Parasa, S., Feldman, S., Wang, L.L., Hope, T.: Literature-augmented clinical outcome prediction. In: NAACL-HLT (Findings), pp. 438\u2013453. Association for Computational Linguistics (2022)","DOI":"10.18653\/v1\/2022.findings-naacl.33"},{"key":"26_CR23","unstructured":"Papaioannou, J., et al.: Cross-lingual knowledge transfer for clinical phenotyping. In: LREC, pp. 900\u2013909. European Language Resources Association (2022)"},{"key":"26_CR24","doi-asserted-by":"crossref","unstructured":"Poliak, A., Naradowsky, J., Haldar, A., Rudinger, R., Durme, B.V.: Hypothesis only baselines in natural language inference. In: *SEM@NAACL-HLT, pp. 180\u2013191. Association for Computational Linguistics (2018)","DOI":"10.18653\/v1\/S18-2023"},{"key":"26_CR25","doi-asserted-by":"crossref","unstructured":"Searle, T., Ibrahim, Z.M., Dobson, R.J.B.: Experimental evaluation and development of a silver-standard for the MIMIC-III clinical coding dataset. CoRR abs\/2006.07332 (2020)","DOI":"10.18653\/v1\/2020.bionlp-1.8"},{"key":"26_CR26","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1007\/978-3-642-23808-6_10","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"K Sechidis","year":"2011","unstructured":"Sechidis, K., Tsoumakas, G., Vlahavas, I.: On the stratification of multi-label data. In: Gunopulos, D., Hofmann, T., Malerba, D., Vazirgiannis, M. (eds.) ECML PKDD 2011. LNCS (LNAI), vol. 6913, pp. 145\u2013158. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-23808-6_10"},{"key":"26_CR27","unstructured":"Shazeer, N., et al.: Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. In: ICLR (Poster). OpenReview.net (2017)"},{"key":"26_CR28","unstructured":"Shen, Y., Dasgupta, S., Navlakha, S.: Algorithmic insights on continual learning from fruit flies. CoRR abs\/2107.07617 (2021)"},{"key":"26_CR29","unstructured":"Snell, J., Swersky, K., Zemel, R.S.: Prototypical networks for few-shot learning. In: NIPS, pp. 4077\u20134087 (2017)"},{"issue":"4","key":"26_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.patter.2023.100729","volume":"4","author":"R Tinn","year":"2023","unstructured":"Tinn, R., et al.: Fine-tuning large neural language models for biomedical natural language processing. Patterns 4(4), 100729 (2023)","journal-title":"Patterns"},{"key":"26_CR31","unstructured":"Winter, B., Rosero, A.F., L\u00f6ser, A., Gers, F.A., Siu, A.: KIMERA: injecting domain knowledge into vacant transformer heads. In: LREC, pp. 363\u2013373. European Language Resources Association (2022)"},{"key":"26_CR32","doi-asserted-by":"crossref","unstructured":"Xie, S., Vosoughi, S., Hassanpour, S.: Proto-LM: a prototypical network-based framework for built-in interpretability in large language models. In: EMNLP (Findings), pp. 3964\u20133979. Association for Computational Linguistics (2023)","DOI":"10.18653\/v1\/2023.findings-emnlp.261"},{"key":"26_CR33","doi-asserted-by":"crossref","unstructured":"Zellers, R., Holtzman, A., Bisk, Y., Farhadi, A., Choi, Y.: Hellaswag: can a machine really finish your sentence? In: ACL (1), pp. 4791\u20134800. Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/P19-1472"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70368-3_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,30]],"date-time":"2024-08-30T23:14:04Z","timestamp":1725059644000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70368-3_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031703676","9783031703683"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70368-3_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"22 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors declare no relevant competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vilnius","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lithuania","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2024.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}