{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T15:56:35Z","timestamp":1759334195918,"version":"build-2065373602"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,4,28]],"date-time":"2025-04-28T00:00:00Z","timestamp":1745798400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,28]],"date-time":"2025-04-28T00:00:00Z","timestamp":1745798400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100002701","name":"Ministry of Education","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002701","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002701","name":"Ministry of Education","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002701","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,4,28]]},"DOI":"10.1109\/aicas64808.2025.11173092","type":"proceedings-article","created":{"date-parts":[[2025,9,25]],"date-time":"2025-09-25T17:52:35Z","timestamp":1758822755000},"page":"1-5","source":"Crossref","is-referenced-by-count":0,"title":["On-Chip Learning via Transformer In-Context Learning"],"prefix":"10.1109","author":[{"given":"Jan","family":"Finkbeiner","sequence":"first","affiliation":[{"name":"Fakult&#x00E4;t fr Elektrotechnik und Informationstechnik, RWTH Aachen,Aachen,Germany,52074"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Emre","family":"Neftci","sequence":"additional","affiliation":[{"name":"Fakult&#x00E4;t fr Elektrotechnik und Informationstechnik, RWTH Aachen,Aachen,Germany,52074"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"article-title":"What learning algorithm is incontext learning\u0192 investigations with linear models","year":"2022","author":"Aky\u00fcrek","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.678"},{"article-title":"xlstm: Extended long shortterm memory","year":"2024","author":"Beck","key":"ref3"},{"key":"ref4","article-title":"Longformer: The long-document transformer","author":"Beltagy","year":"2020","journal-title":"CoRR"},{"article-title":"On the opportunities and risks of foundation models","year":"2021","author":"Bommasani","key":"ref5"},{"key":"ref6","first-page":"18771901","article-title":"Language models are fewshot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref7","first-page":"1887818891","article-title":"Data distributional properties drive emergent incontext learning in transformers","volume-title":"Advances in Neural Information Processing Systems","volume":"35","author":"Chan","year":"2022"},{"issue":"99","key":"ref8","first-page":"11","article-title":"Loihi: A neuromorphic manycore processor with onchip learning","volume":"PP","author":"Davies","year":"2018","journal-title":"IEEE Micro"},{"key":"ref9","first-page":"11261135","article-title":"Modelagnostic metalearning for fast adaptation of deep networks","volume-title":"Proceedings of the 34th International Conference on Machine LearningVolume 70","author":"Finn"},{"key":"ref10","first-page":"3058330598","article-title":"What can transformers learn incontext\u0192 a case study of simple function classes","volume":"35","author":"Garg","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"volume-title":"Deep learning","year":"2016","author":"Goodfellow","key":"ref11"},{"article-title":"Mamba: Lineartime sequence modeling with selective state spaces","year":"2023","author":"Gu","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10802683"},{"journal-title":"Training compute-optimal large language models","year":"2022","author":"Hoffmann","key":"ref14"},{"article-title":"Scaling laws for neural language models","year":"2020","author":"Kaplan","key":"ref15"},{"issue":"1","key":"ref16","doi-asserted-by":"crossref","first-page":"112","DOI":"10.1038\/s41467-021-22364-0","article-title":"Robust highdimensional memoryaugmented neural networks","volume":"12","author":"Karunaratne","year":"2021","journal-title":"Nature communications"},{"article-title":"Full stack optimization of transformer inference: a survey","year":"2023","author":"Kim","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.23919\/date51398.2021.9474146"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1126\/science.aab3050"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1038\/s43588-025-00854-1"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ted.2021.3110464"},{"journal-title":"The lazy neuron phenomenon: On emergence of activation sparsity in transformers","year":"2023","author":"Li","key":"ref22"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3714983.3714987"},{"journal-title":"Training-free activation sparsity in large language models","year":"2024","author":"Liu","key":"ref24"},{"journal-title":"The era of 1-bit llms: All large language models are in 1.58 bits","year":"2024","author":"Ma","key":"ref25"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-022-33629-7"},{"journal-title":"Relu strikes back: Exploiting activation sparsity in large language models","year":"2023","author":"Mirzadeh","key":"ref27"},{"key":"ref28","first-page":"1842","article-title":"Meta-learning with memory-augmented neural networks","volume-title":"Proceedings of The 33rd International Conference on Machine Learning, volume 48 of Proceedings of Machine Learning Research","author":"Santoro"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2023.3282046"},{"journal-title":"Emulating brainlike rapid learning in neuromorphic edge computing","year":"2024","author":"Stewart","key":"ref30"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"article-title":"Matching networks for one shot learning","year":"2016","author":"Vinyals","key":"ref32"},{"key":"ref33","first-page":"3515135174","article-title":"Transformers learn incontext by gradient descent","volume-title":"International Conference on Machine Learning","author":"Oswald"},{"article-title":"Uncovering mesaoptimization algorithms in transformers","year":"2023","author":"Oswald","key":"ref34"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1038\/s44335-024-00004-2"},{"journal-title":"Smoothquant: Accurate and efficient post-training quantization for large language models","year":"2024","author":"Xiao","key":"ref36"},{"article-title":"Gated linear attention transformers with hardwareefficient training","year":"2023","author":"Yang","key":"ref37"},{"journal-title":"Scalable matmul-free language modeling","year":"2024","author":"Zhu","key":"ref38"}],"event":{"name":"2025 IEEE 7th International Conference on Artificial Intelligence Circuits and Systems (AICAS)","start":{"date-parts":[[2025,4,28]]},"location":"Bordeaux, France","end":{"date-parts":[[2025,4,30]]}},"container-title":["2025 IEEE 7th International Conference on Artificial Intelligence Circuits and Systems (AICAS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11172731\/11173086\/11173092.pdf?arnumber=11173092","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T12:51:35Z","timestamp":1759236695000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11173092\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,28]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/aicas64808.2025.11173092","relation":{},"subject":[],"published":{"date-parts":[[2025,4,28]]}}}