{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T19:36:51Z","timestamp":1757619411296,"version":"3.44.0"},"publisher-location":"Singapore","reference-count":27,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819500192"},{"type":"electronic","value":"9789819500208"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-95-0020-8_27","type":"book-chapter","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T09:17:08Z","timestamp":1753262228000},"page":"319-330","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Token Memory Transformer with Infinite Context"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-5298-6633","authenticated-orcid":false,"given":"Taize","family":"Sun","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7867-4281","authenticated-orcid":false,"given":"Katsuhide","family":"Fujita","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1838-4789","authenticated-orcid":false,"given":"Konstantin","family":"Markov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6992-7566","authenticated-orcid":false,"given":"Shengbo","family":"Chang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,7,24]]},"reference":[{"key":"27_CR1","doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: Summary of chatgpt-related research and perspective towards the future of large language models. Meta-Radiol., 100017 (2023)","DOI":"10.1016\/j.metrad.2023.100017"},{"key":"27_CR2","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inform. Process. Syst. 30 (2017)"},{"key":"27_CR3","unstructured":"Brown, T.B.: Language models are few-shot learners. arXiv preprint arXiv:2005.14165 (2020)"},{"key":"27_CR4","doi-asserted-by":"publisher","unstructured":"Collins, A.M., Quillian, M.R.: Retrieval time from semantic memory. J. Verbal Learn. Verbal Behav. 8(2), 240\u2013247 (1969). https:\/\/doi.org\/10.1016\/S0022-5371(69)80069-1, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0022537169800691","DOI":"10.1016\/S0022-5371(69)80069-1"},{"key":"27_CR5","unstructured":"Munkhdalai, T., Faruqui, M., Gopal, S.: Leave no context behind: Efficient infinite context transformers with infini-attention. arXiv preprint arXiv:2404.07143 (2024)"},{"key":"27_CR6","unstructured":"Wu, Y., Rabe, M.N., Hutchins, D., Szegedy, C.: Memorizing transformers. arXiv preprint arXiv:2203.08913 (2022)"},{"key":"27_CR7","doi-asserted-by":"crossref","unstructured":"Chen, R., Wang, J., Yu, L.C., Zhang, X.: Learning to memorize entailment and discourse relations for persona-consistent dialogues. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 12653\u201312661 (2023)","DOI":"10.1609\/aaai.v37i11.26489"},{"key":"27_CR8","unstructured":"Rae, J.W., Potapenko, A., Jayakumar, S.M., Hillier, C., Lillicrap, T.P.: Compressive transformers for long-range sequence modelling. arXiv preprint (2019), https:\/\/arxiv.org\/abs\/1911.05507"},{"key":"27_CR9","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. arXiv e-prints (2019)"},{"key":"27_CR10","doi-asserted-by":"crossref","unstructured":"Kry\u015bci\u0144ski, W., Rajani, N., Agarwal, D., Xiong, C., Radev, D.: Booksum: A collection of datasets for long-form narrative summarization (2021)","DOI":"10.18653\/v1\/2022.findings-emnlp.488"},{"key":"27_CR11","doi-asserted-by":"crossref","unstructured":"Dai, Z.: Transformer-xl: Attentive language models beyond a fixed-length context. arXiv preprint arXiv:1901.02860 (2019)","DOI":"10.18653\/v1\/P19-1285"},{"key":"27_CR12","unstructured":"Rae, J.W., Potapenko, A., Jayakumar, S.M., Lillicrap, T.P.: Compressive transformers for long-range sequence modelling. arXiv preprint arXiv:1911.05507 (2019)"},{"key":"27_CR13","first-page":"11079","volume":"35","author":"A Bulatov","year":"2022","unstructured":"Bulatov, A., Kuratov, Y., Burtsev, M.: Recurrent memory transformer. Adv. Neural. Inf. Process. Syst. 35, 11079\u201311091 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"2","key":"27_CR14","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1207\/s15516709cog1402_1","volume":"14","author":"JL Elman","year":"1990","unstructured":"Elman, J.L.: Finding structure in time. Cogn. Sci. 14(2), 179\u2013211 (1990)","journal-title":"Cogn. Sci."},{"key":"27_CR15","doi-asserted-by":"crossref","unstructured":"Chevalier, A., Wettig, A., Ajith, A., Chen, D.: Adapting language models to compress contexts. arXiv preprint arXiv:2305.14788 (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.232"},{"key":"27_CR16","doi-asserted-by":"crossref","unstructured":"Graves, A., Graves, A.: Long short-term memory. Supervised sequence labelling with recurrent neural networks, pp. 37\u201345 (2012)","DOI":"10.1007\/978-3-642-24797-2_4"},{"key":"27_CR17","unstructured":"Schlag, I., Munkhdalai, T., Schmidhuber, J.: Learning associative inference using fast weight memory. arXiv preprint arXiv:2011.07831 (2020)"},{"key":"27_CR18","unstructured":"Katharopoulos, A., Vyas, A., Pappas, N., Fleuret, F.: Transformers are rnns: fast autoregressive transformers with linear attention. In: International Conference on Machine Learning, pp. 5156\u20135165. PMLR (2020)"},{"key":"27_CR19","unstructured":"Clevert, D.A.: Fast and accurate deep network learning by exponential linear units (elus). arXiv preprint arXiv:1511.07289 (2015)"},{"key":"27_CR20","unstructured":"Xia, M., Gao, T., Zeng, Z., Chen, D.: Sheared llama: accelerating language model pre-training via structured pruning. arXiv preprint arXiv:2310.06694 (2023)"},{"key":"27_CR21","unstructured":"Touvron, H., et al.: Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)"},{"key":"27_CR22","unstructured":"Computer, T.: Redpajama: An open source recipe to reproduce llama training dataset (2023).\u00a0https:\/\/github.com\/togethercomputer\/RedPajama-Data"},{"key":"27_CR23","unstructured":"Shazeer, N., Stern, M.: Adafactor: adaptive learning rates with sublinear memory cost. In: International Conference on Machine Learning. pp. 4596\u20134604. PMLR (2018)"},{"key":"27_CR24","unstructured":"Lin, C.Y.: Rouge: a package for automatic evaluation of summaries. In: Text Summarization Branches Out, pp. 74\u201381 (2004)"},{"key":"27_CR25","doi-asserted-by":"crossref","unstructured":"Lewis, M.: Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461 (2019)","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"27_CR26","doi-asserted-by":"crossref","unstructured":"Xiao, W., Beltagy, I., Carenini, G., Cohan, A.: Primera: pyramid-based masked sentence pre-training for multi-document summarization. arXiv preprint arXiv:2110.08499 (2021)","DOI":"10.18653\/v1\/2022.acl-long.360"},{"key":"27_CR27","unstructured":"Bertsch, A., Alon, U., Neubig, G., Gormley, M.: Unlimiformer: long-range transformers with unlimited length input. Adv. Neural Inform. Process. Syst. 36 (2024)"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-0020-8_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T19:41:37Z","timestamp":1757274097000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-0020-8_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819500192","9789819500208"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-0020-8_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"24 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ningbo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/icg\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}