{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T16:33:00Z","timestamp":1781022780295,"version":"3.54.1"},"publisher-location":"Cham","reference-count":40,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031333736","type":"print"},{"value":"9783031333743","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-33374-3_35","type":"book-chapter","created":{"date-parts":[[2023,5,26]],"date-time":"2023-05-26T10:02:30Z","timestamp":1685095350000},"page":"443-454","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["LSG Attention: Extrapolation of\u00a0Pretrained Transformers to\u00a0Long Sequences"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0819-9056","authenticated-orcid":false,"given":"Charles","family":"Condevaux","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5630-2743","authenticated-orcid":false,"given":"S\u00e9bastien","family":"Harispe","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2023,5,27]]},"reference":[{"key":"35_CR1","doi-asserted-by":"crossref","unstructured":"Ainslie, J., et al.: ETC: encoding long and structured inputs in transformers. arXiv preprint arXiv:2004.08483 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.19"},{"key":"35_CR2","unstructured":"Andoni, A., Indyk, P., Laarhoven, T., Razenshteyn, I.P., Schmidt, L.: Practical and optimal LSH for angular distance. CoRR arXiv:abs\/1509.02897 (2015)"},{"key":"35_CR3","unstructured":"Beltagy, I., Peters, M.E., Cohan, A.: Longformer: the long-document transformer. arXiv:2004.05150 (2020)"},{"key":"35_CR4","doi-asserted-by":"crossref","unstructured":"Britz, D., Guan, M.Y., Luong, M.T.: Efficient attention using a fixed-size memory representation. arXiv preprint arXiv:1707.00110 (2017)","DOI":"10.18653\/v1\/D17-1040"},{"key":"35_CR5","doi-asserted-by":"crossref","unstructured":"Chalkidis, I., Fergadiotis, M., Malakasiotis, P., Aletras, N., Androutsopoulos, I.: LEGAL-BERT: the muppets straight out of law school. In: Findings of the Association for Computational Linguistics: EMNLP 2020, pp. 2898\u20132904 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.261"},{"key":"35_CR6","doi-asserted-by":"crossref","unstructured":"Chalkidis, I., et al.: LexGLUE: a benchmark dataset for legal language understanding in english. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics. Dubln, Ireland (2022)","DOI":"10.18653\/v1\/2022.acl-long.297"},{"key":"35_CR7","unstructured":"Child, R., Gray, S., Radford, A., Sutskever, I.: Generating long sequences with sparse transformers. arXiv preprint arXiv:1904.10509 (2019)"},{"key":"35_CR8","unstructured":"Chiu, C.C., Raffel, C.: Monotonic chunkwise attention. arXiv preprint arXiv:1712.05382 (2017)"},{"key":"35_CR9","unstructured":"Choromanski, K., et al.: Rethinking attention with performers. arXiv:2009.14794 (2021)"},{"key":"35_CR10","doi-asserted-by":"crossref","unstructured":"Clark, K., Khandelwal, U., Levy, O., Manning, C.D.: What does BERT look at? an analysis of bert\u2019s attention. arXiv preprint arXiv:1906.04341 (2019)","DOI":"10.18653\/v1\/W19-4828"},{"key":"35_CR11","doi-asserted-by":"crossref","unstructured":"Cohan, A., et al.: A discourse-aware attention model for abstractive summarization of long documents. Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 2 (Short Papers) (2018)","DOI":"10.18653\/v1\/N18-2097"},{"key":"35_CR12","doi-asserted-by":"crossref","unstructured":"Dai, Z., Yang, Z., Yang, Y., Carbonell, J., Le, Q.V., Salakhutdinov, R.: Transformer-XL: attentive language models beyond a fixed-length context. arXiv preprint arXiv:1901.02860 (2019)","DOI":"10.18653\/v1\/P19-1285"},{"key":"35_CR13","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"35_CR14","doi-asserted-by":"crossref","unstructured":"Fabbri, A.R., Li, I., She, T., Li, S., Radev, D.R.: Multi-news: a large-scale multi-document summarization dataset and abstractive hierarchical model (2019)","DOI":"10.18653\/v1\/P19-1102"},{"key":"35_CR15","doi-asserted-by":"crossref","unstructured":"Guo, M., et al.: Longt5: efficient text-to-text transformer for long sequences. CoRR arXiv:abs\/2112.07916 (2021)","DOI":"10.18653\/v1\/2022.findings-naacl.55"},{"key":"35_CR16","doi-asserted-by":"crossref","unstructured":"Guo, Q., Qiu, X., Liu, P., Shao, Y., Xue, X., Zhang, Z.: Star-transformer. arXiv preprint arXiv:1902.09113 (2019)","DOI":"10.18653\/v1\/N19-1133"},{"key":"35_CR17","doi-asserted-by":"publisher","first-page":"40707","DOI":"10.1109\/ACCESS.2019.2907992","volume":"7","author":"J He","year":"2019","unstructured":"He, J., Wang, L., Liu, L., Feng, J., Wu, H.: Long document classification from local word glimpses via recurrent attention learning. IEEE Access 7, 40707\u201340718 (2019)","journal-title":"IEEE Access"},{"key":"35_CR18","doi-asserted-by":"crossref","unstructured":"Huang, L., Cao, S., Parulian, N., Ji, H., Wang, L.: Efficient attentions for long document summarization. In: Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. Association for Computational Linguistics, Online (Jun 2021)","DOI":"10.18653\/v1\/2021.naacl-main.112"},{"key":"35_CR19","unstructured":"Katharopoulos, A., Vyas, A., Pappas, N., Fleuret, F.: Transformers are RNNs: fast autoregressive transformers with linear attention. CoRR arXiv:abs\/2006.16236 (2020)"},{"key":"35_CR20","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization (2014)"},{"key":"35_CR21","unstructured":"Kitaev, N., Kaiser, L., Levskaya, A.: Reformer: the efficient transformer. CoRR arXiv:abs\/2001.04451 (2020)"},{"key":"35_CR22","doi-asserted-by":"crossref","unstructured":"Lewis, M., et al.: BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 7871\u20137880. Association for Computational Linguistics, Online (Jul 2020)","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"35_CR23","unstructured":"Liu, Y., et al.: Roberta: a robustly optimized BERT pretraining approach. CoRR arXiv:abs\/1907.11692 (2019)"},{"key":"35_CR24","unstructured":"Maas, A.L., Daly, R.E., Pham, P.T., Huang, D., Ng, A.Y., Potts, C.: Learning word vectors for sentiment analysis. In: Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies, pp. 142\u2013150. Association for Computational Linguistics (Jun 2011)"},{"key":"35_CR25","unstructured":"Rae, J.W., Potapenko, A., Jayakumar, S.M., Lillicrap, T.P.: Compressive transformers for long-range sequence modelling. arXiv preprint arXiv:1911.05507 (2019)"},{"issue":"140","key":"35_CR26","first-page":"1","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(140), 1\u201367 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"35_CR27","doi-asserted-by":"crossref","unstructured":"Raganato, A., Scherrer, Y., Tiedemann, J.: Fixed encoder self-attention patterns in transformer-based machine translation. arXiv preprint arXiv:2002.10260 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.49"},{"key":"35_CR28","unstructured":"Rohde, T., Wu, X., Liu, Y.: Hierarchical learning for generation with long source sequences. CoRR arXiv:abs\/2104.07545 (2021)"},{"key":"35_CR29","doi-asserted-by":"crossref","unstructured":"Sharma, E., Li, C., Wang, L.: BIGPATENT: a large-scale dataset for abstractive and coherent summarization. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 2204\u20132213. Association for Computational Linguistics, Florence, Italy (Jul 2019)","DOI":"10.18653\/v1\/P19-1212"},{"key":"35_CR30","doi-asserted-by":"crossref","unstructured":"Shaw, P., Uszkoreit, J., Vaswani, A.: Self-attention with relative position representations. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 2 (Short Papers), pp. 464\u2013468. New Orleans, Louisiana (Jun 2018)","DOI":"10.18653\/v1\/N18-2074"},{"key":"35_CR31","unstructured":"Shen, T., Zhou, T., Long, G., Jiang, J., Zhang, C.: Bi-directional block self-attention for fast and memory-efficient sequence modeling. arXiv preprint arXiv:1804.00857 (2018)"},{"key":"35_CR32","unstructured":"Shen, Z., Zhang, M., Yi, S., Yan, J., Zhao, H.: Factorized attention: self-attention with linear complexities. CoRR arXiv:abs\/1812.01243 (2018)"},{"key":"35_CR33","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"35_CR34","unstructured":"Wang, S., Li, B.Z., Khabsa, M., Fang, H., Ma, H.: Linformer: self-attention with linear complexity. CoRR arXiv:abs\/2006.04768 (2020)"},{"key":"35_CR35","unstructured":"Wu, F., Fan, A., Baevski, A., Dauphin, Y.N., Auli, M.: Pay less attention with lightweight and dynamic convolutions. arXiv preprint arXiv:1901.10430 (2019)"},{"key":"35_CR36","doi-asserted-by":"crossref","unstructured":"Xiao, W., Beltagy, I., Carenini, G., Cohan, A.: PRIMERA: pyramid-based masked sentence pre-training for multi-document summarization. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 5245\u20135263. Dublin, Ireland (May 2022)","DOI":"10.18653\/v1\/2022.acl-long.360"},{"key":"35_CR37","unstructured":"Zaheer, M., et al.: Big bird: transformers for longer sequences. In: Advances in Neural Information Processing Systems, vol. 33 (2020)"},{"key":"35_CR38","unstructured":"Zhang, J., Zhao, Y., Saleh, M., Liu, P.J.: Pegasus: pre-training with extracted gap-sentences for abstractive summarization (2019)"},{"key":"35_CR39","doi-asserted-by":"crossref","unstructured":"Zhang, X., Wei, F., Zhou, M.: Hibert: document level pre-training of hierarchical bidirectional transformers for document summarization. arXiv preprint arXiv:1905.06566 (2019)","DOI":"10.18653\/v1\/P19-1499"},{"key":"35_CR40","doi-asserted-by":"crossref","unstructured":"Zhu, C., Liu, Y., Mei, J., Zeng, M.: Mediasum: a large-scale media interview dataset for dialogue summarization. arXiv preprint arXiv:2103.06410 (2021)","DOI":"10.18653\/v1\/2021.naacl-main.474"}],"container-title":["Lecture Notes in Computer Science","Advances in Knowledge Discovery and Data Mining"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-33374-3_35","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T20:04:25Z","timestamp":1710360265000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-33374-3_35"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031333736","9783031333743"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-33374-3_35","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"27 May 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PAKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific-Asia Conference on Knowledge Discovery and Data Mining","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Osaka","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 May 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 May 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pakdd2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/pakdd2023.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"813","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"143","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"18% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.5","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"10","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}