{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T10:23:21Z","timestamp":1768213401574,"version":"3.49.0"},"publisher-location":"Cham","reference-count":37,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031703584","type":"print"},{"value":"9783031703591","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70359-1_23","type":"book-chapter","created":{"date-parts":[[2024,8,29]],"date-time":"2024-08-29T04:02:43Z","timestamp":1724904163000},"page":"386-402","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Pointer-Guided Pre-training: Infusing Large Language Models with\u00a0Paragraph-Level Contextual Awareness"],"prefix":"10.1007","author":[{"given":"Lars","family":"Hillebrand","sequence":"first","affiliation":[]},{"given":"Prabhupad","family":"Pradhan","sequence":"additional","affiliation":[]},{"given":"Christian","family":"Bauckhage","sequence":"additional","affiliation":[]},{"given":"Rafet","family":"Sifa","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,22]]},"reference":[{"key":"23_CR1","unstructured":"Achiam, J., Adler, S., Agarwal, S., Ahmad, L., Akkaya, I., Aleman, F.L., Almeida, D., Altenschmidt, J., Altman, S., Anadkat, S., et\u00a0al.: Gpt-4 technical report. arXiv:2303.08774 (2023)"},{"key":"23_CR2","doi-asserted-by":"crossref","unstructured":"Beltagy, I., Lo, K., Cohan, A.: Scibert: A pretrained language model for scientific text. In: Proc. EMNLP (2019)","DOI":"10.18653\/v1\/D19-1371"},{"key":"23_CR3","doi-asserted-by":"crossref","unstructured":"Brack, A., Hoppe, A., Buscherm\u00f6hle, P., Ewerth, R.: Cross-domain multi-task learning for sequential sentence classification in research papers. In: Proc. JCDL (2022)","DOI":"10.1145\/3529372.3530922"},{"key":"23_CR4","unstructured":"Brown, T., Mann, B., Ryder, N., Subbiah, M., Kaplan, J.D., Dhariwal, P., Neelakantan, A., Shyam, P., Sastry, G., Askell, A., et\u00a0al.: Language models are few-shot learners. In: Proc. NeurIPS (2020)"},{"key":"23_CR5","doi-asserted-by":"crossref","unstructured":"Cho, K., van Merrienboer, B., Gulcehre, C., Bahdanau, D., Bougares, F., Schwenk, H., Bengio, Y.: Learning phrase representations using rnn encoder\u2013decoder for statistical machine translation. In: Proc. EMNLP (2014)","DOI":"10.3115\/v1\/D14-1179"},{"key":"23_CR6","unstructured":"Chowdhury, S.B.R., Brahman, F., Chaturvedi, S.: Is everything in order? a simple way to order sentences. In: Proc. EMNLP (2021)"},{"key":"23_CR7","unstructured":"Chung, J., Gulcehre, C., Cho, K., Bengio, Y.: Empirical evaluation of gated recurrent neural networks on sequence modeling. In: Proc. 
NeurIPS (2014)"},{"key":"23_CR8","unstructured":"Clark, K., Luong, M.T., Le, Q.V., Manning, C.D.: Electra: Pre-training text encoders as discriminators rather than generators. In: Proc. ICLR (2020)"},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"Cohan, A., Beltagy, I., King, D., Dalvi, B., Weld, D.: Pretrained language models for sequential sentence classification. In: Proc. EMNLP (2019)","DOI":"10.18653\/v1\/D19-1383"},{"key":"23_CR10","doi-asserted-by":"crossref","unstructured":"Cui, B., Li, Y., Chen, M., Zhang, Z.: Deep attentive sentence ordering network. In: Proc. EMNLP (2018)","DOI":"10.18653\/v1\/D18-1465"},{"key":"23_CR11","doi-asserted-by":"crossref","unstructured":"Cui, Y., Che, W., Liu, T., Qin, B., Yang, Z.: Pre-training with whole word masking for chinese bert. IEEE\/ACM TASLP (2021)","DOI":"10.1109\/TASLP.2021.3124365"},{"key":"23_CR12","unstructured":"Dao, T., Fu, D.Y., Ermon, S., Rudra, A., R\u00e9, C.: FlashAttention: Fast and memory-efficient exact attention with IO-awareness. In: Proc. NeurIPS (2022)"},{"key":"23_CR13","doi-asserted-by":"crossref","unstructured":"Dernoncourt, F., Lee, J.Y.: PubMed 200k RCT: a dataset for sequential sentence classification in medical abstracts. In: Proc. IJCNLP (2017)","DOI":"10.18653\/v1\/E17-2110"},{"key":"23_CR14","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: Pre-training of deep bidirectional transformers for language understanding. In: Proc. NAACL (2019)"},{"key":"23_CR15","unstructured":"He, P., Gao, J., Chen, W.: DeBERTav3: Improving deBERTa using ELECTRA-style pre-training with gradient-disentangled embedding sharing. In: Proc. ICLR (2023)"},{"key":"23_CR16","doi-asserted-by":"crossref","unstructured":"Hillebrand, L., Deu\u00dfer, T., Dilmaghani, T., Kliem, B., Loitz, R., Bauckhage, C., Sifa, R.: Kpi-bert: A joint named entity recognition and relation extraction model for financial reports. In: Proc. ICPR (2022)","DOI":"10.1109\/ICPR56361.2022.9956191"},{"key":"23_CR17","doi-asserted-by":"crossref","unstructured":"Hillebrand, L., Pielka, M., Leonhard, D., Deu\u00dfer, T., Dilmaghani, T., Kliem, B., Loitz, R., Morad, M., Temath, C., Bell, T., Stenzel, R., Sifa, R.: sustain.ai: a recommender system to analyze sustainability reports. In: Proc. ICAIL (2023)","DOI":"10.1145\/3594536.3595131"},{"key":"23_CR18","unstructured":"Jiang, A.Q., Sablayrolles, A., Roux, A., Mensch, A., Savary, B., Bamford, C., Chaplot, D.S., Casas, D.d.l., Hanna, E.B., Bressand, F., et\u00a0al.: Mixtral of experts. arXiv:2401.04088 (2024)"},{"key":"23_CR19","doi-asserted-by":"crossref","unstructured":"Jin, D., Szolovits, P.: Hierarchical neural networks for sequential sentence classification in medical scientific abstracts. In: Proc. EMNLP (2018)","DOI":"10.18653\/v1\/D18-1349"},{"key":"23_CR20","doi-asserted-by":"crossref","unstructured":"Khattab, O., Zaharia, M.: Colbert: Efficient and effective passage search via contextualized late interaction over bert. In: Proc. SIGIR (2020)","DOI":"10.1145\/3397271.3401075"},{"key":"23_CR21","doi-asserted-by":"crossref","unstructured":"Kim, S.N., Martinez, D., Cavedon, L., Yencken, L.: Automatic classification of sentences to support evidence based medicine. In: BMC Bioinformatics (2011)","DOI":"10.1186\/1471-2105-12-S2-S5"},{"key":"23_CR22","unstructured":"Lewis, P., Perez, E., Piktus, A., Petroni, F., Karpukhin, V., Goyal, N., K\u00fcttler, H., Lewis, M., Yih, W.t., Rockt\u00e4schel, T., et\u00a0al.: Retrieval-augmented generation for knowledge-intensive nlp tasks. In: Proc. 
NeurIPS (2020)"},{"key":"23_CR23","unstructured":"Liu, Y., Ott, M., Goyal, N., Du, J., Joshi, M., Chen, D., Levy, O., Lewis, M., Zettlemoyer, L., Stoyanov, V.: RoBERTa: A robustly optimized BERT pretraining approach. arXiv:1907.11692 (2019)"},{"key":"23_CR24","doi-asserted-by":"crossref","unstructured":"Logeswaran, L., Lee, H., Radev, D.: Sentence ordering and coherence modeling using recurrent neural networks. In: Proc. AAAI (2018)","DOI":"10.1609\/aaai.v32i1.11997"},{"key":"23_CR25","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: Proc. ICLR (2018)"},{"key":"23_CR26","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-bert: Sentence embeddings using siamese bert-networks. In: Proc. EMNLP. pp. 3982\u20133992 (2019)","DOI":"10.18653\/v1\/D19-1410"},{"key":"23_CR27","doi-asserted-by":"crossref","unstructured":"Richardson, W.S., Wilson, M.C., Nishikawa, J., Hayward, R.S.: The well-built clinical question: a key to evidence-based decisions. ACP journal club (1995)","DOI":"10.7326\/ACPJC-1995-123-3-A12"},{"key":"23_CR28","doi-asserted-by":"crossref","unstructured":"Schuster, M., Nakajima, K.: Japanese and korean voice search. In: Proc. ICASSP (2012)","DOI":"10.1109\/ICASSP.2012.6289079"},{"key":"23_CR29","doi-asserted-by":"crossref","unstructured":"Shang, X., Ma, Q., Lin, Z., Yan, J., Chen, Z.: A span-based dynamic local attention model for sequential sentence classification. In: Proc. ACL\/IJCNLP (2021)","DOI":"10.18653\/v1\/2021.acl-short.26"},{"key":"23_CR30","doi-asserted-by":"crossref","unstructured":"Su, J., Ahmed, M., Lu, Y., Pan, S., Bo, W., Liu, Y.: Roformer: Enhanced transformer with rotary position embedding. Neurocomputing (2024)","DOI":"10.1016\/j.neucom.2023.127063"},{"key":"23_CR31","unstructured":"Touvron, H., Martin, L., Stone, K., Albert, P., Almahairi, A., Babaei, Y., Bashlykov, N., Batra, S., Bhargava, P., Bhosale, S., et\u00a0al.: Llama 2: Open foundation and fine-tuned chat models. arXiv:2307.09288 (2023)"},{"key":"23_CR32","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. In: Proc. NeurIPS (2017)"},{"key":"23_CR33","unstructured":"Vinyals, O., Fortunato, M., Jaitly, N.: Pointer networks. In: Proc. NeurIPS (2015)"},{"key":"23_CR34","doi-asserted-by":"crossref","unstructured":"Wettig, A., Gao, T., Zhong, Z., Chen, D.: Should you mask 15% in masked language modeling? In: Proc. EACL (2023)","DOI":"10.18653\/v1\/2023.eacl-main.217"},{"key":"23_CR35","doi-asserted-by":"crossref","unstructured":"Yamada, K., Hirao, T., Sasano, R., Takeda, K., Nagata, M.: Sequential span classification with neural semi-markov crfs for biomedical abstracts. In: Proc. EMNLP (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.77"},{"key":"23_CR36","unstructured":"Yasunaga, M., Bosselut, A., Ren, H., Zhang, X., Manning, C.D., Liang, P.S., Leskovec, J.: Deep bidirectional language-knowledge graph pretraining. In: Proc. NeurIPS (2022)"},{"key":"23_CR37","doi-asserted-by":"crossref","unstructured":"Yasunaga, M., Leskovec, J., Liang, P.: Linkbert: Pretraining language models with document links. In: Proc. ACL (2022)","DOI":"10.18653\/v1\/2022.acl-long.551"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. 
Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70359-1_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T07:27:08Z","timestamp":1768202828000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70359-1_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031703584","9783031703591"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70359-1_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"22 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vilnius","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lithuania","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2024.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}