{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,2]],"date-time":"2025-05-02T04:16:24Z","timestamp":1746159384604,"version":"3.40.4"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031913976","type":"print"},{"value":"9783031913983","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-91398-3_21","type":"book-chapter","created":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T22:06:08Z","timestamp":1746137168000},"page":"274-287","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Extracting Information in a Low-Resource Setting: Case Study on Bioinformatics Workflows"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1988-1875","authenticated-orcid":false,"given":"Cl\u00e9mence","family":"Sebe","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7439-1441","authenticated-orcid":false,"given":"Sarah","family":"Cohen-Boulakia","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0755-2361","authenticated-orcid":false,"given":"Olivier","family":"Ferret","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1846-9144","authenticated-orcid":false,"given":"Aur\u00e9lie","family":"N\u00e9v\u00e9ol","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,2]]},"reference":[{"issue":"4","key":"21_CR1","doi-asserted-by":"publisher","first-page":"555","DOI":"10.1162\/coli.07-034-R2","volume":"34","author":"R Artstein","year":"2008","unstructured":"Artstein, R., Poesio, M.: Inter-coder agreement for computational linguistics. Comput. Linguist. 34(4), 555\u2013596 (2008)","journal-title":"Comput. Linguist."},{"key":"21_CR2","doi-asserted-by":"publisher","unstructured":"Beltagy, I., Lo, K., Cohan, A.: SciBERT: a pretrained language model for scientific text. In: Inui, K., Jiang, J., Ng, V., Wan, X. (eds.) Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp. 3615\u20133620. Association for Computational Linguistics, Hong Kong, China (November 2019). https:\/\/doi.org\/10.18653\/v1\/D19-1371, https:\/\/aclanthology.org\/D19-1371","DOI":"10.18653\/v1\/D19-1371"},{"key":"21_CR3","doi-asserted-by":"publisher","unstructured":"Chen, Q., et\u00a0al.: A systematic evaluation of large language models for biomedical natural language processing: benchmarks, baselines, and recommendations (September 2024). https:\/\/doi.org\/10.48550\/arXiv.2305.16326, http:\/\/arxiv.org\/abs\/2305.16326","DOI":"10.48550\/arXiv.2305.16326"},{"key":"21_CR4","doi-asserted-by":"publisher","first-page":"284","DOI":"10.1016\/j.future.2017.01.012","volume":"75","author":"S Cohen-Boulakia","year":"2017","unstructured":"Cohen-Boulakia, S., Belhajjame, K., Collin, O., Chopard, J., Froidevaux, C., et al.: Scientific workflows for computational reproducibility in the life sciences: status, challenges and opportunities. Futur. Gener. Comput. Syst. 75, 284\u2013298 (2017)","journal-title":"Futur. Gener. Comput. Syst."},{"key":"21_CR5","doi-asserted-by":"publisher","first-page":"316","DOI":"10.1038\/nbt.3820","volume":"35","author":"P Di Tommaso","year":"2017","unstructured":"Di Tommaso, P., Chatzou, M., Floden, E.W., Barja, P., Palumbo, E., Notredame, C.: Nextflow enables reproducible computational workflows. Nature Biotech 35, 316\u2013319 (2017). https:\/\/doi.org\/10.1038\/nbt.3820","journal-title":"Nature Biotech"},{"key":"21_CR6","doi-asserted-by":"crossref","unstructured":"Du, C., Cohoon, J., Lopez, P., Howison, J.: Softcite dataset: a dataset of software mentions in biomedical and economic research publications. J. Assoc. Inf. Sci. Technol. 72(7), 870\u2013884 (2021). https:\/\/onlinelibrary.wiley.com\/doi\/abs\/10.1002\/asi.24454","DOI":"10.1002\/asi.24454"},{"key":"21_CR7","doi-asserted-by":"crossref","unstructured":"Fort, K.: Collaborative Annotation for Reliable Natural Language Processing: Technical and Sociological Aspects. Wiley-ISTE, Hoboken, July 2016. https:\/\/hal.archives-ouvertes.fr\/hal-01324322","DOI":"10.1002\/9781119306696"},{"key":"21_CR8","unstructured":"Fort, K., Nazarenko, A., Rosset, S.: Modeling the complexity of manual annotation tasks: a grid of analysis. In: Kay, M., Boitet, C. (eds.) Proceedings of COLING 2012, pp. 895\u2013910. The COLING 2012 Organizing Committee, Mumbai, India (December 2012). https:\/\/aclanthology.org\/C12-1055"},{"key":"21_CR9","doi-asserted-by":"crossref","unstructured":"Gee, L., Zugarini, A., Rigutini, L., Torroni, P.: Fast vocabulary transfer for language model compression. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track, pp. 409\u2013416. Association for Computational Linguistics, Abu Dhabi, UAE (December 2022). https:\/\/aclanthology.org\/2022.emnlp-industry.41","DOI":"10.18653\/v1\/2022.emnlp-industry.41"},{"key":"21_CR10","unstructured":"Grouin, C., Rosset, S., Zweigenbaum, P., Fort, K., Galibert, O., Quintard, L.: Proposal for an extension of traditional named entities: from guidelines to evaluation, an overview. In: Proceedings of the Linguistic Annotation Workshop (LAW-V), pp. 92\u2013100. Portland, OR, 23\u201324 June 2011. http:\/\/www.aclweb.org\/anthology\/W11-0411"},{"key":"21_CR11","doi-asserted-by":"crossref","unstructured":"Gr\u00fcning, B., et al.: Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat. Methods 15(7), 475\u2013476 (2018)","DOI":"10.1038\/s41592-018-0046-7"},{"key":"21_CR12","doi-asserted-by":"publisher","unstructured":"Hong, J., Kim, T., Lim, H., Choo, J.: AVocaDo: strategy for adapting vocabulary to downstream domain. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 4692\u20134700. Association for Computational Linguistics, Online and Punta Cana, Dominican Republic (November 2021). https:\/\/doi.org\/10.18653\/v1\/2021.emnlp-main.385","DOI":"10.18653\/v1\/2021.emnlp-main.385"},{"key":"21_CR13","doi-asserted-by":"crossref","unstructured":"Ison, J., et al.: Tools and data services registry: a community effort to document bioinformatics resources. Nucl. Acids Res. 44(D1), D38\u2013D47 (2016)","DOI":"10.1093\/nar\/gkv1116"},{"key":"21_CR14","unstructured":"Istrate, A.M., Li, D., Taraborelli, D., Torkar, M., Veytsman, B., Williams, I.: A large dataset of software mentions in the biomedical literature (September 2022). http:\/\/arxiv.org\/abs\/2209.00693"},{"key":"21_CR15","doi-asserted-by":"crossref","unstructured":"Kieser, S., Brown, J., Zdobnov, E.M., Trajkovski, M., McCue, L.A.: ATLAS: a Snakemake workflow for assembly, annotation, and genomic binning of metagenome sequence data. BMC Bioinformatics 21(1), 257 (2020)","DOI":"10.1186\/s12859-020-03585-4"},{"key":"21_CR16","doi-asserted-by":"publisher","first-page":"2520","DOI":"10.1093\/bioinformatics\/bts480","volume":"28","author":"J K\u00f6ster","year":"2012","unstructured":"K\u00f6ster, J., Rahmann, S.: Snakemake - a scalable bioinformatics workflow engine. Bioinformatics (Oxford, England) 28, 2520\u20132 (2012). https:\/\/doi.org\/10.1093\/bioinformatics\/bts480","journal-title":"Bioinformatics (Oxford, England)"},{"key":"21_CR17","unstructured":"Liu, Z., Jiang, F., Hu, Y., Shi, C., Fung, P.: NER-BERT: a pre-trained model for low-resource entity tagging. arXiv preprint arXiv:2112.00405 (2021)"},{"key":"21_CR18","doi-asserted-by":"crossref","unstructured":"Naguib, M., Tannier, X., N\u00e9v\u00e9ol, A.: Few-shot clinical entity recognition in English, French and Spanish: masked language models outperform generative model prompting. In: Al-Onaizan, Y., Bansal, M., Chen, Y.N. (eds.) Findings of the Association for Computational Linguistics: EMNLP 2024, pp. 6829\u20136852. Association for Computational Linguistics, Miami, Florida, USA (November 2024). https:\/\/aclanthology.org\/2024.findings-emnlp.400","DOI":"10.18653\/v1\/2024.findings-emnlp.400"},{"key":"21_CR19","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1093\/bib\/bbz130","volume":"22","author":"M Neves","year":"2019","unstructured":"Neves, M., \u0160eva, J.: An extensive review of tools for manual annotation of documents. Brief. Bioinform. 22, 146\u2013163 (2019). https:\/\/doi.org\/10.1093\/bib\/bbz130","journal-title":"Brief. Bioinform."},{"key":"21_CR20","unstructured":"NLM: MEDLINE, PubMed, and PMC (PubMed Central): How are they different? (2021). https:\/\/www.nlm.nih.gov\/bsd\/difference.html"},{"key":"21_CR21","doi-asserted-by":"crossref","unstructured":"Pan, H., Zhang, Q., Dragut, E., Caragea, C., Latecki, L.J.: DMDD: a large-scale dataset for dataset mentions detection. Trans. Assoc. Comput. Linguist. 11, 1132\u20131146 (2023)","DOI":"10.1162\/tacl_a_00592"},{"key":"21_CR22","unstructured":"Sebe, C., Cohen-Boulakia, S., Ferret, O., N\u00e9v\u00e9ol, A.: Extraction d\u2018entit\u00e9s nomm\u00e9es d\u00e9crivant des cha\u00eenes de traitement bioinformatiques dans des articles scientifiques en anglais. In: Balaguer, M., Bendahman, N., Ho-dac, L.M., Mauclair, J., G\u00a0Moreno, J., Pinquier, J. (eds.) Actes de la 31\u00e8me Conf\u00e9rence sur le Traitement Automatique des Langues Naturelles, volume 1 : articles longs et prises de position, pp. 422\u2013434. ATALA and AFPC, Toulouse, France (July 2024). https:\/\/aclanthology.org\/2024.jeptalnrecital-taln.30\/"},{"key":"21_CR23","unstructured":"Stenetorp, P., Pyysalo, S., Topic, G., Ohta, T., Ananiadou, S., Tsujii, J.: Brat: a web-based tool for NLP-assisted text annotation. In: Proceedings of the Demonstrations at the 13th Conference of the European Chapter of the Association for Computational Linguistics, pp. 102\u2013107 (April 2012)"},{"key":"21_CR24","doi-asserted-by":"crossref","unstructured":"da Veiga Leprevost, F., et al.: BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics 33(16), 2580\u20132582 (2017)","DOI":"10.1093\/bioinformatics\/btx192"},{"key":"21_CR25","doi-asserted-by":"publisher","unstructured":"Verspoor, K., et al.: Annotating the biomedical literature for the Human Variome. Database J. Biol. Databases Curation 2013, bat019 (2013). https:\/\/doi.org\/10.1093\/database\/bat019","DOI":"10.1093\/database\/bat019"},{"key":"21_CR26","unstructured":"Wajsb\u00fcrt, P.: Extraction et normalisation d\u2019entit\u00e9s simples et structur\u00e9es dans les documents m\u00e9dicaux. Th\u00e8se de doctorat, Sorbonne universit\u00e9 (December 2021). https:\/\/www.theses.fr\/2021SORUS541"},{"key":"21_CR27","doi-asserted-by":"crossref","unstructured":"Yates, J.A.F., et al.: Reproducible, portable, and efficient ancient genome reconstruction with nf-core\/eager. PeerJ 9, e10947 (2021). https:\/\/peerj.com\/articles\/10947","DOI":"10.7717\/peerj.10947"},{"key":"21_CR28","doi-asserted-by":"publisher","unstructured":"Zaratiana, U., Tomeh, N., Holat, P., Charnois, T.: GLiNER: generalist model for named entity recognition using bidirectional transformer. In: Duh, K., Gomez, H., Bethard, S. (eds.) Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pp. 5364\u20135376. Association for Computational Linguistics, Mexico City, Mexico (June 2024). https:\/\/doi.org\/10.18653\/v1\/2024.naacl-long.300, https:\/\/aclanthology.org\/2024.naacl-long.300\/","DOI":"10.18653\/v1\/2024.naacl-long.300"}],"container-title":["Lecture Notes in Computer Science","Advances in Intelligent Data Analysis XXIII"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-91398-3_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T22:06:24Z","timestamp":1746137184000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-91398-3_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031913976","9783031913983"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-91398-3_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"2 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"IDA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Intelligent Data Analysis","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Konstanz","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 May 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 May 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ida2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/ida2025.blogs.dsv.su.se\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}