{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:06:49Z","timestamp":1750309609439,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"DFG","award":["59288952"],"award-info":[{"award-number":["59288952"]}]},{"name":"CAPES","award":["88887.671481\/2022-00"],"award-info":[{"award-number":["88887.671481\/2022-00"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,31]]},"DOI":"10.1145\/3672608.3707858","type":"proceedings-article","created":{"date-parts":[[2025,5,14]],"date-time":"2025-05-14T18:26:21Z","timestamp":1747247181000},"page":"980-987","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Improving Natural Product Knowledge Extraction from Academic Literature with Enhanced PDF Text Extraction and Large Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8550-4368","authenticated-orcid":false,"given":"Paulo","family":"Viviurka do Carmo","sequence":"first","affiliation":[{"name":"HTWK Faculty of Computer Science AKSW, Leipzig, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9093-8195","authenticated-orcid":false,"given":"Marcos Paulo","family":"Silva G\u00f4lo","sequence":"additional","affiliation":[{"name":"Institute of Mathematical and Computer Sciences University of S\u00e3o Paulo, S\u00e3o Carlos, Brazil"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8348-2357","authenticated-orcid":false,"given":"Jonas","family":"Gwozdz","sequence":"additional","affiliation":[{"name":"HTWK Faculty of Computer Science AKSW, Leipzig, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3111-9405","authenticated-orcid":false,"given":"Edgard","family":"Marx","sequence":"additional","affiliation":[{"name":"HTWK Faculty of Computer Science AKSW, Leipzig, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2309-3487","authenticated-orcid":false,"given":"Ricardo","family":"Marcondes Marcacini","sequence":"additional","affiliation":[{"name":"Institute of Mathematical and Computer Sciences University of S\u00e3o Paulo, S\u00e3o Carlos, Brazil"}]}],"member":"320","published-online":{"date-parts":[[2025,5,14]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Machine Learning for Text","author":"Aggarwal Charu","unstructured":"Charu Aggarwal. 2018. Machine Learning for Text (1st ed.). Springer Publishing Company, Incorporated, United States.","edition":"1"},{"key":"e_1_3_2_1_2_1","volume-title":"Nougat: Neural Optical Understanding for Academic Documents. arXiv:2308.13418 [cs.LG]","author":"Blecher Lukas","year":"2023","unstructured":"Lukas Blecher, Guillem Cucurull, Thomas Scialom, and Robert Stojnic. 2023. Nougat: Neural Optical Understanding for Academic Documents. arXiv:2308.13418 [cs.LG]"},{"key":"e_1_3_2_1_3_1","unstructured":"Antoine Bordes Nicolas Usunier Alberto Garcia-Duran Jason Weston and Oksana Yakhnenko. 2013. Translating embeddings for modeling multi-relational data. In Advances in neural information processing systems. ACM USA 1\u20139."},{"key":"e_1_3_2_1_4_1","first-page":"40","article-title":"A survey on evaluation of large language models","volume":"15","author":"Chang Yupeng","year":"2023","unstructured":"Yupeng Chang, Xu Wang, Jindong Wang, Yuan Wu, Linyi Yang, Kaijie Zhu, Hao Chen, Xiaoyuan Yi, Cunxiang Wang, Yidong Wang, et al. 2023. A survey on evaluation of large language models. ACM Transactions on Intelligent Systems and Technology 15, 3 (2023), 40\u201385.","journal-title":"ACM Transactions on Intelligent Systems and Technology"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557316"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403209"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData52589.2021.9671645"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSC56153.2023.00039"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098036"},{"key":"e_1_3_2_1_10_1","unstructured":"Abhimanyu Dubey Abhinav Jauhri and Abhinav Pandey et al. 2024. The Llama 3 Herd of Models. arXiv:2407.21783 [cs.AI] https:\/\/arxiv.org\/abs\/2407.21783"},{"key":"e_1_3_2_1_11_1","unstructured":"Pit Fr\u00f6hlich Jonas Gwozdz and Matthias Joo\u00df. 2023. Leveraging ChatGPT API for Enhanced Data Preprocessing in NatUKE.. In TEXT2KG\/BiKE@ ESWC. CEUR-WS Greece 244\u2013255."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.106088"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","unstructured":"Maarten Grootendorst. 2020. BERTopic: Leveraging BERT and c-TF-IDF to create easily interpretable topics. 10.5281\/zenodo.4381785","DOI":"10.5281\/zenodo.4381785"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939754"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-01918-0"},{"key":"e_1_3_2_1_16_1","volume-title":"Variational Graph Auto-Encoders. stat 1050","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and Max Welling. 2016. Variational Graph Auto-Encoders. stat 1050 (2016), 21."},{"key":"e_1_3_2_1_17_1","volume-title":"S\u00f6ren Auer, et al.","author":"Lehmann Jens","year":"2015","unstructured":"Jens Lehmann, Robert Isele, Max Jakob, Anja Jentzsch, Dimitris Kontokostas, Pablo N Mendes, Sebastian Hellmann, Mohamed Morsey, Patrick Van Kleef, S\u00f6ren Auer, et al. 2015. Dbpedia-a large-scale, multilingual knowledge base extracted from wikipedia. Semantic web 6, 2 (2015), 167\u2013195."},{"key":"e_1_3_2_1_18_1","volume-title":"GPT understands, too. AI Open pre-proof, pre-proof","author":"Liu Xiao","year":"2023","unstructured":"Xiao Liu, Yanan Zheng, Zhengxiao Du, Ming Ding, Yujie Qian, Zhilin Yang, and Jie Tang. 2023. GPT understands, too. AI Open pre-proof, pre-proof (2023), 1\u201312."},{"key":"e_1_3_2_1_19_1","unstructured":"Patrice Lopez. 2008\u20132024. GROBID. https:\/\/github.com\/kermitt2\/grobid.swh:1:dir:dab86b296e3c3216e2241968f0d63b68e8209d3c"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jnatprod.9b01285"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/2623330.2623732"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-017-07451-x"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"},{"key":"e_1_3_2_1_24_1","unstructured":"M Riviere S Pathak PG Sessa C Hardin S Bhupatiraju L Hussenot T Mesnard B Shahriari A Ram\u00e9 et al. 2024. Gemma 2: Improving Open Language Models at a Practical Size. arXiv:2408.00118 [cs.CL] https:\/\/arxiv.org\/abs\/2408.00118"},{"key":"e_1_3_2_1_25_1","unstructured":"Victor Sanh Lysandre Debut Julien Chaumond and Thomas Wolf. 2020. DistilBERT a distilled version of BERT: smaller faster cheaper and lighter. arXiv:1910.01108 [cs.CL] https:\/\/arxiv.org\/abs\/1910.01108"},{"volume-title":"European semantic web conference","author":"Schlichtkrull Michael","key":"e_1_3_2_1_26_1","unstructured":"Michael Schlichtkrull, Thomas N Kipf, Peter Bloem, Rianne van den Berg, Ivan Titov, and Max Welling. 2018. Modeling relational data with graph convolutional networks. In European semantic web conference. Springer, Greece, 593\u2013607."},{"key":"e_1_3_2_1_27_1","unstructured":"Stefan Schmidt-Dichte and Istv\u00e1n J M\u00f3csy. 2023. Improving Natural Product Automatic Extraction with Named Entity Recognition.. In TEXT2KG\/BiKE@ ESWC. CEUR-WS Greece 226\u2013234."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.3233\/SW-2011-0052"},{"key":"e_1_3_2_1_29_1","unstructured":"Gemma Team Morgane Riviere Shreya Pathak and Pier Giuseppe Sessa et al. 2024. Gemma 2: Improving Open Language Models at a Practical Size. arXiv:2408.00118 [cs.CL] https:\/\/arxiv.org\/abs\/2408.00118"},{"key":"e_1_3_2_1_30_1","volume-title":"Joint Proceedings of the Second International Workshop on Knowledge Graph Generation From Text and the First International BiKE Challenge co-located with 20th Extended Semantic Conference (ESWC 2023).","volume":"3447","author":"Tiwari Sanju","year":"2023","unstructured":"Sanju Tiwari, Nandana Mihindukulasooriya, Francesco Osborne, Dimitris Kontokostas, Jennifer D'Souza, Mayank Kejriwal, and Edgard Marx (Eds.). 2023. Joint Proceedings of the Second International Workshop on Knowledge Graph Generation From Text and the First International BiKE Challenge co-located with 20th Extended Semantic Conference (ESWC 2023). Vol. 3447. CEUR Workshop Proceedings, Hersonissos, Greece."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1021\/np3006875"},{"key":"e_1_3_2_1_32_1","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research 9, 11 (2008), 2579\u20132605.","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2187980.2188242"},{"key":"e_1_3_2_1_34_1","unstructured":"Wayne Xin Zhao Kun Zhou Junyi Li Tianyi Tang Xiaolei Wang Yupeng Hou Yingqian Min Beichen Zhang Junjie Zhang Zican Dong et al. 2023. A survey of large language models. arXiv preprint arXiv:2303.18223 1 1 (2023) 124."},{"key":"e_1_3_2_1_35_1","unstructured":"Bhushan Zope Sashikala Mishra and Sanju Tiwari. 2023. Enhancing Biochemical Extraction with BFS-driven Knowledge Graph Embedding approach.. In TEXT2KG\/BiKE@ ESWC. CEUR-WS Greece 235\u2013243."}],"event":{"name":"SAC '25: 40th ACM\/SIGAPP Symposium on Applied Computing","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"],"location":"Catania International Airport Catania Italy","acronym":"SAC '25"},"container-title":["Proceedings of the 40th ACM\/SIGAPP Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3672608.3707858","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3672608.3707858","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:57:33Z","timestamp":1750298253000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3672608.3707858"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,31]]},"references-count":35,"alternative-id":["10.1145\/3672608.3707858","10.1145\/3672608"],"URL":"https:\/\/doi.org\/10.1145\/3672608.3707858","relation":{},"subject":[],"published":{"date-parts":[[2025,3,31]]},"assertion":[{"value":"2025-05-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}