{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T08:30:38Z","timestamp":1768033838421,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":84,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,16]]},"DOI":"10.1145\/3732775.3733586","type":"proceedings-article","created":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T13:51:02Z","timestamp":1750427462000},"page":"1-13","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["HiPerRAG: High-Performance Retrieval Augmented Generation for Scientific Insights"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5299-1983","authenticated-orcid":false,"given":"Ozan","family":"Gokdemir","sequence":"first","affiliation":[{"name":"University of Chicago, Chicago, USA"},{"name":"Argonne National Laboratory, Chicago, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0215-3716","authenticated-orcid":false,"given":"Carlo","family":"Siebenschuh","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, USA"},{"name":"Argonne National Laboratory, Chicago, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9873-9177","authenticated-orcid":false,"given":"Alexander","family":"Brace","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, USA"},{"name":"Argonne National Laboratory, Chicago, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1183-7143","authenticated-orcid":false,"given":"Azton","family":"Wells","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5027-607X","authenticated-orcid":false,"given":"Brian","family":"Hsu","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Chicago, USA"},{"name":"University of Chicago, Chicago, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9470-572X","authenticated-orcid":false,"given":"Kyle","family":"Hippe","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, USA"},{"name":"Argonne National Laboratory, Chicago, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3373-5981","authenticated-orcid":false,"given":"Priyanka","family":"Setty","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, USA"},{"name":"Argonne National Laboratory, Chicago, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7812-2962","authenticated-orcid":false,"given":"Aswathy","family":"Ajith","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6547-6902","authenticated-orcid":false,"given":"J. Gregory","family":"Pauloski","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0366-4186","authenticated-orcid":false,"given":"Varuni","family":"Sastry","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9981-0876","authenticated-orcid":false,"given":"Sam","family":"Foreman","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9008-9552","authenticated-orcid":false,"given":"Huihuo","family":"Zheng","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7667-922X","authenticated-orcid":false,"given":"Heng","family":"Ma","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3653-228X","authenticated-orcid":false,"given":"Bharat","family":"Kale","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9652-691X","authenticated-orcid":false,"given":"Nicholas","family":"Chia","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9196-5830","authenticated-orcid":false,"given":"Thomas","family":"Gibbs","sequence":"additional","affiliation":[{"name":"NVIDIA Inc., Santa Clara, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6418-5767","authenticated-orcid":false,"given":"Michael","family":"Papka","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"},{"name":"University of Illinois Chicago, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9301-9760","authenticated-orcid":false,"given":"Thomas","family":"Brettin","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9848-555X","authenticated-orcid":false,"given":"Francis","family":"Alexander","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6974-6797","authenticated-orcid":false,"given":"Anima","family":"Anandkumar","sequence":"additional","affiliation":[{"name":"California Institute of Technology, Pasadena, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2129-5269","authenticated-orcid":false,"given":"Ian","family":"Foster","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"},{"name":"University of Chicago, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4268-4020","authenticated-orcid":false,"given":"Rick","family":"Stevens","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7248-6116","authenticated-orcid":false,"given":"Venkatram","family":"Vishwanath","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1622-5488","authenticated-orcid":false,"given":"Arvind","family":"Ramanathan","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"},{"name":"University of Chicago, Lemont, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,6,20]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-53637-8"},{"key":"e_1_3_2_1_2_1","unstructured":"2023. PubMedQA: A Dataset for Biomedical Research Question Answering. PubMedQA Project Website. https:\/\/pubmedqa.github.io\/ Accessed: 2023-10-11."},{"key":"e_1_3_2_1_3_1","unstructured":"2024. PDFMiner. https:\/\/pypi.org\/project\/pdfminer\/. Accessed: [12\/7\/2024]."},{"key":"e_1_3_2_1_4_1","unstructured":"2024. PyMuPDF Documentation. https:\/\/pymupdf.readthedocs.io\/en\/latest\/. Accessed: [12\/7\/2024]."},{"key":"e_1_3_2_1_5_1","unstructured":"2024. PyPDF Documentation. https:\/\/pypdf.readthedocs.io\/en\/stable\/. Accessed: [12\/7\/2024]."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.12688\/f1000research.7329.2"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1093\/jamia\/ocae166"},{"key":"e_1_3_2_1_8_1","unstructured":"Zhiyu An Xianzhong Ding Yen-Chun Fu Cheng-Chung Chu Yan Li and Wan Du. 2024. Golden-Retriever: High-Fidelity Agentic Retrieval Augmented Generation for Industrial Knowledge Base. arXiv:2408.00798 [cs.IR] https:\/\/arxiv.org\/abs\/2408.00798"},{"key":"e_1_3_2_1_9_1","volume-title":"Self-RAG: Learning to retrieve, generate, and critique through self-reflection. arXiv preprint arXiv:2310.11511","author":"Asai Akari","year":"2023","unstructured":"Akari Asai, Zeqiu Wu, Yizhong Wang, Avirup Sil, and Hannaneh Hajishirzi. 2023. Self-RAG: Learning to retrieve, generate, and critique through self-reflection. arXiv preprint arXiv:2310.11511 (2023)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-023-33607-z"},{"key":"e_1_3_2_1_11_1","volume-title":"Parsl: Pervasive Parallel Programming in Python. In ACM International Symposium on High-Performance Parallel and Distributed Computing.","author":"Babuji Yadu","year":"2019","unstructured":"Yadu Babuji, Anna Woodard, Zhuozhao Li, Ben Clifford, Rohan Kumar, Lukasz Lacinski, Ryan Chard, Justin Wozniak, Ian Foster, Michael Wilde, Daniel Katz, and Kyle Chard. 2019. Parsl: Pervasive Parallel Programming in Python. In ACM International Symposium on High-Performance Parallel and Distributed Computing."},{"key":"e_1_3_2_1_12_1","volume-title":"Learning Representations by Maximizing Mutual Information Across Views. arXiv preprint arXiv:1906.00910","author":"Bachman Philip","year":"2019","unstructured":"Philip Bachman, R Devon Hjelm, and William Buchwalter. 2019. Learning Representations by Maximizing Mutual Information Across Views. arXiv preprint arXiv:1906.00910 (2019)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1037\/amp0001222"},{"key":"e_1_3_2_1_14_1","volume-title":"Nougat: Neural optical understanding for academic documents. arXiv preprint arXiv:2308.13418","author":"Blecher Lukas","year":"2023","unstructured":"Lukas Blecher, Guillem Cucurull, Thomas Scialom, and Robert Stojnic. 2023. Nougat: Neural optical understanding for academic documents. arXiv preprint arXiv:2308.13418 (2023)."},{"key":"e_1_3_2_1_15_1","unstructured":"E. Bolton D. Hall M. Yasunaga T. Lee C. Manning and P. Liang. 2022. Stanford CRFM Introduces PubMedGPT 2.7B. Stanford HAI. https:\/\/hai.stanford.edu\/news\/stanford-crfm-introduces-pubmedgpt-27b"},{"key":"e_1_3_2_1_16_1","unstructured":"Alexander Brace and J. Gregory Pauloski. 2023. https:\/\/github.com\/braceal\/parsl_object_registry. Accessed: 2024-10-09."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","unstructured":"Yapei Chang Kyle Lo Tanya Goyal and Mohit Iyyer. 2023. BooookScore: A systematic exploration of book-length summarization in the era of LLMs. 10.48550\/ARXIV.2310.00785","DOI":"10.48550\/ARXIV.2310.00785"},{"key":"e_1_3_2_1_18_1","volume-title":"Pheng Ann Heng, and Guangyong Chen","author":"Chen Kexin","year":"2024","unstructured":"Kexin Chen, Junyou Li, Kunyi Wang, Yuyang Du, Jiahui Yu, Jiamin Lu, Lanqing Li, Jiezhong Qiu, Jianzhang Pan, Yi Huang, Qun Fang, Pheng Ann Heng, and Guangyong Chen. 2024. Chemist-X: Large Language Model-empowered Agent for Reaction Condition Recommendation in Chemical Synthesis. arXiv:2311.10776 [cs.IR] https:\/\/arxiv.org\/abs\/2311.10776"},{"key":"e_1_3_2_1_19_1","volume-title":"Cohen","author":"Chen Wenhu","year":"2022","unstructured":"Wenhu Chen, Hexiang Hu, Xi Chen, Pat Verga, and William W. Cohen. 2022. MuRAG: Multimodal Retrieval-Augmented Generator for Open Question Answering over Images and Text. arXiv:2210.02928 [cs.CL] https:\/\/arxiv.org\/abs\/2210.02928"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2021636118"},{"key":"e_1_3_2_1_21_1","volume-title":"Dola: Decoding by contrasting layers improves factuality in large language models. arXiv preprint arXiv:2309.03883","author":"Chuang Yung-Sung","year":"2023","unstructured":"Yung-Sung Chuang, Yujia Xie, Hongyin Luo, Yoon Kim, James Glass, and Pengcheng He. 2023. Dola: Decoding by contrasting layers improves factuality in large language models. arXiv preprint arXiv:2309.03883 (2023)."},{"key":"e_1_3_2_1_22_1","unstructured":"Steven Deitz and Christina Freyman. 2024. Science and Engineering Indicators 2024: The State of U.S. Science and Engineering. Technical Report NSB-2024-3. National Science Foundation Alexandria VA. https:\/\/ncses.nsf.gov\/pubs\/nsb20243"},{"key":"e_1_3_2_1_23_1","volume-title":"QLoRA: Efficient Finetuning of Quantized LLMs. arXiv preprint arXiv:2305.14314","author":"Dettmers Tim","year":"2023","unstructured":"Tim Dettmers, Artidoro Pagnoni, Ari Holtzman, and Luke Zettlemoyer. 2023. QLoRA: Efficient Finetuning of Quantized LLMs. arXiv preprint arXiv:2305.14314 (2023)."},{"key":"e_1_3_2_1_24_1","unstructured":"Matthijs Douze Alexandr Guzhva Chengqi Deng Jeff Johnson Gergely Szilvasy Pierre-Emmanuel Mazar\u00e9 Maria Lomeli Lucas Hosseini and Herv\u00e9 J\u00e9gou. 2024. The Faiss library. arXiv:2401.08281 [cs.LG]"},{"key":"e_1_3_2_1_25_1","unstructured":"Darren Edge Ha Trinh Newman Cheng Joshua Bradley Alex Chao Apurva Mody Steven Truitt and Jonathan Larson. 2024. From Local to Global: A Graph RAG Approach to Query-Focused Summarization. arXiv:2404.16130 [cs.CL] https:\/\/arxiv.org\/abs\/2404.16130"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Lutfi Eren Erdogan Nicholas Lee Siddharth Jha Sehoon Kim Ryan Tabrizi Suhong Moon Coleman Hooper Gopala Anumanchipalli Kurt Keutzer and Amir Gholami. 2024. TinyAgent: Function Calling at the Edge. arXiv:2409.00608 [cs.CL] https:\/\/arxiv.org\/abs\/2409.00608","DOI":"10.18653\/v1\/2024.emnlp-demo.9"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.aao0185"},{"key":"e_1_3_2_1_28_1","volume-title":"The Pile: An 800GB Dataset of Diverse Text for Language Modeling. arXiv:2101.00027 [cs.CL] https:\/\/arxiv.org\/abs\/2101.00027","author":"Gao Leo","year":"2020","unstructured":"Leo Gao, Stella Biderman, Sid Black, Laurence Golding, Travis Hoppe, Charles Foster, Jason Phang, Horace He, Anish Thite, Noa Nabeshima, Shawn Presser, and Connor Leahy. 2020. The Pile: An 800GB Dataset of Diverse Text for Language Modeling. arXiv:2101.00027 [cs.CL] https:\/\/arxiv.org\/abs\/2101.00027"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"key":"e_1_3_2_1_30_1","volume-title":"Retrieval-augmented generation for large language models: A survey. arXiv preprint arXiv:2312.10997","author":"Gao Yunfan","year":"2023","unstructured":"Yunfan Gao, Yun Xiong, Xinyu Gao, Kangxiang Jia, Jinliu Pan, Yuxi Bi, Yi Dai, Jiawei Sun, and Haofen Wang. 2023. Retrieval-augmented generation for large language models: A survey. arXiv preprint arXiv:2312.10997 (2023)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2409.18454"},{"key":"e_1_3_2_1_32_1","volume-title":"SwitchPrompt: Learning domain-specific gated soft prompts for classification in low-resource domains. arXiv preprint arXiv:2302.06868","author":"Goswami Koustava","year":"2023","unstructured":"Koustava Goswami, Lukas Lange, Jun Araki, and Heike Adel. 2023. SwitchPrompt: Learning domain-specific gated soft prompts for classification in low-resource domains. arXiv preprint arXiv:2302.06868 (2023)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458754"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458754"},{"key":"e_1_3_2_1_35_1","volume-title":"LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=nZeVKeeFYf9","author":"Hu Edward J","year":"2022","unstructured":"Edward J Hu, yelong shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2022. LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=nZeVKeeFYf9"},{"key":"e_1_3_2_1_36_1","volume-title":"Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al.","author":"Jiang Albert Q","year":"2023","unstructured":"Albert Q Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al. 2023. Mistral 7B. arXiv preprint arXiv:2310.06825 (2023)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","unstructured":"Albert Q. Jiang Alexandre Sablayrolles Arthur Mensch Chris Bamford Devendra Singh Chaplot Diego de las Casas Florian Bressand Gianna Lengyel Guillaume Lample Lucile Saulnier L\u00e9lio Renard Lavaud Marie-Anne Lachaux Pierre Stock Teven Le Scao Thibaut Lavril Thomas Wang Timoth\u00e9e Lacroix and William El Sayed. 2023. Mistral 7B. arXiv e-prints Article arXiv:2310.06825 (Oct. 2023) arXiv:2310.06825 pages. arXiv:2310.06825 [cs.CL] 10.48550\/arXiv.2310.06825","DOI":"10.48550\/arXiv.2310.06825"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICAAIC60222.2024.10574972"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Vladimir Karpukhin Barlas O\u011fuz Sewon Min Patrick Lewis Ledell Wu Sergey Edunov Danqi Chen and Wen tau Yih. 2020. Dense Passage Retrieval for Open-Domain Question Answering. arXiv:2004.04906 [cs.CL] https:\/\/arxiv.org\/abs\/2004.04906","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_40_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Conference of the North American","author":"Ming-Wei Chang Jacob Devlin","year":"2019","unstructured":"Jacob Devlin Ming-Wei Chang Kenton and Lee Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. 4171\u20134186."},{"key":"e_1_3_2_1_41_1","volume-title":"43rd International ACM SIGIR Conference on Research and Development in Information Retrieval. 39\u201348","author":"Khattab Omar","year":"2020","unstructured":"Omar Khattab and Matei Zaharia. 2020. ColBERT: Efficient and effective passage search via contextualized late interaction over BERT. In 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval. 39\u201348."},{"key":"e_1_3_2_1_42_1","volume-title":"AAAI Conference on Artificial Intelligence","volume":"32","author":"Khot Tushar","year":"2018","unstructured":"Tushar Khot, Ashish Sabharwal, and Peter Clark. 2018. SciTaiL: A textual entailment dataset from science question answering. In AAAI Conference on Artificial Intelligence, Vol. 32."},{"key":"e_1_3_2_1_43_1","volume-title":"European Conference on Computer Vision. Springer, 498\u2013517","author":"Kim Geewook","year":"2022","unstructured":"Geewook Kim, Teakgyu Hong, Moonbin Yim, JeongYeon Nam, Jinyoung Park, Jinyeong Yim, Wonseok Hwang, Sangdoo Yun, Dongyoon Han, and Seunghyun Park. 2022. OCR-free document understanding transformer. In European Conference on Computer Vision. Springer, 498\u2013517."},{"key":"e_1_3_2_1_44_1","volume-title":"PaperQA: Retrieval-augmented generative agent for scientific research. Preprint ArXiv:2312.07559","author":"L\u00e1la Jakub","year":"2023","unstructured":"Jakub L\u00e1la, Odhran O'Donoghue, Aleksandar Shtedritski, Sam Cox, Samuel G Rodriques, and Andrew D White. 2023. PaperQA: Retrieval-augmented generative agent for scientific research. Preprint ArXiv:2312.07559 (2023)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1038\/nj7612-457a"},{"key":"e_1_3_2_1_46_1","volume-title":"Smeaton","author":"Le-Khac Phuc H.","year":"2020","unstructured":"Phuc H. Le-Khac, Graham Healy, and Alan F. Smeaton. 2020. Contrastive Representation Learning: A Framework and Review. CoRR abs\/2010.05113 (2020). arXiv:2010.05113 https:\/\/arxiv.org\/abs\/2010.05113"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btz682"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.5555\/3495724.3496517"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.joi.2021.101234"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2205.05638"},{"key":"e_1_3_2_1_51_1","unstructured":"Kevin Lu Aditya Grover Pieter Abbeel and Igor Mordatch. 2021. Pretrained Transformers as Universal Computation Engines. arXiv:2103.05247 [cs.LG] https:\/\/arxiv.org\/abs\/2103.05247"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531772"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btae353"},{"key":"e_1_3_2_1_54_1","volume-title":"Caiming Xiong, Yingbo Zhou, and Semih Yavuz.","author":"Meng Rui","year":"2024","unstructured":"Rui Meng, Ye Liu, Shafiq Rayhan Joty, Caiming Xiong, Yingbo Zhou, and Semih Yavuz. 2024. SFR-Embedding-Mistral:Enhance Text Retrieval with Transfer Learning. Salesforce AI Research Blog. https:\/\/blog.salesforceairesearch.com\/sfr-embedded-mistral\/ Accessed: Apr 7, 2024."},{"key":"e_1_3_2_1_55_1","unstructured":"Tomas Mikolov Kai Chen Greg Corrado and Jeffrey Dean. 2013. Efficient Estimation of Word Representations in Vector Space. arXiv:1301.3781 [cs.CL] https:\/\/arxiv.org\/abs\/1301.3781"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2411.14592"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2012208118"},{"key":"e_1_3_2_1_58_1","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318."},{"key":"e_1_3_2_1_59_1","unstructured":"Vik Paruchuri. [n. d.]. Texify: Tool for Converting Text to LaTeX. https:\/\/github.com\/VikParuchuri\/texify. Accessed: [12\/7\/2024]."},{"key":"e_1_3_2_1_60_1","unstructured":"Vik Paruchuri. 2024. Marker. https:\/\/github.com\/VikParuchuri\/marker. Accessed: [12\/7\/2024]."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gkaa991"},{"key":"e_1_3_2_1_62_1","volume-title":"Sagar Srinivas Sakhinana, and Venkataramana Runkana","author":"Ravuru Chidaksh","year":"2024","unstructured":"Chidaksh Ravuru, Sagar Srinivas Sakhinana, and Venkataramana Runkana. 2024. Agentic Retrieval-Augmented Generation for Time Series Analysis. arXiv:2408.14484 [cs.AI] https:\/\/arxiv.org\/abs\/2408.14484"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.272"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1021\/acscentsci.9b00576"},{"key":"e_1_3_2_1_66_1","volume-title":"D1","author":"Shi Guobang","year":"2022","unstructured":"Guobang Shi, Xinyue Kang, Fanyi Dong, Yanchao Liu, Ning Zhu, Yuxuan Hu, Hanmei Xu, Xingzhen Lao, and Heng Zheng. 2022. DRAMP 3.0: An enhanced comprehensive data repository of antimicrobial peptides. Nucleic acids research 50, D1 (2022), D488\u2013D496."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1038\/d41586-022-00138-y"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"crossref","unstructured":"Luca Soldaini Rodney Kinney Akshita Bhagia Dustin Schwenk David Atkinson Russell Authur Ben Bogin Khyathi Chandu Jennifer Dumas Yanai Elazar Valentin Hofmann Ananya Harsh Jha Sachin Kumar Li Lucy Xinxi Lyu Nathan Lambert Ian Magnusson Jacob Morrison Niklas Muennighoff Aakanksha Naik Crystal Nam Matthew E. Peters Abhilasha Ravichander Kyle Richardson Zejiang Shen Emma Strubell Nishant Subramani Oyvind Tafjord Pete Walsh Luke Zettlemoyer Noah A. Smith Hannaneh Hajishirzi Iz Beltagy Dirk Groeneveld Jesse Dodge and Kyle Lo. 2024. Dolma: An Open Corpus of Three Trillion Tokens for Language Model Pretraining Research. arXiv preprint (2024). https:\/\/arxiv.org\/abs\/2402.00159","DOI":"10.18653\/v1\/2024.acl-long.840"},{"key":"e_1_3_2_1_69_1","unstructured":"Open Source. [n. d.]. The Chroma Vector Database. https:\/\/docs.trychroma.com\/"},{"key":"e_1_3_2_1_70_1","volume-title":"Galactica: A large language model for science. arXiv preprint arXiv:2211.09085","author":"Taylor Ross","year":"2022","unstructured":"Ross Taylor, Marcin Kardas, Guillem Cucurull, Thomas Scialom, Anthony Hartshorn, Elvis Saravia, Andrew Poulton, Viktor Kerkez, and Robert Stojnic. 2022. Galactica: A large language model for science. arXiv preprint arXiv:2211.09085 (2022)."},{"key":"e_1_3_2_1_71_1","unstructured":"Top500. 2024. November 2024 TOP500. https:\/\/www.top500.org\/lists\/top500\/2024\/11\/"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1335-8"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"crossref","first-page":"D480","DOI":"10.1093\/nar\/gkaa1100","article-title":"UniProt: The universal protein knowledgebase in 2021","volume":"49","author":"UniProt Consortium","year":"2021","unstructured":"UniProt Consortium. 2021. UniProt: The universal protein knowledgebase in 2021. Nucleic Acids Research 49, D1 (2021), D480\u2013D489.","journal-title":"Nucleic Acids Research"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature.2014.14658"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW63119.2024.00114"},{"key":"e_1_3_2_1_76_1","unstructured":"Chengrui Wang Qingqing Long Meng Xiao Xunxin Cai Chengjun Wu Zhen Meng Xuezhi Wang and Yuanchun Zhou. 2024. BioRAG: A RAG-LLM Framework for Biological Question Reasoning. arXiv:2408.01107 [cs.CL] https:\/\/arxiv.org\/abs\/2408.01107"},{"key":"e_1_3_2_1_77_1","volume-title":"APD: The antimicrobial peptide database. Nucleic acids research 32, suppl_1","author":"Wang Zhe","year":"2004","unstructured":"Zhe Wang and Guangshun Wang. 2004. APD: The antimicrobial peptide database. Nucleic acids research 32, suppl_1 (2004), D590\u2013D592."},{"key":"e_1_3_2_1_78_1","volume-title":"Crowdsourcing Multiple Choice Science Questions. ArXiv abs\/1707.06209","author":"Welbl Johannes","year":"2017","unstructured":"Johannes Welbl, Nelson F. Liu, and Matt Gardner. 2017. Crowdsourcing Multiple Choice Science Questions. ArXiv abs\/1707.06209 (2017). https:\/\/api.semanticscholar.org\/CorpusID:1553193"},{"key":"e_1_3_2_1_79_1","volume-title":"Ryen W White, Doug Burger, and Chi Wang.","author":"Wu Qingyun","year":"2023","unstructured":"Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, and Chi Wang. 2023. AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation. arXiv:2308.08155 [cs.AI] https:\/\/arxiv.org\/abs\/2308.08155"},{"key":"e_1_3_2_1_80_1","volume-title":"26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 1192\u20131200","author":"Xu Yiheng","year":"2020","unstructured":"Yiheng Xu, Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, and Ming Zhou. 2020. LayoutLM: Pre-training of text and layout for document image understanding. In 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 1192\u20131200."},{"key":"e_1_3_2_1_81_1","volume-title":"FILIP: Finegrained Interactive Language-Image Pre-Training. arXiv:2111.07783 [cs.CV] https:\/\/arxiv.org\/abs\/2111.07783","author":"Yao Lewei","year":"2021","unstructured":"Lewei Yao, Runhui Huang, Lu Hou, Guansong Lu, Minzhe Niu, Hang Xu, Xiaodan Liang, Zhenguo Li, Xin Jiang, and Chunjing Xu. 2021. FILIP: Finegrained Interactive Language-Image Pre-Training. arXiv:2111.07783 [cs.CV] https:\/\/arxiv.org\/abs\/2111.07783"},{"key":"e_1_3_2_1_82_1","volume-title":"Chengwei Qin, Bosheng Ding, Xiaobao Guo, Minzhi Li, Xingxuan Li, and Shafiq Joty.","author":"Zhao Ruochen","year":"2023","unstructured":"Ruochen Zhao, Hailin Chen, Weishi Wang, Fangkai Jiao, Xuan Long Do, Chengwei Qin, Bosheng Ding, Xiaobao Guo, Minzhi Li, Xingxuan Li, and Shafiq Joty. 2023. Retrieving Multimodal Information for Augmented Generation: A Survey. arXiv:2303.10868 [cs.CL] https:\/\/arxiv.org\/abs\/2303.10868"},{"key":"e_1_3_2_1_83_1","volume-title":"Twenty-Fourth International Joint Conference on Artificial Intelligence.","author":"Zheng Xiaoqing","year":"2015","unstructured":"Xiaoqing Zheng, Haoyuan Peng, Yi Chen, Pengjing Zhang, and Wenqiang Zhang. 2015. Character-based parsing with convolutional neural network. In Twenty-Fourth International Joint Conference on Artificial Intelligence."},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.1021\/acsomega.3c05114"}],"event":{"name":"PASC '25: Platform for Advanced Scientific Computing Conference","location":"FHNW University of Applied Sciences and Arts Northwestern Switzerland Brugg-Windisch Switzerland","acronym":"PASC '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","ETH Zurich \/ CSCS"]},"container-title":["Proceedings of the Platform for Advanced Scientific Computing Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3732775.3733586","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T13:52:46Z","timestamp":1750427566000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3732775.3733586"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,16]]},"references-count":84,"alternative-id":["10.1145\/3732775.3733586","10.1145\/3732775"],"URL":"https:\/\/doi.org\/10.1145\/3732775.3733586","relation":{},"subject":[],"published":{"date-parts":[[2025,6,16]]},"assertion":[{"value":"2025-06-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}