{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,8]],"date-time":"2026-03-08T01:39:24Z","timestamp":1772933964178,"version":"3.50.1"},"reference-count":53,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T00:00:00Z","timestamp":1765152000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T00:00:00Z","timestamp":1765152000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,8]]},"DOI":"10.1109\/bigdata66926.2025.11402280","type":"proceedings-article","created":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T20:57:57Z","timestamp":1772830677000},"page":"8021-8029","source":"Crossref","is-referenced-by-count":0,"title":["Multi-Planners in Plan-and-Solve Prompting for Medical Reasoning"],"prefix":"10.1109","author":[{"given":"Yucheng","family":"Wang","sequence":"first","affiliation":[{"name":"Penn State Harrisburg,Middletown,USA"}]},{"given":"Alex","family":"Li","sequence":"additional","affiliation":[{"name":"Penn State Harrisburg,Middletown,USA"}]},{"given":"Jeremy","family":"Blum","sequence":"additional","affiliation":[{"name":"Penn State Harrisburg,Middletown,USA"}]},{"given":"Hien","family":"Nguyen","sequence":"additional","affiliation":[{"name":"Penn State Harrisburg,Middletown,USA"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1877","article-title":"Language mod-els are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref2","author":"Le Scao","year":"2023","journal-title":"Bloom: A 176b-parameter open-access multilingual language model"},{"issue":"240","key":"ref3","first-page":"1","article-title":"Palm: Scaling language modeling with pathways","volume":"24","author":"Chowdhery","year":"2023","journal-title":"Journal of Machine Learning Research"},{"key":"ref4","article-title":"Gpt-4 technical report","author":"Achiam","year":"2023","journal-title":"arXiv preprint"},{"issue":"3","key":"ref5","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3641289","article-title":"A survey on evaluation of large language models","volume":"15","author":"Chang","year":"2024","journal-title":"ACM transactions on intelligent systems and technology"},{"key":"ref6","article-title":"Towards trustworthy ai: A review of ethical and robust large language models","author":"Ferdaus","year":"2024","journal-title":"arXiv preprint"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.147"},{"key":"ref8","article-title":"Plan-and-execute","volume-title":"n.d.","year":"2025"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.52202\/079017-0911"},{"issue":"6","key":"ref10","article-title":"A survey on large language model based autonomous agents","volume-title":"Frontiers of Computer Science","volume":"18","author":"Wang","year":"2024"},{"key":"ref11","volume-title":"LIm stability: A detailed analysis with some surprises","author":"Atil","year":"2024"},{"key":"ref12","volume-title":"The good, the bad, and the greedy: Evaluation of llms should not ignore non-determinism","author":"Song","year":"2024"},{"key":"ref13","volume-title":"Large language model based multi-agents: A survey of progress and challenges","author":"Guo","year":"2024"},{"key":"ref14","volume-title":"Examining inter-consistency of large language models collaboration: An in-depth analysis via debate","author":"Xiong","year":"2023"},{"key":"ref15","volume-title":"Improving factuality and reasoning in language models through multi agent debate","author":"Du","year":"2023"},{"key":"ref16","volume-title":"Metagpt: Meta programming for a multi-agent collaborative framework","author":"Hong","year":"2023"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.3390\/app11146421"},{"key":"ref18","volume-title":"Changing answer order can decrease mmlu accuracy","author":"Gupta","year":"2024"},{"key":"ref19","volume-title":"Llms\u2019 classification performance is overclaimed","author":"Xu","year":"2024"},{"key":"ref20","article-title":"Disc-medllm: Bridging general large language models and real-world medical consultation","author":"Bao","year":"2023","journal-title":"arXiv preprint"},{"key":"ref21","article-title":"Capabilities of gpt-4 on medical challenge problems","author":"Nori","year":"2023","journal-title":"arXiv preprint"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-023-02448-8"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1038\/s44172-024-00271-8"},{"key":"ref24","article-title":"Medical image understanding with pretrained vision language models: A comprehensive study","author":"Qin","year":"2022","journal-title":"arXiv preprint"},{"key":"ref25","article-title":"Pharmacygpt: The ai pharmacist","author":"Liu","year":"2023","journal-title":"arXiv preprint"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1111\/bcp.15896"},{"key":"ref27","article-title":"Meddm: LIm-executable clinical guidance tree for clinical decision-making","author":"Li","year":"2023","journal-title":"arXiv preprint"},{"key":"ref28","article-title":"Medagents: Large language models as collaborators for zero-shot medical reasoning","author":"Tang","year":"2023","journal-title":"arXiv preprint"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.clinicalnlp-1.7"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-short.119"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.conll-1.21"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btae075"},{"key":"ref33","article-title":"Knowledge-augmented reasoning distillation for small language models in knowledge-intensive tasks","volume":"36","author":"Kang","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref34","article-title":"Pmc-vqa: Visual instruction tuning for medical visual question answering","author":"Zhang","year":"2023","journal-title":"arXiv preprint"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1056\/aioa2300138"},{"key":"ref36","article-title":"Llava-med: Training a large language-and-vision assistant for biomedicine in one day","volume":"36","author":"Li","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1613"},{"key":"ref38","article-title":"Babyagi","volume-title":"GitHub repository, n.d.","author":"Nakajima","year":"2025"},{"key":"ref39","first-page":"2024","volume-title":"LLM API Provider Leaderboard: Artificial Anal-ysis"},{"key":"ref40","article-title":"Measuring massive multitask language understanding","author":"Hendrycks","journal-title":"arXiv preprint"},{"key":"ref41","author":"Subramaniam","year":"2024","journal-title":"Debategpt: Fine-tuning large language models with multi-agent debate supervision"},{"key":"ref42","article-title":"Gpqa: A graduate-level google-proof q&a benchmark","author":"Rein","year":"2023","journal-title":"arXiv preprint"},{"key":"ref43","article-title":"Measuring mathematical problem solving with the math dataset","author":"Hendrycks","year":"2021","journal-title":"arXiv preprint"},{"key":"ref44","article-title":"Mathscale: Scaling instruction tuning for mathematical reasoning","author":"Tang","year":"2024","journal-title":"arXiv preprint"},{"key":"ref45","first-page":"3","volume-title":"GPT-3.5 Turbo Model"},{"key":"ref46","first-page":"2025","article-title":"Tavily search","volume-title":"n.d."},{"issue":"3","key":"ref47","article-title":"Communicative agents for software development","volume":"6","author":"Qian","year":"2023","journal-title":"ar Xiv preprint arXiv:2307.07924"},{"key":"ref48","first-page":"991","article-title":"Camel: Communicative agents for\u201d mind\u201d exploration of large language model society","volume":"36","author":"Li","year":"2023","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-023-06291-2"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-024-03423-7"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1800"},{"key":"ref52","article-title":"Self-consistency improves chain of thought reasoning in language models","author":"Wang","year":"2022","journal-title":"arXiv preprint"},{"key":"ref53","article-title":"Llm-medqa: Enhancing medical question an-swering through case studies in large language models","author":"Yang","year":"2024","journal-title":"ar Xiv preprint arXiv:2501.05464"}],"event":{"name":"2025 IEEE International Conference on Big Data (BigData)","location":"Macau, China","start":{"date-parts":[[2025,12,8]]},"end":{"date-parts":[[2025,12,11]]}},"container-title":["2025 IEEE International Conference on Big Data (BigData)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11400704\/11400712\/11402280.pdf?arnumber=11402280","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T06:56:11Z","timestamp":1772866571000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11402280\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,8]]},"references-count":53,"URL":"https:\/\/doi.org\/10.1109\/bigdata66926.2025.11402280","relation":{},"subject":[],"published":{"date-parts":[[2025,12,8]]}}}