{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T18:30:31Z","timestamp":1771612231137,"version":"3.50.1"},"publisher-location":"Cham","reference-count":59,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031962349","type":"print"},{"value":"9783031962356","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-96235-6_22","type":"book-chapter","created":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T05:02:47Z","timestamp":1750654967000},"page":"299-316","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Enhancing Answer Reliability Through Inter-Model Consensus of\u00a0Large Language Models"],"prefix":"10.1007","author":[{"given":"Alireza","family":"Amiri-Margavi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Iman","family":"Jebellat","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ehsan","family":"Jebellat","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Seyed Pouyan Mousavi","family":"Davoudi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,6,24]]},"reference":[{"key":"22_CR1","unstructured":"Achiam, J., , et\u00a0al.: Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)"},{"key":"22_CR2","unstructured":"Ahn, J., Verma, R., Lou, R., Liu, D., Zhang, R., Yin, W.: Large language models for mathematical reasoning: Progresses and challenges. arXiv preprint arXiv:2402.00157 (2024)"},{"key":"22_CR3","unstructured":"Amiri-Margavi, A.: Inter-model consensus. https:\/\/github.com\/Alireza-Amiri\/Inter-Model-Consensus (2024)"},{"key":"22_CR4","unstructured":"Anthropic: The claude 3 model family: Opus, sonnet, haiku. Anthropic Technical Report (2024)"},{"key":"22_CR5","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2023.103000","volume":"91","author":"R Azad","year":"2024","unstructured":"Azad, R., et al.: Advances in medical image analysis with vision transformers: a comprehensive review. Med. Image Anal. 91, 103000 (2024)","journal-title":"Med. Image Anal."},{"issue":"5995","key":"22_CR6","doi-asserted-by":"publisher","first-page":"1081","DOI":"10.1126\/science.1185718","volume":"329","author":"B Bahrami","year":"2010","unstructured":"Bahrami, B., Olsen, K., Latham, P.E., Roepstorff, A., Rees, G., Frith, C.D.: Optimally interacting minds. Science 329(5995), 1081\u20131085 (2010)","journal-title":"Science"},{"issue":"2","key":"22_CR7","doi-asserted-by":"publisher","DOI":"10.1098\/rsos.172189","volume":"5","author":"A Baronchelli","year":"2018","unstructured":"Baronchelli, A.: The emergence of consensus: a primer. Royal Society Open Sci. 5(2), 172189 (2018)","journal-title":"Royal Society Open Sci."},{"key":"22_CR8","doi-asserted-by":"crossref","unstructured":"Bender, E.M., Gebru, T., McMillan-Major, A., Shmitchell, S.: On the dangers of stochastic parrots: Can language models be too big? In: Proceedings of the 2021 ACM Conference on Fairness, Accountability, and Transparency, pp. 610\u2013623 (2021)","DOI":"10.1145\/3442188.3445922"},{"key":"22_CR9","unstructured":"Bommasani, R., et\u00a0al.: On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258 (2021)"},{"key":"22_CR10","doi-asserted-by":"crossref","unstructured":"Chowdhary, K., Chowdhary, K.: Natural language processing. In: Fundamentals of artificial intelligence, pp. 603\u2013649 (2020)","DOI":"10.1007\/978-81-322-3972-7_19"},{"key":"22_CR11","unstructured":"Dafoe, A., et al.: Open problems in cooperative ai. arXiv preprint arXiv:2012.08630 (2020)"},{"key":"22_CR12","doi-asserted-by":"crossref","unstructured":"Davoodi, A.G., Davoudi, S.P.M., Pezeshkpour, P.: Llms are not intelligent thinkers: Introducing mathematical topic tree benchmark for comprehensive evaluation of llms. arXiv preprint arXiv:2406.05194 (2024)","DOI":"10.18653\/v1\/2025.naacl-long.161"},{"key":"22_CR13","doi-asserted-by":"crossref","unstructured":"Dietterich, T.G.: Ensemble methods in machine learning. In: International workshop on multiple classifier systems, pp. 1\u201315. Springer (2000)","DOI":"10.1007\/3-540-45014-9_1"},{"key":"22_CR14","doi-asserted-by":"crossref","unstructured":"Doshi-Velez, F., et\u00a0al.: Accountability of AI under the law: The role of explanation. arXiv preprint arXiv:1711.01134 (2017)","DOI":"10.2139\/ssrn.3064761"},{"key":"22_CR15","unstructured":"Du, Y., Rajivan, P., Gonzalez, C.: Large language models for collective problem-solving: Insights into group consensus. In: Proceedings of the Annual Meeting of the Cognitive Science Society, vol. 46 (0) (2024)"},{"key":"22_CR16","doi-asserted-by":"crossref","unstructured":"Friedkin, N.E.: Social networks in structural equation models. Social Psychol. Quart. 316\u2013328 (1990)","DOI":"10.2307\/2786737"},{"issue":"12","key":"22_CR17","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1145\/3458723","volume":"64","author":"T Gebru","year":"2021","unstructured":"Gebru, T.: Datasheets for datasets. Commun. ACM 64(12), 86\u201392 (2021)","journal-title":"Commun. ACM"},{"key":"22_CR18","unstructured":"Guo, D., et\u00a0al.: Deepseek-r1: Incentivizing reasoning capability in LLMS via reinforcement learning. arXiv preprint arXiv:2501.12948 (2025)"},{"key":"22_CR19","doi-asserted-by":"crossref","unstructured":"Gupta, P., Pratap\u00a0Singh, A., Kumar, V.: A review of ensemble methods used in ai applications. In: International Conference on Cybersecurity in Emerging Digital Era, pp. 145\u2013157. Springer (2022)","DOI":"10.1007\/978-981-99-5080-5_13"},{"key":"22_CR20","unstructured":"Hashimoto, T., Srivastava, M., Namkoong, H., Liang, P.: Fairness without demographics in repeated loss minimization. In: International Conference on Machine Learning, pp. 1929\u20131938. PMLR (2018)"},{"issue":"5","key":"22_CR21","doi-asserted-by":"publisher","first-page":"3635","DOI":"10.1287\/mnsc.2021.4090","volume":"68","author":"L He","year":"2022","unstructured":"He, L., Analytis, P.P., Bhatia, S.: The wisdom of model crowds. Manage. Sci. 68(5), 3635\u20133659 (2022)","journal-title":"Manage. Sci."},{"key":"22_CR22","unstructured":"Hendrycks, D., et al.: Measuring massive multitask language understanding. arXiv preprint arXiv:2009.03300 (2020)"},{"key":"22_CR23","doi-asserted-by":"crossref","unstructured":"Holstein, K., Wortman\u00a0Vaughan, J., Daum\u00e9\u00a0III, H., Dudik, M., Wallach, H.: Improving fairness in machine learning systems: What do industry practitioners need? In: Proceedings of the 2019 CHI Conference On Human Factors In Computing Systems, pp. 1\u201316 (2019)","DOI":"10.1145\/3290605.3300830"},{"issue":"46","key":"22_CR24","doi-asserted-by":"publisher","first-page":"16385","DOI":"10.1073\/pnas.0403723101","volume":"101","author":"L Hong","year":"2004","unstructured":"Hong, L., Page, S.E.: Groups of diverse problem solvers can outperform groups of high-ability problem solvers. Proc. Natl. Acad. Sci. 101(46), 16385\u201316389 (2004)","journal-title":"Proc. Natl. Acad. Sci."},{"key":"22_CR25","unstructured":"Huang, Y., et al.: Enabling ensemble learning for heterogeneous large language models with deep parallel collaboration. arXiv preprint arXiv:2404.12715 (2024)"},{"key":"22_CR26","doi-asserted-by":"crossref","unstructured":"Hutchins, E.: Cognition in the wild. MIT Press (1995)","DOI":"10.7551\/mitpress\/1881.001.0001"},{"key":"22_CR27","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2024.104659","volume":"175","author":"I Jebellat","year":"2024","unstructured":"Jebellat, I., Jebellat, E., Amiri-Margavi, A., Vahidi-Moghaddam, A., Pishkenari, H.N.: A reinforcement learning approach to find optimal propulsion strategy for microrobots swimming at low reynolds number. Robot. Auton. Syst. 175, 104659 (2024)","journal-title":"Robot. Auton. Syst."},{"key":"22_CR28","doi-asserted-by":"crossref","unstructured":"Jiao, Y., Shridhar, K., Cui, P., Zhou, W., Sachan, M.: Automatic educational question generation with difficulty level controls. In: International Conference on Artificial Intelligence in Education, pp. 476\u2013488. Springer (2023)","DOI":"10.1007\/978-3-031-36272-9_39"},{"key":"22_CR29","doi-asserted-by":"crossref","unstructured":"Kittur, A., Kraut, R.E.: Harnessing the wisdom of crowds in wikipedia: quality through coordination. In: Proceedings of the 2008 ACM Conference on Computer Supported Cooperative Work, pp. 37\u201346 (2008)","DOI":"10.1145\/1460563.1460572"},{"issue":"4","key":"22_CR30","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1109\/MIS.2006.75","volume":"21","author":"G Klein","year":"2006","unstructured":"Klein, G., Moon, B., Hoffman, R.R.: Making sense of sensemaking 1: Alternative perspectives. IEEE Intell. Syst. 21(4), 70\u201373 (2006)","journal-title":"IEEE Intell. Syst."},{"key":"22_CR31","first-page":"22199","volume":"35","author":"T Kojima","year":"2022","unstructured":"Kojima, T., Gu, S.S., Reid, M., Matsuo, Y., Iwasawa, Y.: Large language models are zero-shot reasoners. Adv. Neural. Inf. Process. Syst. 35, 22199\u201322213 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"23","key":"22_CR32","doi-asserted-by":"publisher","first-page":"12592","DOI":"10.1073\/pnas.1919012117","volume":"117","author":"AJ Larrazabal","year":"2020","unstructured":"Larrazabal, A.J., Nieto, N., Peterson, V., Milone, D.H., Ferrante, E.: Gender imbalance in medical imaging datasets produces biased classifiers for computer-aided diagnosis. Proc. Natl. Acad. Sci. 117(23), 12592\u201312594 (2020)","journal-title":"Proc. Natl. Acad. Sci."},{"key":"22_CR33","unstructured":"L\u00e9vy, P.: Collective intelligence: Mankind\u2019s emerging world in cyberspace. Perseus books (1997)"},{"key":"22_CR34","unstructured":"Liu, X., et\u00a0al.: Large language models and causal inference in collaboration: A comprehensive survey. arXiv preprint arXiv:2403.09606 (2024)"},{"key":"22_CR35","unstructured":"Lu, J., Pang, Z., Xiao, M., Zhu, Y., Xia, R., Zhang, J.: Merge, ensemble, and cooperate! a survey on collaborative strategies in the era of large language models. arXiv preprint arXiv:2407.06089 (2024)"},{"key":"22_CR36","unstructured":"Malone, T.W., Bernstein, M.S.: Handbook of collective intelligence. MIT press (2022)"},{"key":"22_CR37","unstructured":"Mann, B., Ryder, N., Subbiah, M., Kaplan, J., Dhariwal, P., Neelakantan, A., Shyam, P., Sastry, G., Askell, A., Agarwal, S., et\u00a0al.: Language models are few-shot learners. arXiv preprint arXiv:2005.141651 (2020)"},{"issue":"4","key":"22_CR38","first-page":"63","volume":"41","author":"EA Mennis","year":"2006","unstructured":"Mennis, E.A.: The wisdom of crowds: Why the many are smarter than the few and how collective wisdom shapes business, economies, societies, and nations. Bus. Econ. 41(4), 63\u201365 (2006)","journal-title":"Bus. Econ."},{"key":"22_CR39","unstructured":"MIT News: Multi-ai collaboration helps reasoning and factual accuracy in large language models (2023). https:\/\/news.mit.edu\/2023\/multi-ai-collaboration-helps-reasoning-factual-accuracy-language-models-0918"},{"issue":"11","key":"22_CR40","doi-asserted-by":"publisher","first-page":"501","DOI":"10.1038\/s42256-019-0114-4","volume":"1","author":"B Mittelstadt","year":"2019","unstructured":"Mittelstadt, B.: Principles alone cannot guarantee ethical AI. Nature Mach. Intell. 1(11), 501\u2013507 (2019)","journal-title":"Nature Mach. Intell."},{"key":"22_CR41","unstructured":"Naik, N.: Probabilistic consensus through ensemble validation: A framework for llm reliability. arXiv preprint arXiv:2411.06535 (2024)"},{"issue":"1","key":"22_CR42","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1109\/JPROC.2006.887293","volume":"95","author":"R Olfati-Saber","year":"2007","unstructured":"Olfati-Saber, R., Fax, J.A., Murray, R.M.: Consensus and cooperation in networked multi-agent systems. Proc. IEEE 95(1), 215\u2013233 (2007)","journal-title":"Proc. IEEE"},{"key":"22_CR43","unstructured":"OpenAI: Gpt-4 technical report (2023). https:\/\/openai.com\/research\/gpt-4"},{"key":"22_CR44","doi-asserted-by":"crossref","unstructured":"Page, S.: The difference: How the power of diversity creates better groups, firms, schools, and societies-new edition. Princeton University Press (2008)","DOI":"10.1515\/9781400830282"},{"issue":"4","key":"22_CR45","first-page":"1082","volume":"5","author":"NM Patrikalakis","year":"1999","unstructured":"Patrikalakis, N.M., Fortier, P.J., Ioannidis, Y., Nikolaou, C.N., Robinson, A.R., Rossignac, J.R., Vinacua, A., Abrams, S.L.: Distributed information and computation in scientific and engineering environments. D-lib Mag. 5(4), 1082\u20139873 (1999)","journal-title":"D-lib Mag."},{"issue":"1","key":"22_CR46","first-page":"27","volume":"3","author":"AS Pillai","year":"2023","unstructured":"Pillai, A.S.: Advancements in natural language processing for automotive virtual assistants enhancing user experience and safety. Journal of Computational Intelligence and Robotics 3(1), 27\u201336 (2023)","journal-title":"Journal of Computational Intelligence and Robotics"},{"issue":"8","key":"22_CR47","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I., et al.: Language models are unsupervised multitask learners. OpenAI blog 1(8), 9 (2019)","journal-title":"OpenAI blog"},{"key":"22_CR48","doi-asserted-by":"crossref","unstructured":"Raghavan, M., Barocas, S., Kleinberg, J., Levy, K.: Mitigating bias in algorithmic hiring: Evaluating claims and practices. In: Proceedings of the 2020 conference on fairness, accountability, and transparency, pp. 469\u2013481 (2020)","DOI":"10.1145\/3351095.3372828"},{"issue":"29","key":"22_CR49","doi-asserted-by":"publisher","first-page":"17931","DOI":"10.1007\/s00521-024-10203-4","volume":"36","author":"SS Rezk","year":"2024","unstructured":"Rezk, S.S., Selim, K.S.: Metaheuristic-based ensemble learning: an extensive review of methods and applications. Neural Comput. Appl. 36(29), 17931\u201317959 (2024)","journal-title":"Neural Comput. Appl."},{"issue":"5","key":"22_CR50","doi-asserted-by":"publisher","first-page":"206","DOI":"10.1038\/s42256-019-0048-x","volume":"1","author":"C Rudin","year":"2019","unstructured":"Rudin, C.: Stop explaining black box machine learning models for high stakes decisions and use interpretable models instead. Nature Mach. Intell. 1(5), 206\u2013215 (2019)","journal-title":"Nature Mach. Intell."},{"key":"22_CR51","unstructured":"Sun, Q., Yin, Z., Li, X., Wu, Z., Qiu, X., Kong, L.: Corex: pushing the boundaries of complex reasoning through multi-model collaboration. arXiv preprint arXiv:2310.00280 (2023)"},{"key":"22_CR52","unstructured":"Taylor, R., et al.: Galactica: a large language model for science. arXiv preprint arXiv:2211.09085 (2022)"},{"key":"22_CR53","unstructured":"Team, G., et\u00a0al.: Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805 (2023)"},{"key":"22_CR54","unstructured":"Touvron, H., et\u00a0al.: Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)"},{"key":"22_CR55","doi-asserted-by":"crossref","unstructured":"Vercammen, A., Ji, Y., Burgman, M.: The collective intelligence of random small crowds: a partial replication of kosinski et al.(2012). Judgment Decision Making 14(1), 91\u201398 (2019)","DOI":"10.1017\/S1930297500002941"},{"issue":"1","key":"22_CR56","first-page":"7068349","volume":"2018","author":"A Voulodimos","year":"2018","unstructured":"Voulodimos, A., Doulamis, N., Doulamis, A., Protopapadakis, E.: Deep learning for computer vision: a brief review. Comput. Intell. Neurosci. 2018(1), 7068349 (2018)","journal-title":"Comput. Intell. Neurosci."},{"issue":"3","key":"22_CR57","first-page":"729","volume":"12","author":"MA Wiering","year":"2012","unstructured":"Wiering, M.A., Van Otterlo, M.: Reinforcement learning. Adapt. Learn. Optim. 12(3), 729 (2012)","journal-title":"Adapt. Learn. Optim."},{"key":"22_CR58","doi-asserted-by":"crossref","unstructured":"Woolley, A.W., Chabris, C.F., Pentland, A., Hashmi, N., Malone, T.W.: Evidence for a collective intelligence factor in the performance of human groups. Science 330(6004), 686\u2013688 (2010)","DOI":"10.1126\/science.1193147"},{"key":"22_CR59","doi-asserted-by":"crossref","unstructured":"Yin, Z., et al.: Exchange-of-thought: Enhancing large language model capabilities through cross-model communication. arXiv preprint arXiv:2312.01823 (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.936"}],"container-title":["IFIP Advances in Information and Communication Technology","Artificial Intelligence Applications and Innovations"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-96235-6_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T05:03:00Z","timestamp":1750654980000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-96235-6_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031962349","9783031962356"],"references-count":59,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-96235-6_22","relation":{},"ISSN":["1868-4238","1868-422X"],"issn-type":[{"value":"1868-4238","type":"print"},{"value":"1868-422X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"24 June 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AIAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"IFIP International Conference on Artificial Intelligence Applications and Innovations","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Limassol","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Cyprus","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 June 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 June 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aiai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ifipaiai.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}