{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,20]],"date-time":"2026-06-20T16:58:37Z","timestamp":1781974717048,"version":"3.54.5"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031680304","type":"print"},{"value":"9783031680311","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-68031-1_6","type":"book-chapter","created":{"date-parts":[[2024,9,21]],"date-time":"2024-09-21T06:02:03Z","timestamp":1726898523000},"page":"77-89","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Benchmarking Large Language Models: Opportunities and Challenges"],"prefix":"10.1007","author":[{"given":"Miro","family":"Hodak","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"David","family":"Ellison","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chris","family":"Van Buren","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiaotong","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ajay","family":"Dholakia","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,9,22]]},"reference":[{"key":"6_CR1","unstructured":"Reuters. Accessed 29 June 2023. https:\/\/www.reuters.com\/technology\/chatgpt-sets-record-fastest-growing-user-base-analyst-note-2023-02-01\/"},{"key":"6_CR2","unstructured":"Google. Accessed 29 June 2023. https:\/\/blog.google\/technology\/ai\/bard-google-ai-search-updates\/"},{"key":"6_CR3","first-page":"6000","volume":"30","author":"A Vaswani","year":"2017","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural. Inf. Process. Syst. 30, 6000\u20136010 (2017)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"6_CR4","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv:1810.04805"},{"key":"6_CR5","unstructured":"Brown, T., et al.: Language Models are Few-Shot Learners. arXiv:2005.14165"},{"key":"6_CR6","unstructured":"Narang, S., Chowdhery, A.: Pathways Language Model (PaLM): Scaling to 540 Billion Parameters for Breakthrough Performance. https:\/\/ai.googleblog.com\/2022\/04\/pathways-language-model-palm-scaling-to.html"},{"key":"6_CR7","unstructured":"Touvron, H., et al.: LLaMA: Open and efficient foundation language models. arXiv: 2302.13971v1 (2023)"},{"key":"6_CR8","unstructured":"Cerebras. Accessed 29 June 2023. https:\/\/www.cerebras.net\/blog\/cerebras-makes-it-easy-to-harness-the-predictive-power-of-gpt-j"},{"key":"6_CR9","unstructured":"Gao, L., et al.: The Pile: An 800GB Dataset of Diverse Text for Language Modeling. arXiv:2101.00027"},{"key":"6_CR10","unstructured":"Le Scao, T., et al.: BLOOM: A 176B-Parameter Open-Access Multilingual Language Model. arXiv:2211.05100"},{"key":"6_CR11","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I.: Improving language understanding by generative pre-training (2018)"},{"key":"6_CR12","doi-asserted-by":"crossref","unstructured":"Zhu, Y.: Aligning books and movies: towards story-like visual explanations by watching movies and reading books. In: Proceedings of the IEEE International Conference on Computer Vision, pp.19\u201327 (2015)","DOI":"10.1109\/ICCV.2015.11"},{"key":"6_CR13","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I.: Language models are unsupervised multitask learners (2019)"},{"key":"6_CR14","unstructured":"Brown, T.B., et al.: Langage models are few-shot learners, NeurIPS (2020)"},{"issue":"1","key":"6_CR15","first-page":"5485","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(1), 5485\u20135551 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"6_CR16","unstructured":"OpenAI, GPT-4 Technical Report. arXiv:2303.08774v3 (2023)"},{"key":"6_CR17","unstructured":"MosaicML, Introducing MPT-7B: A new standard for open-source, commercially usable LLMs, May 2023, accessed June 29, 2023, https:\/\/www.mosaicml.com\/blog\/mpt-7b"},{"key":"6_CR18","unstructured":"Pursuing groundbreaking scale and accelerating research using Meta\u2019s Research SuperCluster. Accessed 29 June 2023. https:\/\/ai.facebook.com\/blog\/supercomputer-meta-research-supercluster-2023\/"},{"key":"6_CR19","unstructured":"ChatGPT and generative AI are booming, but the costs can be extraordinary. Accessed 29 June 2023. https:\/\/www.cnbc.com\/2023\/03\/13\/chatgpt-and-generative-ai-are-booming-but-at-a-very-expensive-price.html#:~:text=Analysts%20and%20technologists%20estimate%20that,could%20cost%20over%20%244%20million"},{"key":"6_CR20","unstructured":"Frantar, E. Alistarh, D.: SparseGPT: Massive Language Models Can Be Accurately Pruned in One-Shot. arXiv:2301.00774 (2023)"},{"key":"6_CR21","unstructured":"Xiao, G., Lin. J., Seznec, M., Wu, H., Demouth, J., Han, S.: SmoothQuant: Accurate and Efficient Post-Training Quantization for Large Language Models. arXiv:2211.10438 (2023)"},{"key":"6_CR22","unstructured":"Hendrycks, D., et al.: Measuring massive multitask language understanding, ICLR (2021)"},{"key":"6_CR23","doi-asserted-by":"publisher","unstructured":"Dholakia, A., Ellison, D., Hodak, M., Dutta, D.: Benchmarking considerations for trustworthy and responsible AI (Panel). In: Nambiar, R., Poess, M., (eds) Performance Evaluation and Benchmarking. TPCTC 2022. Lecture Notes in Computer Science, vol 13860. Springer, Cham. https:\/\/doi.org\/10.1007\/978-3-031-29576-8_8","DOI":"10.1007\/978-3-031-29576-8_8"},{"key":"6_CR24","unstructured":"Bias and Toxicity in Large Language Models. Accessed 18 July 2023. https:\/\/www.cs.princeton.edu\/courses\/archive\/fall22\/cos597G\/lectures\/lec14.pdf"},{"key":"6_CR25","unstructured":"Liu, Y., Iter, D., Xu, Y., Wang, S., Xu, R., Zhu, C.: G-Eval: NLG Evaluation using GPT-4 with Better Human Alignment. arXiv:2303.16634"},{"key":"6_CR26","unstructured":"Perspective. Accessed 18 July 2023. https:\/\/support.perspectiveapi.com\/s\/about-the-api-faqs?language=en_US"},{"key":"6_CR27","doi-asserted-by":"publisher","unstructured":"Dholakia, A., Ellison, D., Hodak, M., Dutta, D.: Benchmarking Considerations for Trustworthy and Responsible AI (Panel). In: Nambiar, R., Poess, M. (eds) Performance Evaluation and Benchmarking. TPCTC 2022. Lecture Notes in Computer Science, vol 13860. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-29576-8_8","DOI":"10.1007\/978-3-031-29576-8_8"},{"key":"6_CR28","unstructured":"MLCommons. Accessed 29 2023. https:\/\/mlcommons.org\/en\/training-normal-30\/"},{"key":"6_CR29","doi-asserted-by":"publisher","unstructured":"Liu Olesiuk, Y., Hodak, M., Ellison, D., Dholakia, A.: More the merrier: comparative evaluation of TPCx-AI and MLPerf benchmarks for AI. In: Nambiar, R., Poess, M. (eds) Performance Evaluation and Benchmarking. TPCTC 2022. Lecture Notes in Computer Science, vol 13860. Springer, Cham. https:\/\/doi.org\/10.1007\/978-3-031-29576-8_5","DOI":"10.1007\/978-3-031-29576-8_5"},{"key":"6_CR30","unstructured":"Touvron, H., et al.: Llama 2: Open Foundation and Fine-Tuned Chat Models. arXiv: 2307.09288 (2023)"}],"container-title":["Lecture Notes in Computer Science","Performance Evaluation and Benchmarking"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-68031-1_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,21]],"date-time":"2024-09-21T06:02:58Z","timestamp":1726898578000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-68031-1_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031680304","9783031680311"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-68031-1_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"22 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TPCTC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Technology Conference on Performance Evaluation and Benchmarking","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vancouver, BC","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Canada","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tpctc2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}