{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T15:05:18Z","timestamp":1763910318129,"version":"3.41.0"},"reference-count":3,"publisher":"Association for Computing Machinery (ACM)","issue":"4","license":[{"start":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T00:00:00Z","timestamp":1743120000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":["Commun. ACM"],"published-print":{"date-parts":[[2025,4]]},"abstract":"<jats:p>Plenty of benchmarks exist to measure artificial intelligence, but getting an accurate view of how good it is at certain tasks is more challenging than one might think.<\/jats:p>","DOI":"10.1145\/3708972","type":"journal-article","created":{"date-parts":[[2025,3,20]],"date-time":"2025-03-20T14:39:21Z","timestamp":1742481561000},"page":"15-17","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["How Do You Measure AI?"],"prefix":"10.1145","volume":"68","author":[{"given":"Logan","family":"Kugler","sequence":"first","affiliation":[{"name":"Tampa, Tampa, Florida, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,3,28]]},"reference":[{"key":"e_1_3_1_2_1","unstructured":"Achiam J. et al GPT-4 Technical Report arXiv Mar. 13 2023; https:\/\/arxiv.org\/abs\/2303.08774"},{"key":"e_1_3_1_3_1","unstructured":"LMSYS Chatbot Arena Leaderboard Large Model Systems Organization https:\/\/lmarena.ai"},{"key":"e_1_3_1_4_1","unstructured":"McKeown K. et al Reading Subtext: Evaluating large language models on short story summarization with writers arXiv Mar. 2 2024; https:\/\/arxiv.org\/abs\/2403.01061"}],"container-title":["Communications of the ACM"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3708972","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3708972","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:55Z","timestamp":1750295875000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3708972"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,28]]},"references-count":3,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,4]]}},"alternative-id":["10.1145\/3708972"],"URL":"https:\/\/doi.org\/10.1145\/3708972","relation":{},"ISSN":["0001-0782","1557-7317"],"issn-type":[{"type":"print","value":"0001-0782"},{"type":"electronic","value":"1557-7317"}],"subject":[],"published":{"date-parts":[[2025,3,28]]},"assertion":[{"value":"2025-03-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}