{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T02:47:04Z","timestamp":1776134824932,"version":"3.50.1"},"reference-count":78,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,2,9]],"date-time":"2026-02-09T00:00:00Z","timestamp":1770595200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100003708","name":"Korea Institute of Science and Technology Information","doi-asserted-by":"publisher","award":["K26L3M1C1"],"award-info":[{"award-number":["K26L3M1C1"]}],"id":[{"id":"10.13039\/501100003708","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Future Generation Computer Systems"],"published-print":{"date-parts":[[2026,8]]},"DOI":"10.1016\/j.future.2026.108423","type":"journal-article","created":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T16:01:47Z","timestamp":1770739307000},"page":"108423","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["SpectraBench: A three-stage evolution framework for intelligent large language model evaluation"],"prefix":"10.1016","volume":"181","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5676-7556","authenticated-orcid":false,"given":"Gunwoo","family":"Lee","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9391-4028","authenticated-orcid":false,"given":"Rae-Young","family":"Jang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9098-3001","authenticated-orcid":false,"given":"Sang-Hwan","family":"Gwak","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6929-0825","authenticated-orcid":false,"given":"Kyong-Ha","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5142-6106","authenticated-orcid":false,"given":"Ryong","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.future.2026.108423_bib0001","series-title":"Proceedings of the 29th Symposium on Operating Systems Principles","first-page":"611","article-title":"Efficient memory management for large language model serving with PagedAttention","author":"Kwon","year":"2023"},{"key":"10.1016\/j.future.2026.108423_bib0002","series-title":"Proceedings of the 7th Conference on Machine Learning and Systems (MLSys)","article-title":"Punica: multi-tenant LoRA serving","author":"Chen","year":"2024"},{"key":"10.1016\/j.future.2026.108423_bib0003","series-title":"18th USENIX Symposium on Operating Systems Design and Implementation (OSDI \u201924)","first-page":"173","article-title":"Llumnix: dynamic scheduling for large language model serving","author":"Sun","year":"2024"},{"key":"10.1016\/j.future.2026.108423_bib0004","series-title":"Advances in Neural Information Processing Systems","first-page":"10088","article-title":"QLoRA: efficient finetuning of quantized LLMs","volume":"36","author":"Dettmers","year":"2023"},{"key":"10.1016\/j.future.2026.108423_bib0005","series-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","first-page":"559","article-title":"Alpa: automating inter-and intra-operator parallelism for distributed deep learning","author":"Zheng","year":"2022"},{"issue":"6","key":"10.1016\/j.future.2026.108423_bib0006","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3530811","article-title":"Efficient transformers: a survey","volume":"55","author":"Tay","year":"2022","journal-title":"ACM Comput. Surv."},{"key":"10.1016\/j.future.2026.108423_bib0007","series-title":"Performance Modeling and Design of Computer Systems: Queueing Theory in Action","author":"Harchol-Balter","year":"2013"},{"issue":"9","key":"10.1016\/j.future.2026.108423_bib0008","doi-asserted-by":"crossref","first-page":"1175","DOI":"10.1109\/12.57058","article-title":"Priority inheritance protocols: an approach to real-time synchronization","volume":"39","author":"Sha","year":"1990","journal-title":"IEEE Trans. Comput."},{"key":"10.1016\/j.future.2026.108423_bib0009","series-title":"Building Microservices: Designing Fine-Grained Systems","author":"Newman","year":"2015"},{"key":"10.1016\/j.future.2026.108423_bib0010","unstructured":"J. Kaplan, S. McCandlish, T. Henighan, T.B. Brown, B. Chess, R. Child, S. Gray, A. Radford, J. Wu, D. Amodei, Scaling laws for neural language models, (2020). 10.48550\/arXiv.2001.08361."},{"key":"10.1016\/j.future.2026.108423_bib0011","series-title":"Advances in Neural Information Processing Systems","first-page":"30016","article-title":"Training compute-optimal large language models","volume":"35","author":"Hoffmann","year":"2022"},{"key":"10.1016\/j.future.2026.108423_bib0012","series-title":"Proceedings of the 15th ACM Workshop on Hot Topics in Networks","first-page":"50","article-title":"Resource management with deep reinforcement learning","author":"Mao","year":"2016"},{"key":"10.1016\/j.future.2026.108423_bib0013","series-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI \u201918)","first-page":"561","article-title":"Ray: a distributed framework for emerging AI applications","author":"Moritz","year":"2018"},{"issue":"1","key":"10.1016\/j.future.2026.108423_bib0014","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1023\/A:1010933404324","article-title":"Random forests","volume":"45","author":"Breiman","year":"2001","journal-title":"Mach. Learn."},{"key":"10.1016\/j.future.2026.108423_bib0015","series-title":"Advances in Neural Information Processing Systems","article-title":"Judging LLM-as-a-Judge with MT-Bench and chatbot arena","volume":"36","author":"Zheng","year":"2024"},{"key":"10.1016\/j.future.2026.108423_bib0016","unstructured":"L. Gao, J. Tow, S. Biderman, S. Black, A. DiPofi, C. Foster, L. Golding, J. Hsu, K. McDonell, N. Muennighoff, et al., A framework for few-shot language model evaluation, 2021. 10.5281\/zenodo.10256836."},{"key":"10.1016\/j.future.2026.108423_bib0017","series-title":"International Conference on Learning Representations (ICLR)","article-title":"GLUE: a multi-task benchmark and analysis platform for natural language understanding","author":"Wang","year":"2019"},{"key":"10.1016\/j.future.2026.108423_bib0018","series-title":"Advances in Neural Information Processing Systems","first-page":"3261","article-title":"SuperGLUE: a stickier benchmark for general-Purpose language understanding systems","volume":"32","author":"Wang","year":"2019"},{"key":"10.1016\/j.future.2026.108423_bib0019","unstructured":"O. Contributors, OpenCompass: a universal evaluation platform for foundation models, 2023. 10.5281\/zenodo.8271397."},{"issue":"1","key":"10.1016\/j.future.2026.108423_bib0020","doi-asserted-by":"crossref","first-page":"140","DOI":"10.1111\/nyas.15007","article-title":"Holistic evaluation of language models","volume":"1525","author":"Liang","year":"2023","journal-title":"Ann. N. Y. Acad. Sci."},{"key":"10.1016\/j.future.2026.108423_bib0021","unstructured":"Y. Wang, X. Ma, G. Zhang, Y. Ni, A. Chandra, S. Guo, W. Ren, A. Wang, Y. Zhang, K. Arulraj, et al., MMLU-Pro: a more robust and challenging multi-task language understanding benchmark, (2024). 10.48550\/arXiv.2406.01574."},{"key":"10.1016\/j.future.2026.108423_bib0022","doi-asserted-by":"crossref","unstructured":"Y. Wang, H. Le, A.D. Gotmare, N.D.Q. Bui, J. Li, S.C.H. Hoi, CodeT5+: open code large language models for code understanding and generation, (2023). 10.48550\/arXiv.2305.07922.","DOI":"10.18653\/v1\/2023.emnlp-main.68"},{"key":"10.1016\/j.future.2026.108423_bib0023","unstructured":"R. Li, L.B. Allal, Y. Zi, N. Muennighoff, D. Kocetkov, C. Mou, M. Marone, C. Akiki, J. Li, J. Chim, et al., StarCoder: may the source be with you!, (2023a). 10.48550\/arXiv.2305.06161."},{"key":"10.1016\/j.future.2026.108423_bib0024","series-title":"International Conference on Machine Learning","first-page":"19730","article-title":"BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models","author":"Li","year":"2023"},{"key":"10.1016\/j.future.2026.108423_bib0025","series-title":"Advances in Neural Information Processing Systems","article-title":"Visual instruction tuning","volume":"36","author":"Liu","year":"2024"},{"key":"10.1016\/j.future.2026.108423_bib0026","unstructured":"Y. Qin, S. Liang, Y. Ye, K. Zhu, L. Yan, Y. Lu, Y. Lin, X. Cai, Z. Tang, Z. Liu, et al., ToolLLM: facilitating large language models to master 16000+ real-world APIs, (2024). 10.48550\/arXiv.2307.16789."},{"key":"10.1016\/j.future.2026.108423_bib0027","series-title":"Advances in Neural Information Processing Systems","first-page":"16344","article-title":"FlashAttention: fast and memory-efficient exact attention with IO-awareness","volume":"35","author":"Dao","year":"2022"},{"key":"10.1016\/j.future.2026.108423_bib0028","unstructured":"T. Dao, FlashAttention-2: faster attention with better parallelism and work partitioning, (2023). 10.48550\/arXiv.2307.08691."},{"key":"10.1016\/j.future.2026.108423_bib0029","series-title":"International Conference on Learning Representations","article-title":"GPTQ: accurate post-training quantization for generative pre-trained transformers","author":"Frantar","year":"2023"},{"key":"10.1016\/j.future.2026.108423_bib0030","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"7989","article-title":"White-box quantization through layerwise knowledge distillation","author":"Nagel","year":"2021"},{"issue":"12","key":"10.1016\/j.future.2026.108423_bib0031","doi-asserted-by":"crossref","first-page":"54","DOI":"10.1145\/3381831","article-title":"Green AI","volume":"63","author":"Schwartz","year":"2020","journal-title":"Commun. ACM"},{"key":"10.1016\/j.future.2026.108423_bib0032","series-title":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","first-page":"3645","article-title":"Energy and policy considerations for deep learning in NLP","author":"Strubell","year":"2019"},{"issue":"26","key":"10.1016\/j.future.2026.108423_bib0033","article-title":"Explaining neural scaling laws","volume":"121","author":"Bahri","year":"2024","journal-title":"Proc. Nat. Acad. Sci."},{"key":"10.1016\/j.future.2026.108423_bib0034","series-title":"Advances in Neural Information Processing Systems","first-page":"1340","article-title":"A deep reinforcement learning approach for job scheduling","author":"Mao","year":"2016"},{"key":"10.1016\/j.future.2026.108423_bib0035","series-title":"Proceedings of the ACM Special Interest Group on Data Communication","first-page":"270","article-title":"Learning scheduling algorithms for data processing clusters","author":"Mao","year":"2019"},{"key":"10.1016\/j.future.2026.108423_bib0036","series-title":"International Conference on Learning Representations","article-title":"Large batch optimization for deep learning: training BERT in 76 min","author":"You","year":"2020"},{"key":"10.1016\/j.future.2026.108423_bib0037","series-title":"2025 USENIX Annual Technical Conference (USENIX ATC \u201925)","first-page":"1","article-title":"LLMStation: resource multiplexing in tuning and serving large language models","author":"He","year":"2025"},{"key":"10.1016\/j.future.2026.108423_bib0038","series-title":"Proceedings of the 8th Conference on Machine Learning and Systems (MLSys)","article-title":"SOLA: optimizing SLO attainment for large language model serving with state-aware scheduling","author":"Guo","year":"2025"},{"key":"10.1016\/j.future.2026.108423_bib0039","series-title":"Proceedings of the 30th International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS)","first-page":"1","article-title":"TAPAS: thermal- and power-aware scheduling for LLM inference in cloud platforms","author":"Zhao","year":"2025"},{"key":"10.1016\/j.future.2026.108423_bib0040","series-title":"Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining","first-page":"2623","article-title":"Optuna: a next-generation hyperparameter optimization framework","author":"Akiba","year":"2019"},{"key":"10.1016\/j.future.2026.108423_bib0041","series-title":"International Conference on Machine Learning","first-page":"1437","article-title":"BOHB: robust and efficient hyperparameter optimization at scale","author":"Falkner","year":"2018"},{"key":"10.1016\/j.future.2026.108423_bib0042","series-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","first-page":"1","article-title":"Efficient large-scale language model training on GPU clusters using megatron-LM","author":"Narayanan","year":"2021"},{"key":"10.1016\/j.future.2026.108423_bib0043","series-title":"Proceedings of the 44th Annual International Symposium on Computer Architecture","first-page":"1","article-title":"In-datacenter performance analysis of a tensor processing unit","author":"Jouppi","year":"2017"},{"key":"10.1016\/j.future.2026.108423_bib0044","series-title":"Proceedings of the 50th Annual International Symposium on Computer Architecture","first-page":"1","article-title":"TPU v4: an optically reconfigurable supercomputer for machine learning with hardware support for embeddings","author":"Jouppi","year":"2023"},{"key":"10.1016\/j.future.2026.108423_bib0045","series-title":"2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA)","first-page":"446","article-title":"MLPerf inference benchmark","author":"Reddi","year":"2020"},{"key":"10.1016\/j.future.2026.108423_bib0046","series-title":"13th USENIX Symposium on Networked Systems Design and Implementation (NSDI 16)","first-page":"363","article-title":"Ernest: efficient performance prediction for large-scale advanced analytics","author":"Venkataraman","year":"2016"},{"key":"10.1016\/j.future.2026.108423_bib0047","series-title":"Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing (EMNLP)","first-page":"2576","article-title":"Collaborative performance prediction for large language models","author":"Zhang","year":"2024"},{"key":"10.1016\/j.future.2026.108423_bib0048","series-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI)","first-page":"595","article-title":"Gandiva: introspective cluster scheduling for deep learning","author":"Xiao","year":"2018"},{"key":"10.1016\/j.future.2026.108423_bib0049","series-title":"17th USENIX Symposium on Networked Systems Design and Implementation (NSDI)","first-page":"485","article-title":"Tiresias: a GPU cluster manager for distributed deep learning","author":"Gu","year":"2020"},{"key":"10.1016\/j.future.2026.108423_bib0050","series-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI)","first-page":"447","article-title":"Salus: fine-grained GPU sharing primitives for deep learning applications","author":"Yu","year":"2019"},{"key":"10.1016\/j.future.2026.108423_bib0051","series-title":"Proceedings of the 29th ACM Symposium on Operating Systems Principles (SOSP)","first-page":"611","article-title":"PagedAttention: efficient memory management for large language model serving","author":"Korthikanti","year":"2023"},{"key":"10.1016\/j.future.2026.108423_bib0052","series-title":"18th USENIX Symposium on Operating Systems Design and Implementation (OSDI)","article-title":"Punica: multi-tenant LLM serving with dynamic LoRA composition","author":"Shi","year":"2024"},{"key":"10.1016\/j.future.2026.108423_bib0053","series-title":"21st USENIX Symposium on Networked Systems Design and Implementation (NSDI)","article-title":"SOLA: serving large language models in SLO-Aware manner","author":"Yang","year":"2024"},{"key":"10.1016\/j.future.2026.108423_bib0054","series-title":"17th USENIX Symposium on Operating Systems Design and Implementation (OSDI)","article-title":"TAPAS: a token-level adaptive scheduler for LLM inference","author":"Liu","year":"2023"},{"issue":"5","key":"10.1016\/j.future.2026.108423_bib0055","doi-asserted-by":"crossref","first-page":"1189","DOI":"10.1214\/aos\/1013203451","article-title":"Greedy function approximation: a gradient boosting machine","volume":"29","author":"Friedman","year":"2001","journal-title":"Ann. Stat."},{"key":"10.1016\/j.future.2026.108423_bib0056","series-title":"Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","first-page":"785","article-title":"XGBoost: a scalable tree boosting system","author":"Chen","year":"2016"},{"key":"10.1016\/j.future.2026.108423_bib0057","series-title":"Deep Learning","author":"Goodfellow","year":"2016"},{"issue":"3","key":"10.1016\/j.future.2026.108423_bib0058","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1023\/A:1022627411411","article-title":"Support-vector networks","volume":"20","author":"Cortes","year":"1995","journal-title":"Mach. Learn."},{"key":"10.1016\/j.future.2026.108423_bib0059","unstructured":"NAVER Cloud, HyperCLOVA X Team, HyperCLOVA X SEED, 2024, (https:\/\/huggingface.co\/nampdn-ai\/hyperclovax-seed). Hugging Face model card."},{"key":"10.1016\/j.future.2026.108423_bib0060","unstructured":"K.L. Team, Y. Bak, H. Lee, M. Ryu, J. Ham, S. Jung, D.W. Nam, T. Eo, D. Lee, D. Jung, B. Kim, N. Kim, J. Park, H. Ko, et al., Kanana: compute-efficient bilingual language models, 2025. https:\/\/arxiv.org\/abs\/2502.18934."},{"key":"10.1016\/j.future.2026.108423_bib0061","unstructured":"LG AI Research, EXAONE-3.5-2.4B-instruct, 2024, (https:\/\/huggingface.co\/LGAI-EXAONE\/EXAONE-3.5-2.4B-Instruct). Hugging Face model card."},{"key":"10.1016\/j.future.2026.108423_bib0062","unstructured":"ETRI LIRS, EAGLE-3B preview, 2025, (https:\/\/huggingface.co\/etri-lirs\/eagle-3b-preview). Hugging Face model card."},{"key":"10.1016\/j.future.2026.108423_bib0063","unstructured":"G. Team, Gemma 3 technical report, 2025, (https:\/\/ai.google.dev\/gemma\/docs\/gemma-3). Google technical report."},{"key":"10.1016\/j.future.2026.108423_bib0064","unstructured":"A.Q. Jiang, et al., Mistral 7B, (2023). https:\/\/arxiv.org\/abs\/2310.06825."},{"key":"10.1016\/j.future.2026.108423_bib0065","unstructured":"J. Bai, et al., Qwen3 technical report, (2025). https:\/\/arxiv.org\/abs\/2502.04291."},{"key":"10.1016\/j.future.2026.108423_bib0066","unstructured":"A. Grattafiori, et al., The Lama 3 herd of models, (2024). https:\/\/arxiv.org\/abs\/2407.21783."},{"key":"10.1016\/j.future.2026.108423_bib0067","unstructured":"T. Zhang, et al., DNA 1.0: a family of codesigned tokenizers and models, (2024). Describes Llama-DNA-8B, https:\/\/arxiv.org\/abs\/2410.04709."},{"key":"10.1016\/j.future.2026.108423_bib0068","unstructured":"Luxia labs, LUXIA-21.4B, 2024, (https:\/\/huggingface.co\/luxia-labs\/luxia-21.4b). Hugging Face model card."},{"key":"10.1016\/j.future.2026.108423_bib0069","unstructured":"LG AI Research, EXAONE-3.5-32B-instruct, 2024, (https:\/\/huggingface.co\/LGAI-EXAONE\/EXAONE-3.5-32B-Instruct). Hugging Face model card."},{"key":"10.1016\/j.future.2026.108423_bib0070","unstructured":"W.I. Choi, et al., KMMLU: measuring massive multitask language understanding in Korean, (2024). https:\/\/arxiv.org\/abs\/2503.00585."},{"key":"10.1016\/j.future.2026.108423_bib0071","unstructured":"J. Yoon, et al., K-hard: a hard test set for Korean language understanding, (2025). https:\/\/arxiv.org\/abs\/2506.07785."},{"key":"10.1016\/j.future.2026.108423_bib0072","unstructured":"D. Hendrycks, et al., Measuring massive multitask language understanding, (2020). https:\/\/arxiv.org\/abs\/2009.03300."},{"key":"10.1016\/j.future.2026.108423_bib0073","series-title":"Proceedings of EMNLP Workshop (NAACL 2018)","article-title":"Think you have solved question answering? Try ARC, the AI2 reasoning challenge","author":"Clark","year":"2018"},{"key":"10.1016\/j.future.2026.108423_bib0074","doi-asserted-by":"crossref","unstructured":"R. Zellers, et al., HellaSwag: can a machine really finish your sentence?, in: ACL, 2019. https:\/\/arxiv.org\/abs\/1905.07830.","DOI":"10.18653\/v1\/P19-1472"},{"key":"10.1016\/j.future.2026.108423_bib0075","unstructured":"M. Kim, et al., KoBEST: Korean balanced evaluation of NLP systems, (2022). https:\/\/arxiv.org\/abs\/2204.04541."},{"key":"10.1016\/j.future.2026.108423_bib0076","series-title":"LREC-COLING","article-title":"HAE-RAE: real-world knowledge-aware Korean instruction dataset and benchmark for LLMs","author":"Oh","year":"2024"},{"key":"10.1016\/j.future.2026.108423_bib0077","unstructured":"M. Kim, et al., KorMedMCQA: a comprehensive benchmark for Korean medical multiple-choice QA, (2024). https:\/\/arxiv.org\/abs\/2403.01469."},{"key":"10.1016\/j.future.2026.108423_bib0078","unstructured":"YBLG, CSATQA: Korean college scholastic ability test QA dataset, 2024, (https:\/\/huggingface.co\/datasets\/YBLG\/CSATQA). Hugging Face dataset card."}],"container-title":["Future Generation Computer Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167739X26000579?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167739X26000579?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T01:57:06Z","timestamp":1776131826000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0167739X26000579"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,8]]},"references-count":78,"alternative-id":["S0167739X26000579"],"URL":"https:\/\/doi.org\/10.1016\/j.future.2026.108423","relation":{},"ISSN":["0167-739X"],"issn-type":[{"value":"0167-739X","type":"print"}],"subject":[],"published":{"date-parts":[[2026,8]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"SpectraBench: A three-stage evolution framework for intelligent large language model evaluation","name":"articletitle","label":"Article Title"},{"value":"Future Generation Computer Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.future.2026.108423","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Author(s). Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"108423"}}