{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T13:13:44Z","timestamp":1776950024295,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,5,4]]},"DOI":"10.1145\/3777884.3797017","type":"proceedings-article","created":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T12:27:26Z","timestamp":1776947246000},"page":"406-418","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["An Evaluation Study of Generative AI Systems: Framework-Aware Performance Under Real-World Constraints"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-4634-6795","authenticated-orcid":false,"given":"Abed","family":"Matinpour","sequence":"first","affiliation":[{"name":"Electrical Engineering and Computer Science Department, York University, Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9726-5408","authenticated-orcid":false,"given":"Farhoud","family":"Jafari Kaleibar","sequence":"additional","affiliation":[{"name":"Electrical Engineering and Computer Science Department, York University, Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3580-161X","authenticated-orcid":false,"given":"Sara","family":"Fehresti","sequence":"additional","affiliation":[{"name":"The School of Information Technology, York University, Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2591-213X","authenticated-orcid":false,"given":"Shaylin","family":"Ziaei","sequence":"additional","affiliation":[{"name":"Electrical Engineering and Computer Science Department, York University, Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0383-920X","authenticated-orcid":false,"given":"Marin","family":"Litoiu","sequence":"additional","affiliation":[{"name":"Electrical Engineering and Computer Science Department, York University, Toronto, Canada"}]}],"member":"320","published-online":{"date-parts":[[2026,5,3]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"May. ETL: Ensemble--at--Training for Low-cost Open-source Large Language Models. In 2025 15th International Conference on Electrical Engineering (ICEENG) (pp. 1--6). IEEE.","author":"Zaki A.M.","unstructured":"Zaki, A.M., Khalil, M.I. and Abbas, H., 2025, May. ETL: Ensemble--at--Training for Low-cost Open-source Large Language Models. In 2025 15th International Conference on Electrical Engineering (ICEENG) (pp. 1--6). IEEE."},{"key":"e_1_3_2_1_2_1","volume-title":"May. TELLM: Advancements in Knowledge Incorporation and Task-specific Enhancements of Large Language Models. In 2024 32nd International Conference on Electrical Engineering (ICEE) (pp. 1--5). IEEE.","author":"Feizi F.","unstructured":"Feizi, F., HosseinNia, A., Hemmatyar, M., Rahimi, F. and Kaleibar, F.J., 2024, May. TELLM: Advancements in Knowledge Incorporation and Task-specific Enhancements of Large Language Models. In 2024 32nd International Conference on Electrical Engineering (ICEE) (pp. 1--5). IEEE."},{"issue":"12","key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","first-page":"e202500282","DOI":"10.1002\/aisy.202500282","article-title":"Optimized DeepLabV3 for Clinical Data Analysis through Advanced Particle Swarm Optimization-Based Channel Selection","volume":"7","author":"Norouziazad A.","year":"2025","unstructured":"Norouziazad, A., Homam, B., Feygin, A., Najafpour Ghazvini Fardshad, M., Rozenblat, S., Matinpour, A., Laly, A., Esmaeildoost, F. and Salahandish, R., 2025. Optimized DeepLabV3 for Clinical Data Analysis through Advanced Particle Swarm Optimization-Based Channel Selection. Advanced Intelligent Systems, 7(12), p.e202500282.","journal-title":"Advanced Intelligent Systems"},{"key":"e_1_3_2_1_4_1","volume-title":"Integrated Intelligence: A Survey of Compound Al Systems. arXiv preprint arXiv:2506.04565.","author":"Chen J.","year":"2025","unstructured":"Chen, J., Ye, J. and Wang, G., 2025. From Standalone LLMs to Integrated Intelligence: A Survey of Compound Al Systems. arXiv preprint arXiv:2506.04565."},{"key":"e_1_3_2_1_5_1","volume-title":"Generative AI Systems: A Systems-based Perspective on Generative AI. arXiv preprint arXiv:2407.11001","author":"Jakub M.","year":"2024","unstructured":"Jakub M. Tomczak. 2024. Generative AI Systems: A Systems-based Perspective on Generative AI. arXiv preprint arXiv:2407.11001 (2024)."},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the 33rd ACM International Conference on the Foundations of Software Engineering (pp. 1285--1288)","author":"Joshi O.","year":"2025","unstructured":"Joshi, O., 2025, June. Co-Intelligence in Software Engineering: Understanding and Optimizing GenAI Integration in Software Engineering for Skill Development in Time-Constrained Programming. In Proceedings of the 33rd ACM International Conference on the Foundations of Software Engineering (pp. 1285--1288)."},{"key":"e_1_3_2_1_7_1","volume-title":"Keheliya Gallaba, Filipe R. Cogo, Boyuan Chen, Haoxiang Zhang, Kishanthan Thangarajah, Gustavo A. Oliva, Jinqiu Lin, andWeiyi Shang.","author":"Ahmed E.","year":"2024","unstructured":"Ahmed E. Hassan, Dayi Lin, Gopi Krishnan Rajbahadur, Keheliya Gallaba, Filipe R. Cogo, Boyuan Chen, Haoxiang Zhang, Kishanthan Thangarajah, Gustavo A. Oliva, Jinqiu Lin, andWeiyi Shang. 2024. Rethinking Software Engineering in the Foundation Model Era: A Curated Catalogue of Challenges in the Development of Trustworthy FMware. arXiv preprint arXiv:2402.15943 (2024)."},{"key":"e_1_3_2_1_8_1","unstructured":"White J. 2024. Building living software systems with generative & agentic AI. arXiv preprint arXiv:2408.01768."},{"key":"e_1_3_2_1_9_1","volume-title":"November. Augmenting Automatic Root-Cause Identification with Incident Alerts Using LLM. In 2024 34th International Conference on Collaborative Advances in Software and COmputiNg (CASCON) (pp. 1--10)","author":"Sarda K.","unstructured":"Sarda, K., Namrud, Z., Watts, I., Shwartz, L., Nagar, S., Mohapatra, P. and Litoiu, M., 2024, November. Augmenting Automatic Root-Cause Identification with Incident Alerts Using LLM. In 2024 34th International Conference on Collaborative Advances in Software and COmputiNg (CASCON) (pp. 1--10). IEEE."},{"key":"e_1_3_2_1_10_1","volume-title":"Repocoder: Repository-level code completion through iterative retrieval and generation. arXiv preprint arXiv:2303.12570.","author":"Zhang F.","year":"2023","unstructured":"Zhang, F., Chen, B., Zhang, Y., Keung, J., Liu, J., Zan, D., Mao, Y., Lou, J.G. and Chen, W., 2023. Repocoder: Repository-level code completion through iterative retrieval and generation. arXiv preprint arXiv:2303.12570."},{"key":"e_1_3_2_1_11_1","volume-title":"LLM-Powered Architectures: Designing the Next Generation of Intelligent Software Systems","author":"Shethiya A.S.","unstructured":"Shethiya, A.S., 2023. LLM-Powered Architectures: Designing the Next Generation of Intelligent Software Systems. Academia Nexus Journal, 2(1)."},{"key":"e_1_3_2_1_12_1","volume-title":"ChatGPT, and other LLMs","author":"Ben A.","unstructured":"Ben A., 2023. Generative AI with LangChain: Build large language model (LLM) apps with Python, ChatGPT, and other LLMs. Packt Publishing Ltd."},{"key":"e_1_3_2_1_13_1","volume-title":"Architecture of Applications Powered by Large Language Models. Master's thesis","author":"Oleg E.","unstructured":"Oleg E., 2024. Architecture of Applications Powered by Large Language Models. Master's thesis. Metropolia University of Applied Sciences, Finland."},{"key":"e_1_3_2_1_14_1","volume-title":"IJGIS Fall of 2024 Conference. The New World Foundation.","author":"Malviya R.K.","unstructured":"Malviya, R.K., Javalkar, V. and Malviya, R., 2024, September. Scalability and performance benchmarking of langchain, llamaindex, and haystack for enterprise ai customer support systems. In IJGIS Fall of 2024 Conference. The New World Foundation."},{"key":"e_1_3_2_1_15_1","unstructured":"Milli\u00e8re R. 2023. The alignment problem in context. arXiv preprint arXiv:2311.02147."},{"key":"e_1_3_2_1_16_1","unstructured":"Guo P.F. Tsai Y.D. and Lin S.D. 2024. Benchmarking large language model uncertainty for prompt optimization. arXiv preprint arXiv:2409.10044."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3617946.3617951"},{"key":"e_1_3_2_1_18_1","volume-title":"International Conference on Smart Data Intelligence (pp. 645--664)","author":"Bansal S.","unstructured":"Bansal, S., Ishika, Muskan and Arya, G., 2025, January. Comparative Study of LLM Libraries: LangChain, LlamaIndex, OpenAI API, Hugging Face, and LoRA. In International Conference on Smart Data Intelligence (pp. 645--664). Singapore: Springer Nature Singapore."},{"key":"e_1_3_2_1_19_1","first-page":"4540","article-title":"Taskbench: Benchmarking large language models for task automation","volume":"37","author":"Shen Y.","year":"2024","unstructured":"Shen, Y., Song, K., Tan, X., Zhang, W., Ren, K., Yuan, S., Lu, W., Li, D. and Zhuang, Y., 2024. Taskbench: Benchmarking large language models for task automation. Advances in Neural Information Processing Systems, 37, pp.4540--4574.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","first-page":"78142","DOI":"10.52202\/075280-3414","article-title":"Benchmarking foundation models with language-model-asan- examiner","volume":"36","author":"Bai Y.","year":"2023","unstructured":"Bai, Y., Ying, J., Cao, Y., Lv, X., He, Y., Wang, X., Yu, J., Zeng, K., Xiao, Y., Lyu, H. and Zhang, J., 2023. Benchmarking foundation models with language-model-asan- examiner. Advances in Neural Information Processing Systems, 36, pp.78142--78167.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_21_1","volume-title":"Wildbench: Benchmarking llms with challenging tasks from real users in the wild. arXiv preprint arXiv:2406.04770","author":"Lin B.Y.","year":"2024","unstructured":"Lin, B.Y., Deng, Y., Chandu, K., Brahman, F., Ravichander, A., Pyatkin, V., Dziri, N., Bras, R.L. and Choi, Y., 2024. Wildbench: Benchmarking llms with challenging tasks from real users in the wild. arXiv preprint arXiv:2406.04770, 2024."},{"key":"e_1_3_2_1_22_1","unstructured":"Hendrycks D. Burns C. Basart S. Zou A. Mazeika M. Song D. and Steinhardt J. 2020. Measuring massive multitask language understanding. arXiv preprint arXiv:2009.03300."},{"key":"e_1_3_2_1_23_1","volume-title":"Livebench: A challenging, contamination-free llm benchmark. arXiv preprint arXiv:2406.19314, 4.","author":"White C.","year":"2024","unstructured":"White, C., Dooley, S., Roberts, M., Pal, A., Feuer, B., Jain, S., Shwartz-Ziv, R., Jain, N., Saifullah, K., Naidu, S. and Hegde, C., 2024. Livebench: A challenging, contamination-free llm benchmark. arXiv preprint arXiv:2406.19314, 4."},{"key":"e_1_3_2_1_24_1","volume-title":"Companion Proceedings of the 32nd ACM International Conference on the Foundations of Software Engineering (pp. 417--428)","author":"Goel D.","unstructured":"Goel, D., Husain, F., Singh, A., Ghosh, S., Parayil, A., Bansal, C., Zhang, X. and Rajmohan, S., 2024, July. X-lifecycle learning for cloud incident management using llms. In Companion Proceedings of the 32nd ACM International Conference on the Foundations of Software Engineering (pp. 417--428)."},{"key":"e_1_3_2_1_25_1","unstructured":"Mei L. Yao J. Ge Y. Wang Y. Bi B. Cai Y. Liu J. Li M. Li Z.Z. Zhang D. and Zhou C. 2025. A survey of context engineering for large language models. arXiv preprint arXiv:2507.13334."},{"key":"e_1_3_2_1_26_1","volume-title":"Titans: A Survey of Efficient LLM Inference Serving. arXiv preprint arXiv:2504.19720.","author":"Zhen R.","year":"2025","unstructured":"Zhen, R., Li, J., Ji, Y., Yang, Z., Liu, T., Xia, Q., Duan, X., Wang, Z., Huai, B. and Zhang, M., 2025. Taming the Titans: A Survey of Efficient LLM Inference Serving. arXiv preprint arXiv:2504.19720."},{"key":"e_1_3_2_1_27_1","unstructured":"Li Y. 2023. A practical survey on zero-shot prompt design for in-context learning. arXiv preprint arXiv:2309.13205."},{"key":"e_1_3_2_1_28_1","volume-title":"December. Towards Adaptive Context Management for Intelligent Conversational Question Answering. In International Conference on Advanced Data Mining and Applications (pp. 360--375)","author":"Perera M.M.","unstructured":"Perera, M.M., Mahmood, A., Wijethilake, K.E. and Sheng, Q.Z., 2024, December. Towards Adaptive Context Management for Intelligent Conversational Question Answering. In International Conference on Advanced Data Mining and Applications (pp. 360--375). Singapore: Springer Nature Singapore."},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of the 29th symposium on operating systems principles (pp. 611--626)","author":"Kwon W.","unstructured":"Kwon,W., Li, Z., Zhuang, S., Sheng, Y., Zheng, L., Yu, C.H., Gonzalez, J., Zhang, H. and Stoica, I., 2023, October. Efficient memory management for large language model serving with pagedattention. In Proceedings of the 29th symposium on operating systems principles (pp. 611--626)."},{"key":"e_1_3_2_1_30_1","unstructured":"Li Y. Yu D. Chen Y. and Gao J. 2023. Phi-2: The surprising power of small language models. Microsoft Research Technical Report. Available at: https:\/\/huggingface.co\/microsoft\/phi-2"},{"key":"e_1_3_2_1_31_1","unstructured":"Jiang A. Sablayrolles A. Mensch A. et al. 2023. Mistral 7B. arXiv preprint arXiv:2310.06825. Available at: https:\/\/huggingface.co\/mistralai\/Mistral-7BInstruct-v0.2"},{"key":"e_1_3_2_1_32_1","unstructured":"Yu L. Li Y. et al. 2023. MetaMath: Improving Mathematical Reasoning in Language Models. arXiv preprint arXiv:2309.12284. Available at: https:\/\/huggingface.co\/meta-math\/MetaMath-Mistral-7B"},{"key":"e_1_3_2_1_33_1","unstructured":"Touvron H. Martin L. Stone K. et al. 2023. LLaMA 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288. Available at: https:\/\/huggingface.co\/meta-llama\/Llama-2--7b-chat-hf"},{"key":"e_1_3_2_1_34_1","unstructured":"Meta AI 2024. The LLaMA 3.1 Language Models. Technical report. Available at: https:\/\/huggingface.co\/nvidia\/Meta-Llama-3.1--8B-Instruct-ONNX-INT4"},{"key":"e_1_3_2_1_35_1","unstructured":"DeepSeek-AI 2024. DeepSeek LLM: Scaling Open-Source Language Models. Technical report. Available at: https:\/\/huggingface.co\/deepseek-ai\/deepseek-llm-7bbase"},{"key":"e_1_3_2_1_36_1","unstructured":"DeepSeek-AI 2024. DeepSeek-R1: Reasoning Models via Distillation. Technical report. Available at: https:\/\/huggingface.co\/deepseek-ai\/DeepSeek-R1-Distill-7B"},{"key":"e_1_3_2_1_37_1","unstructured":"Jiang A. Sablayrolles A. Mensch A. et al. 2023. Mistral 7B (Quantized Variant). Derived from Mistral 7B. Available at: https:\/\/huggingface.co\/mistralai\/Mistral-7B-Instruct-v0.2"}],"event":{"name":"ICPE '26: 17th ACM\/SPEC International Conference on Performance Engineering","location":"Florence Italy","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","SIGMETRICS ACM Special Interest Group on Measurement and Evaluation","SPEC"]},"container-title":["Proceedings of the 17th ACM\/SPEC International Conference on Performance Engineering"],"original-title":[],"deposited":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T12:27:58Z","timestamp":1776947278000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3777884.3797017"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,3]]},"references-count":37,"alternative-id":["10.1145\/3777884.3797017","10.1145\/3777884"],"URL":"https:\/\/doi.org\/10.1145\/3777884.3797017","relation":{},"subject":[],"published":{"date-parts":[[2026,5,3]]},"assertion":[{"value":"2026-05-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}