{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T07:46:05Z","timestamp":1766043965954,"version":"3.48.0"},"publisher-location":"Cham","reference-count":57,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783032054340"},{"type":"electronic","value":"9783032054357"}],"license":[{"start":{"date-parts":[[2025,9,12]],"date-time":"2025-09-12T00:00:00Z","timestamp":1757635200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,12]],"date-time":"2025-09-12T00:00:00Z","timestamp":1757635200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-05435-7_25","type":"book-chapter","created":{"date-parts":[[2025,9,13]],"date-time":"2025-09-13T01:30:51Z","timestamp":1757727051000},"page":"457-476","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Statistical Runtime Verification for\u00a0LLMs via\u00a0Robustness Estimation"],"prefix":"10.1007","author":[{"given":"Natan","family":"Levy","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Adiel","family":"Ashrov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guy","family":"Katz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,9,12]]},"reference":[{"key":"25_CR1","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1007\/978-3-642-04898-2_118","volume":"1","author":"T Anderson","year":"2011","unstructured":"Anderson, T.: Anderson-darling tests of goodness-of-fit. Int. Encyclopedia Stat. Sci. 1, 52\u201354 (2011)","journal-title":"Int. Encyclopedia Stat. Sci."},{"key":"25_CR2","unstructured":"BERT-Base-Uncased (2023). https:\/\/huggingface.co\/google-bert\/bert-base-uncased"},{"key":"25_CR3","unstructured":"B\u0113rzi\u0146\u0161, J., Kalni\u0146a, E.: Robustness of pre-trained language models against adversarial attacks. MZ Comput. J. 5(2) (2024)"},{"issue":"377","key":"25_CR4","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1080\/01621459.1982.10477788","volume":"77","author":"G Box","year":"1982","unstructured":"Box, G., Cox, D.: An analysis of transformations revisited, rebutted. J. Am. Stat. Assoc. 77(377), 209\u2013210 (1982)","journal-title":"J. Am. Stat. Assoc."},{"key":"25_CR5","unstructured":"Brix, C., Bak, S., Liu, C., Johnson, T.: The Fourth Int. Verification of Neural Networks Competition (VNN-COMP): Summary and Results. Technical report (2023). https:\/\/arxiv.org\/abs\/2312.16760"},{"key":"25_CR6","unstructured":"Carlini, N., Katz, G., Barrett, C., Dill, D.: Provably Minimally-Distorted Adversarial Examples. Technical report (2017). https:\/\/arxiv.org\/abs\/1709.10207"},{"key":"25_CR7","doi-asserted-by":"crossref","unstructured":"Cheong, I., Xia, K., Feng, K.K., Chen, Q.Z., Zhang, A.X.: (A) i am not a lawyer, but...: engaging legal experts towards responsible LLM policies for legal advice. In: Proceedings of the ACM Conference on Fairness, Accountability, and Transparency (FACCT), pp. 2454\u20132469 (2024)","DOI":"10.1145\/3630106.3659048"},{"key":"25_CR8","unstructured":"Cohen, J., Rosenfeld, E., Kolter, Z.: Certified adversarial robustness via randomized smoothing. In: Proceedings of the 36th International Conference on Machine Learning (ICML), pp. 1310\u20131320 (2019)"},{"key":"25_CR9","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: Pre-Training of Deep Bidirectional Transformers for Language Understanding, Technical report (2018). http:\/\/arxiv.org\/abs\/1810.04805"},{"key":"25_CR10","unstructured":"Dong, X., Luu, A.T., Ji, R., Liu, H.: Towards Robustness Against Natural Language Word Substitutions, Technical report (2021). https:\/\/arxiv.org\/abs\/2107.13541"},{"key":"25_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijpharm.2023.123741","volume":"652","author":"M Elbadawi","year":"2024","unstructured":"Elbadawi, M., Li, H., Basit, A.W., Gaisford, S.: The role of artificial intelligence in generating original scientific research. Int. J. Pharmaceut. 652, 123741 (2024)","journal-title":"Int. J. Pharmaceut."},{"key":"25_CR12","unstructured":"Goodfellow, I., Shlens, J., Szegedy, C.: Explaining and Harnessing Adversarial Examples, Technical report (2014). http:\/\/arxiv.org\/abs\/1412.6572"},{"key":"25_CR13","unstructured":"Guo, C., Pleiss, G., Sun, Y., Weinberger, Q.: On calibration of modern neural networks. In: Proceedings of the 34th International Conference on Machine Learning, pp. 1321\u20131330 (2017)"},{"key":"25_CR14","unstructured":"Hadar, A., Levy, N., Winokur, M.: Management and Detection System for Medical Surgical Equipment, Technical report (2022). http:\/\/arxiv.org\/abs\/2211.02351"},{"key":"25_CR15","doi-asserted-by":"crossref","unstructured":"Hashemi, V., K\u0159et\u00ednsk\u1ef3, J., Rieder, S., Sch\u00f6n, T., Vorhoff, J.: Gaussian-based and outside-the-box runtime monitoring join forces. In: Proceedings of the 24th International Conference on Runtime Verification, pp. 218\u2013228 (2024)","DOI":"10.1007\/978-3-031-74234-7_14"},{"key":"25_CR16","doi-asserted-by":"crossref","unstructured":"He, W., Wu, C., Bensalem, S.: Box-based monitor approach for out-of-distribution detection in YOLO: an exploratory study. In: Proceedings of the 24th International Conference on Runtime Verification (RV), pp. 229\u2013239 (2024)","DOI":"10.1007\/978-3-031-74234-7_15"},{"key":"25_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1007\/978-3-030-86362-3_7","volume-title":"Artificial Neural Networks and Machine Learning \u2013 ICANN 2021","author":"C Huang","year":"2021","unstructured":"Huang, C., Hu, Z., Huang, X., Pei, K.: Statistical certification of acceptable robustness for neural networks. In: Farka\u0161, I., Masulli, P., Otte, S., Wermter, S. (eds.) ICANN 2021. LNCS, vol. 12891, pp. 79\u201390. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-86362-3_7"},{"key":"25_CR18","doi-asserted-by":"crossref","unstructured":"Huang, Y., Sansom, J., Ma, Z., Gervits, F., Chai, J.: Drivlme: enhancing LLM- based autonomous driving agents with embodied and social experiences. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 3153\u20133160. IEEE (2024)","DOI":"10.1109\/IROS58592.2024.10802555"},{"key":"25_CR19","doi-asserted-by":"crossref","unstructured":"Jin, D., Jin, Z., Zhou, J.T., Szolovits, P.: Is Bert really robust? A strong baseline for natural language attack on text classification and entailment. In: Proceedings 34th of the AAAI Conference on Artificial Intelligence, pp. 8018\u20138025 (2020)","DOI":"10.1609\/aaai.v34i05.6311"},{"key":"25_CR20","doi-asserted-by":"crossref","unstructured":"Jones, E., Jia, R., Raghunathan, A., Liang, P.: Robust Encodings: A Framework for Combating Adversarial Typos, Technical report (2020). https:\/\/arxiv.org\/abs\/2005.01229","DOI":"10.18653\/v1\/2020.acl-main.245"},{"issue":"3","key":"25_CR21","doi-asserted-by":"publisher","first-page":"598","DOI":"10.2514\/1.G003724","volume":"42","author":"D Julian","year":"2019","unstructured":"Julian, D., Kochenderfer, J., Owen, P.: Deep neural network compression for aircraft collision avoidance systems. J. Guid. Control. Dyn. 42(3), 598\u2013608 (2019)","journal-title":"J. Guid. Control. Dyn."},{"key":"25_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1007\/978-3-319-63387-9_5","volume-title":"Computer Aided Verification","author":"G Katz","year":"2017","unstructured":"Katz, G., Barrett, C., Dill, D.L., Julian, K., Kochenderfer, M.J.: Reluplex: an efficient SMT solver for verifying deep neural networks. In: Majumdar, R., Kun\u010dak, V. (eds.) CAV 2017. LNCS, vol. 10426, pp. 97\u2013117. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-63387-9_5"},{"key":"25_CR23","doi-asserted-by":"crossref","unstructured":"Katz, G., Barrett, C., Dill, D., Julian, K., Kochenderfer, M.: Reluplex: a calculus for reasoning about deep neural networks. Formal Methods in System Design (FMSD) (2021)","DOI":"10.1007\/s10703-021-00363-7"},{"key":"25_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1007\/978-3-030-25540-4_26","volume-title":"Computer Aided Verification","author":"G Katz","year":"2019","unstructured":"Katz, G., et al.: The marabou framework for verification and analysis of deep neural networks. In: Dillig, I., Tasiran, S. (eds.) CAV 2019. LNCS, vol. 11561, pp. 443\u2013452. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-25540-4_26"},{"key":"25_CR25","doi-asserted-by":"crossref","unstructured":"Katz, G., Levy, N., Refaeli, I., Yerushalmi, R.: DEM: a method for certifying deep neural network classifier outputs in aerospace. In: Proceedings of the 43rd Digital Avionics Systems Conference\u00a0(DASC) (2024)","DOI":"10.1109\/DASC62030.2024.10748779"},{"key":"25_CR26","unstructured":"Kim, H., Papamakarios, G., Mnih, A.: The lipschitz constant of self-attention. In: Proceedings of the 38th International Conference on Machine Learning (ICML), pp. 5562\u20135571 (2021)"},{"key":"25_CR27","doi-asserted-by":"publisher","first-page":"871","DOI":"10.4271\/2011-01-2564","volume":"4","author":"A Landi","year":"2011","unstructured":"Landi, A., Nicholson, M.: ARP4754A\/ED-79A-guidelines for development of civil aircraft and systems-enhancements, novelties and key topics. SAE Int. J. Aerospace 4, 871\u2013879 (2011)","journal-title":"SAE Int. J. Aerospace"},{"key":"25_CR28","unstructured":"Levy, N., Ashrov, A., Katz, G.: Towards Robust LLMs: an adversarial robustness measurement framework \u2014 code (2024). https:\/\/github.com\/adielashrov\/trust-ai-roma-for-llm"},{"key":"25_CR29","unstructured":"Levy, N., Katz, G.: RoMA: a method for neural network robustness measurement and assessment. In: Proceedings of the 29th International Conference on Neural Information Processing (ICONIP) (2021)"},{"key":"25_CR30","doi-asserted-by":"crossref","unstructured":"Levy, N., Yerushalmi, R., Katz, G.: gRoMA: a tool for measuring the global robustness of deep neural networks. In: Proceedings o the 12th International Symposium on Leveraging Applications of Formal Methods, Verification and Validation (ISoLA), pp. 160\u2013170 (2023)","DOI":"10.1007\/978-3-031-46002-9_9"},{"key":"25_CR31","doi-asserted-by":"crossref","unstructured":"Levy, O., Dikman, I., Levy, N., Winokur, M.: Work in progress: AI-powered engineering-bridging theory and practice. In: Proceedings of the 9th IEEE World Engineering Education Conference (EDUNINE) (2025)","DOI":"10.1109\/EDUNINE62377.2025.10981330"},{"key":"25_CR32","doi-asserted-by":"crossref","unstructured":"Marzari, L., Corsi, D., Cicalese, F., Farinelli, A.: The #Dnn-Verification Problem: Counting Unsafe Inputs for Deep Neural Networks, Technical report (2023). https:\/\/arxiv.org\/abs\/2301.07068","DOI":"10.24963\/ijcai.2023\/25"},{"key":"25_CR33","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient Estimation of Word Representations in Vector Space, Technical report (2013). https:\/\/arxiv.org\/abs\/1301.3781"},{"key":"25_CR34","doi-asserted-by":"crossref","unstructured":"Morris, J.X., Lifland, E., Yoo, J.Y., Grigsby, J., Jin, D., Qi, Y.: Textattack: A Framework for Adversarial Attacks, Data Augmentation, and Adversarial Training in NLP, Technical report (2020). https:\/\/arxiv.org\/abs\/2005.05909","DOI":"10.18653\/v1\/2020.emnlp-demos.16"},{"key":"25_CR35","unstructured":"OpenAI: GPT-4 Technical Report, Technical report (2024). https:\/\/arxiv.org\/abs\/2303.08774"},{"key":"25_CR36","unstructured":"OpenAI: Chatgpt (july 2025 version) (2025). https:\/\/chat.openai.com. Accessed Jul 2025"},{"key":"25_CR37","doi-asserted-by":"crossref","unstructured":"Owen, M., Panken, A., Moss, R., Alvarez, L., Leeper, C.: ACAS Xu: integrated collision avoidance and detect and avoid capability for UAS. In: Proceedings of the 38th IEEE\/AIAA Digital Avionics Systems Conference (DASC), pp. 1\u201310 (2019)","DOI":"10.1109\/DASC43569.2019.9081758"},{"key":"25_CR38","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.: Glove: global vectors for word representation. In: Proceedings of the International Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1532\u20131543 (2014)","DOI":"10.3115\/v1\/D14-1162"},{"issue":"1","key":"25_CR39","doi-asserted-by":"publisher","first-page":"9378","DOI":"10.1038\/s41598-025-93566-5","volume":"15","author":"S Qiu","year":"2025","unstructured":"Qiu, S., et al.: Hard label adversarial attack with high query efficiency against NLP models. Sci. Rep. 15(1), 9378 (2025)","journal-title":"Sci. Rep."},{"key":"25_CR40","unstructured":"Raffel, C., et al.: Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer, Technical report (2023). https:\/\/arxiv.org\/abs\/1910.10683"},{"key":"25_CR41","unstructured":"Rohan, A., et\u00a0al.: PaLM 2 Technical Report, Technical report (2023). https:\/\/arxiv.org\/abs\/2305.10403"},{"key":"25_CR42","doi-asserted-by":"crossref","unstructured":"Romero-Alvarado, D., Hern\u00e1ndez-Orallo, J., Mart\u00ednez-Plumed, F.: How resilient are language models to text perturbations? In: Proceedings of the 25th International Conference on Intelligent Data Engineering and Automated Learning (IDEAL), pp. 85\u201396 (2024)","DOI":"10.1007\/978-3-031-77731-8_8"},{"issue":"6","key":"25_CR43","doi-asserted-by":"publisher","first-page":"192","DOI":"10.3390\/fi15060192","volume":"15","author":"K Roumeliotis","year":"2023","unstructured":"Roumeliotis, K., Tselikas, N.: ChatGPT and open-AI models: a preliminary review. Future Internet 15(6), 192 (2023)","journal-title":"Future Internet"},{"key":"25_CR44","doi-asserted-by":"crossref","unstructured":"Sato, M., Suzuki, J., Shindo, H., Matsumoto, Y.: Interpretable Adversarial Perturbation in Input Embedding Space for Text, Technical report (2018). https:\/\/arxiv.org\/abs\/1805.02917","DOI":"10.24963\/ijcai.2018\/601"},{"key":"25_CR45","unstructured":"Singh, A., Singh, N., Vatsal, S.: Robustness of LLMs to Perturbations in Text, Technical report (2024). https:\/\/arxiv.org\/abs\/2407.08989"},{"key":"25_CR46","doi-asserted-by":"crossref","unstructured":"Socher, R., et al.: Recursive deep models for semantic compositionality over a sentiment treebank. In: Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1631\u20131642 (2013)","DOI":"10.18653\/v1\/D13-1170"},{"key":"25_CR47","doi-asserted-by":"crossref","unstructured":"Subramanian, V., Benetos, E., Xu, N., McDonald, S., Sandler, M.: Adversarial Attacks in Sound Event Classification, Technical report (2019). https:\/\/arxiv.org\/abs\/1907.02477","DOI":"10.33682\/sp9n-qk06"},{"key":"25_CR48","unstructured":"Temple, B., Buescher, K., Armstrong, J.: PyRAT-Python Radiography Analysis Tool (u), Los Alamos National Laboratory (LANL) (2011)"},{"key":"25_CR49","unstructured":"Touvron, H., et\u00a0al.: LLaMA: Open and Efficient Foundation Language Models, Technical report (2023). https:\/\/arxiv.org\/abs\/2302.13971"},{"key":"25_CR50","unstructured":"Tsuzuku, Y., Sato, I., Sugiyama, M.: Lipschitz-margin training: scalable certification of perturbation invariance for deep neural networks. In: Proceedings of the 32nd Advances in Neural Information Processing Systems (NeurIPS), pp. 6541\u20136550 (2018)"},{"key":"25_CR51","unstructured":"Wang, B., et al.: Adversarial GLUE: A Multi-Task Benchmark for Robustness Evaluation of Language Models, Technical report (2021). https:\/\/arxiv.org\/abs\/2111.02840"},{"key":"25_CR52","unstructured":"Wang, S., et al.: Beta-CROWN: efficient bound propagation with per-neuron split constraints for complete and incomplete neural network verification. In: Proceedings 35th Conference on Neural Information Processing Systems (NeurIPS) (2021)"},{"key":"25_CR53","unstructured":"Wang, Y., Zhao, Y.: Rupbench: benchmarking reasoning under perturbations for robustness evaluation in large language models, Technical report (2024). https:\/\/arxiv.org\/abs\/2406.11020"},{"key":"25_CR54","unstructured":"Webb, S., Rainforth, T., Teh, Y.W., Kumar, P.: A statistical approach to assessing neural network robustness. In: Proceedings of the 7th International Conference on Learning Representations (ICLR) (2019)"},{"key":"25_CR55","doi-asserted-by":"crossref","unstructured":"Yang, F., Zhan, S.S., Wang, Y., Huang, C., Zhu, Q.: Case study: runtime safety verification of neural network controlled system. In: Proceedings of the 24th International Conference on Runtime Verification (RV), pp. 205\u2013217 (2024)","DOI":"10.1007\/978-3-031-74234-7_13"},{"key":"25_CR56","unstructured":"Yoshida, Y., Miyato, T.: Spectral Norm Regularization for Improving the Generalizability of Deep Learning, Technical report (2017). https:\/\/arxiv.org\/abs\/1705.10941"},{"key":"25_CR57","unstructured":"Zhao, W.X., et\u00a0al.: A Survey of Large Language Models, Technical report (2025). https:\/\/arxiv.org\/abs\/2303.18223"}],"container-title":["Lecture Notes in Computer Science","Runtime Verification"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-05435-7_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T07:43:27Z","timestamp":1766043807000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-05435-7_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,12]]},"ISBN":["9783032054340","9783032054357"],"references-count":57,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-05435-7_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,9,12]]},"assertion":[{"value":"12 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"RV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Runtime Verification","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Graz","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Austria","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"rv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/rv25.isec.tugraz.at\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}