{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T15:23:38Z","timestamp":1774365818340,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,27]],"date-time":"2024-10-27T00:00:00Z","timestamp":1729987200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,27]]},"DOI":"10.1145\/3691620.3695060","type":"proceedings-article","created":{"date-parts":[[2024,10,18]],"date-time":"2024-10-18T15:39:19Z","timestamp":1729265959000},"page":"643-655","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["GlitchProber: Advancing Effective Detection and Mitigation of Glitch Tokens in Large Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-6447-1756","authenticated-orcid":false,"given":"Zhibo","family":"Zhang","sequence":"first","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, Hubei, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5332-9890","authenticated-orcid":false,"given":"Wuxia","family":"Bai","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, Hubei, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8032-3841","authenticated-orcid":false,"given":"Yuxi","family":"Li","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, Hubei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1039-2151","authenticated-orcid":false,"given":"Mark Huasong","family":"Meng","sequence":"additional","affiliation":[{"name":"Technical University of Munich, Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3977-6573","authenticated-orcid":false,"given":"Kailong","family":"Wang","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, Hubei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2023-0247","authenticated-orcid":false,"given":"Ling","family":"Shi","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2990-1614","authenticated-orcid":false,"given":"Li","family":"Li","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5727-4326","authenticated-orcid":false,"given":"Jun","family":"Wang","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1100-8633","authenticated-orcid":false,"given":"Haoyu","family":"Wang","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, Hubei, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Principal component analysis","author":"Abdi Herv\u00e9","year":"2010","unstructured":"Herv\u00e9 Abdi and Lynne J Williams. 2010. Principal component analysis. Wiley interdisciplinary reviews: computational statistics 2, 4 (2010), 433--459."},{"key":"e_1_3_2_1_2_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_3_1","volume-title":"Yi: Open Foundation Models by 01.AI. arXiv:2403.04652 [cs.CL]","author":"Young Alex","year":"2024","unstructured":"01. AI,:, Alex Young, Bei Chen, Chao Li, Chengen Huang, Ge Zhang, Guanwei Zhang, Heng Li, Jiangcheng Zhu, Jianqun Chen, Jing Chang, Kaidong Yu, Peng Liu, Qiang Liu, Shawn Yue, Senbin Yang, Shiming Yang, Tao Yu, Wen Xie, Wenhao Huang, Xiaohui Hu, Xiaoyi Ren, Xinyao Niu, Pengcheng Nie, Yuchi Xu, Yudong Liu, Yue Wang, Yuxuan Cai, Zhenyu Gu, Zhiyuan Liu, and Zonghong Dai. 2024. Yi: Open Foundation Models by 01.AI. arXiv:2403.04652 [cs.CL]"},{"key":"e_1_3_2_1_4_1","volume-title":"Sonnet, Haiku. Online. https:\/\/www-cdn.anthropic.com\/de8ba9b01c9ab7cbabf5c33b80b7bbc618857627\/Model_Card_Claude_3.pdf","year":"2024","unstructured":"Anthropic. 2024. The Claude 3 Model Family: Opus, Sonnet, Haiku. Online. https:\/\/www-cdn.anthropic.com\/de8ba9b01c9ab7cbabf5c33b80b7bbc618857627\/Model_Card_Claude_3.pdf (Accessed: 2024-04-24)."},{"key":"e_1_3_2_1_6_1","unstructured":"Mark Chen Jerry Tworek Heewoo Jun Qiming Yuan Henrique Ponde de Oliveira Pinto Jared Kaplan Harri Edwards Yuri Burda Nicholas Joseph Greg Brockman Alex Ray Raul Puri Gretchen Krueger Michael Petrov Heidy Khlaaf Girish Sastry Pamela Mishkin Brooke Chan Scott Gray Nick Ryder Mikhail Pavlov Alethea Power Lukasz Kaiser Mohammad Bavarian Clemens Winter Philippe Tillet Felipe Petroski Such Dave Cummings Matthias Plappert Fotios Chantzis Elizabeth Barnes Ariel Herbert-Voss William Hebgen Guss Alex Nichol Alex Paino Nikolas Tezak Jie Tang Igor Babuschkin Suchir Balaji Shantanu Jain William Saunders Christopher Hesse Andrew N. Carr Jan Leike Josh Achiam Vedant Misra Evan Morikawa Alec Radford Matthew Knight Miles Brundage Mira Murati Katie Mayer Peter Welinder Bob McGrew Dario Amodei Sam McCandlish Ilya Sutskever and Wojciech Zaremba. 2021. Evaluating Large Language Models Trained on Code. arXiv:2107.03374 [cs.LG]"},{"key":"e_1_3_2_1_7_1","unstructured":"Krzysztof Choromanski Valerii Likhosherstov David Dohan Xingyou Song Andreea Gane Tamas Sarlos Peter Hawkins Jared Davis Afroz Mohiuddin Lukasz Kaiser et al. 2020. Rethinking attention with performers. arXiv preprint arXiv:2009.14794 (2020)."},{"key":"e_1_3_2_1_8_1","volume-title":"Training Verifiers to Solve Math Word Problems. arXiv preprint arXiv:2110.14168","author":"Cobbe Karl","year":"2021","unstructured":"Karl Cobbe, Vineet Kosaraju, Mohammad Bavarian, Mark Chen, Heewoo Jun, Lukasz Kaiser, Matthias Plappert, Jerry Tworek, Jacob Hilton, Reiichiro Nakano, Christopher Hesse, and John Schulman. 2021. Training Verifiers to Solve Math Word Problems. arXiv preprint arXiv:2110.14168 (2021)."},{"key":"e_1_3_2_1_9_1","volume-title":"Support-vector networks. Machine learning 20, 3","author":"Cortes Corinna","year":"1995","unstructured":"Corinna Cortes and Vladimir Vapnik. 1995. Support-vector networks. Machine learning 20, 3 (1995), 273--297."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Gelei Deng Yi Liu Yuekang Li Kailong Wang Ying Zhang Zefeng Li Haoyu Wang Tianwei Zhang and Yang Liu. 2024. MasterKey: Automated Jailbreak Across Multiple Large Language Model Chatbots. In NDSS.","DOI":"10.14722\/ndss.2024.24188"},{"key":"e_1_3_2_1_11_1","volume-title":"Pandora: Jailbreak GPTs by Retrieval Augmented Generation Poisoning. NDSS AISCC","author":"Deng Gelei","year":"2024","unstructured":"Gelei Deng, Yi Liu, Kailong Wang, Yuekang Li, Tianwei Zhang, and Yang Liu. 2024. Pandora: Jailbreak GPTs by Retrieval Augmented Generation Poisoning. NDSS AISCC (2024)."},{"key":"e_1_3_2_1_12_1","unstructured":"Martin Fell. 2023. A Search for More ChatGPT \/ GPT-3.5 \/ GPT-4 \"Unspeakable\" Glitch Tokens. Online. https:\/\/www.lesswrong.com\/posts\/kmWrwtGE9B9hpbgRT\/a-search-for-more-chatgpt-gpt-3-5-gpt-4-unspeakable-glitch (Accessed: 2024-05-05)."},{"key":"e_1_3_2_1_13_1","volume-title":"Coercing LLMs to do and reveal (almost) anything. arXiv preprint arXiv:2402.14020","author":"Geiping Jonas","year":"2024","unstructured":"Jonas Geiping, Alex Stein, Manli Shu, Khalid Saifullah, Yuxin Wen, and Tom Goldstein. 2024. Coercing LLMs to do and reveal (almost) anything. arXiv preprint arXiv:2402.14020 (2024)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.34740\/KAGGLE\/M\/3301"},{"key":"e_1_3_2_1_15_1","unstructured":"GlitchProber. (Accessed on 06\/07\/2024). https:\/\/sites.google.com\/view\/glitchprober\/."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3650212.3680383"},{"key":"e_1_3_2_1_17_1","volume-title":"Measuring massive multitask language understanding. arXiv preprint arXiv:2009.03300","author":"Hendrycks Dan","year":"2020","unstructured":"Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt. 2020. Measuring massive multitask language understanding. arXiv preprint arXiv:2009.03300 (2020)."},{"key":"e_1_3_2_1_18_1","unstructured":"Albert Q. Jiang Alexandre Sablayrolles Arthur Mensch Chris Bamford Devendra Singh Chaplot Diego de las Casas Florian Bressand Gianna Lengyel Guillaume Lample Lucile Saulnier L\u00e9lio Renard Lavaud Marie-Anne Lachaux Pierre Stock Teven Le Scao Thibaut Lavril Thomas Wang Timoth\u00e9e Lacroix and William El Sayed. 2023. Mistral 7B. arXiv:2310.06825 [cs.CL]"},{"key":"e_1_3_2_1_19_1","volume-title":"Digger: Detecting Copyright Content Mis-usage in Large Language Model Training.","author":"Li Haodong","year":"2024","unstructured":"Haodong Li, Gelei Deng, Yi Liu, Kailong Wang, Yuekang Li, Tianwei Zhang, Yang Liu, Guoai Xu, Guosheng Xu, and Haoyu Wang. 2024. Digger: Detecting Copyright Content Mis-usage in Large Language Model Training."},{"key":"e_1_3_2_1_20_1","volume-title":"Drowzee: Metamorphic Testing for Fact-conflicting Hallucination Detection in Large Language Models. In OOPSLA (To Appear).","author":"Li Ningke","year":"2024","unstructured":"Ningke Li, Yuekang Li, Yi Liu, Ling Shi, Kailong Wang, and Haoyu Wang. 2024. Drowzee: Metamorphic Testing for Fact-conflicting Hallucination Detection in Large Language Models. In OOPSLA (To Appear)."},{"key":"e_1_3_2_1_21_1","unstructured":"Yuxi Li Yi Liu Gelei Deng Ying Zhang Wenjia Song Ling Shi Kailong Wang Yuekang Li Yang Liu and Haoyu Wang. 2024. Glitch Tokens in Large Language Models: Categorization Taxonomy and Effective Detection. In FSE."},{"key":"e_1_3_2_1_22_1","unstructured":"Yuxi Li Yi Liu Yuekang Li Ling Shi Gelei Deng Shengquan Chen and Kailong Wang. 2024. Lockpicking LLMs: A Logit-Based Jailbreak Using Token-level Manipulation."},{"key":"e_1_3_2_1_23_1","volume-title":"Le","author":"Liu Hanxiao","year":"2021","unstructured":"Hanxiao Liu, Zihang Dai, David R. So, and Quoc V. Le. 2021. Pay Attention to MLPs. arXiv:2105.08050 [cs.LG]"},{"key":"e_1_3_2_1_24_1","volume-title":"Prompt Injection attack against LLM-integrated Applications. arXiv preprint arXiv:2306.05499","author":"Liu Yi","year":"2023","unstructured":"Yi Liu, Gelei Deng, Yuekang Li, Kailong Wang, Tianwei Zhang, Yepang Liu, Haoyu Wang, Yan Zheng, and Yang Liu. 2023. Prompt Injection attack against LLM-integrated Applications. arXiv preprint arXiv:2306.05499 (2023)."},{"key":"e_1_3_2_1_25_1","volume-title":"Simplified self-attention for transformer-based end-to-end speech recognition. arXiv preprint arXiv:2005.10463","author":"Luo Haoneng","year":"2020","unstructured":"Haoneng Luo, Shiliang Zhang, Ming Lei, and Lei Xie. 2020. Simplified self-attention for transformer-based end-to-end speech recognition. arXiv preprint arXiv:2005.10463 (2020)."},{"key":"e_1_3_2_1_26_1","unstructured":"mwatkins. 2023. The petertodd phenomenon. Online. https:\/\/www.lesswrong.com\/posts\/jkY6QdCfAXHJk3kea\/the-petertodd-phenomenon (Accessed: 2024-05-05)."},{"key":"e_1_3_2_1_27_1","unstructured":"mwatkins. 2023. A Search for More ChatGPT\/GPT-3.5\/GPT-4 \"Unspeakable\" Glitch Tokens. Online. https:\/\/www.lesswrong.com\/posts\/kmWrwtGE9B9hpbgRT\/a-search-for-more-chatgpt-gpt-3-5-gpt-4-unspeakable-glitch (Accessed: 2024-05-03)."},{"key":"e_1_3_2_1_28_1","unstructured":"mwatkins and Jessica Rumbelow. 2023. SolidGoldMagikarp II: technical details and more recent findings. Online. https:\/\/www.lesswrong.com\/posts\/Ya9LzwEbfaAMY8ABo\/solidgoldmagikarp-ii-technical-details-and-more-recent (Accessed: 2024-05-05)."},{"key":"e_1_3_2_1_29_1","unstructured":"mwatkins and Jessica Rumbelow. 2023. SolidGoldMagikarp III: Glitch token archaeology. Online. https:\/\/www.lesswrong.com\/posts\/8viQEp8KBg2QSW4Yc\/solidgoldmagikarp-iii-glitch-token-archaeology (Accessed: 2024-05-05)."},{"key":"e_1_3_2_1_30_1","unstructured":"Bloom J Nanda N. 2022. TransformerLens. Online. https:\/\/github.com\/neelnanda-io\/TransformerLens (Accessed: 2024-05-05)."},{"key":"e_1_3_2_1_31_1","unstructured":"Machel Reid Nikolay Savinov Denis Teplyashin Dmitry Lepikhin Timothy Lillicrap Jean-baptiste Alayrac Radu Soricut Angeliki Lazaridou Orhan Firat Julian Schrittwieser et al. 2024. Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context. arXiv preprint arXiv:2403.05530 (2024)."},{"key":"e_1_3_2_1_32_1","unstructured":"Jessica Rumbelow and mwatkins. 2023. SolidGoldMagikarp(plus prompt generation). Online. https:\/\/www.lesswrong.com\/posts\/aPeJE8bSo6rAFoLqg\/solidgoldmagikarp-plus-prompt-generation (Accessed: 2024-05-05)."},{"key":"e_1_3_2_1_33_1","volume-title":"Smola","author":"Scholkopf Bernhard","year":"2002","unstructured":"Bernhard Scholkopf and Alexander J. Smola. 2002. Learning with Kernels: Support Vector Machines, Regularization, Optimization, and Beyond. MIT Press."},{"key":"e_1_3_2_1_34_1","unstructured":"Gemini Team Rohan Anil Sebastian Borgeaud Yonghui Wu Jean-Baptiste Alayrac Jiahui Yu Radu Soricut Johan Schalkwyk Andrew M Dai Anja Hauth et al. 2023. Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805 (2023)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.59287\/icaens.1127"},{"key":"e_1_3_2_1_36_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev Punit Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang Ross Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic Sergey Edunov and Thomas Scialom. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. arXiv:2307.09288 [cs.CL]"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.5555\/211359"},{"key":"e_1_3_2_1_38_1","first-page":"64","article-title":"Markov processes over denumerable products of spaces, describing large systems of automata","volume":"5","author":"Vaserstein Leonid Nisonovich","year":"1969","unstructured":"Leonid Nisonovich Vaserstein. 1969. Markov processes over denumerable products of spaces, describing large systems of automata. Problemy Peredachi Informatsii 5, 3 (1969), 64--72.","journal-title":"Problemy Peredachi Informatsii"},{"key":"e_1_3_2_1_39_1","volume-title":"arXiv preprint arXiv:2405.17067","author":"Wang Dixuan","year":"2024","unstructured":"Dixuan Wang, Yanda Li, Junyuan Jiang, Zepeng Ding, Guochao Jiang, Jiaqing Liang, and Deqing Yang. 2024. Tokenization Matters! Degrading Large Language Models through Challenging Their Tokenization. arXiv preprint arXiv:2405.17067 (2024)."},{"key":"e_1_3_2_1_40_1","volume-title":"MeTMaP: Metamorphic Testing for Detecting False Vector Matching Problems in LLM Augmented Generation. FORGE","author":"Wang Guanyu","year":"2024","unstructured":"Guanyu Wang, Yuekang Li, Yi Liu, Gelei Deng, Tianlin Li, Guosheng Xu, Yang Liu, Haoyu Wang, and Kailong Wang. 2024. MeTMaP: Metamorphic Testing for Detecting False Vector Matching Problems in LLM Augmented Generation. FORGE (2024)."},{"key":"e_1_3_2_1_41_1","volume-title":"Autogen: Enabling next-gen llm applications via multi-agent conversation framework. arXiv preprint arXiv:2308.08155","author":"Wu Qingyun","year":"2023","unstructured":"Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Shaokun Zhang, Erkang Zhu, Beibin Li, Li Jiang, Xiaoyun Zhang, and Chi Wang. 2023. Autogen: Enabling next-gen llm applications via multi-agent conversation framework. arXiv preprint arXiv:2308.08155 (2023)."}],"event":{"name":"ASE '24: 39th IEEE\/ACM International Conference on Automated Software Engineering","location":"Sacramento CA USA","acronym":"ASE '24","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGSOFT ACM Special Interest Group on Software Engineering","IEEE CS"]},"container-title":["Proceedings of the 39th IEEE\/ACM International Conference on Automated Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3691620.3695060","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3691620.3695060","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:04:07Z","timestamp":1750291447000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3691620.3695060"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,27]]},"references-count":40,"alternative-id":["10.1145\/3691620.3695060","10.1145\/3691620"],"URL":"https:\/\/doi.org\/10.1145\/3691620.3695060","relation":{},"subject":[],"published":{"date-parts":[[2024,10,27]]},"assertion":[{"value":"2024-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}