{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T22:09:18Z","timestamp":1778710158500,"version":"3.51.4"},"reference-count":13,"publisher":"SCITEPRESS - Science and Technology Publications","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.5220\/0014430200004052","type":"proceedings-article","created":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T10:40:25Z","timestamp":1773571225000},"page":"3365-3371","source":"Crossref","is-referenced-by-count":0,"title":["A Comparative Study of Feature Identification Methods for Sparse Autoencoders in Board Game Representations"],"prefix":"10.5220","author":[{"given":"Jonathan","family":"Zea","sequence":"first","affiliation":[{"name":"Departamento de Inform\u00e1tica y Ciencias de la Computaci\u00f3n, Escuela Polit\u00e9cnica Nacional, Quito, Ecuador"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lorena","family":"Barona","sequence":"additional","affiliation":[{"name":"Departamento de Inform\u00e1tica y Ciencias de la Computaci\u00f3n, Escuela Polit\u00e9cnica Nacional, Quito, Ecuador"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marco","family":"Benalc\u00e1zar","sequence":"additional","affiliation":[{"name":"Departamento de Inform\u00e1tica y Ciencias de la Computaci\u00f3n, Escuela Polit\u00e9cnica Nacional, Quito, Ecuador"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"3171","reference":[{"key":"ref1","article-title":"Mechanistic Interpretability for AI Safety -- A Review","author":"Leonard Bereska","year":"2024","unstructured":"Bereska, L. and Gavves, E. (2024). \nMechanistic interpretability for ai safety -- a review."},{"key":"ref2","article-title":"Llama Scope: Extracting millions of features from Llama-3.1-8B with sparse Autoencoders","author":"Zhengfu He","year":"2024","unstructured":"He, Z., Shu, W., Ge, X., Chen, L., Wang, J., Zhou, Y., Liu, F., Guo, Q., Huang, X., Wu, Z., Jiang, Y.-G., and Qiu, X. (2024). Llama scope: Extracting millions of features from Llama-3.1-8B with sparse autoencoders."},{"key":"ref3","article-title":"Revisiting end-to-end sparse autoencoder training: A short finetune is all you need","author":"Adam Karvonen","year":"2025","unstructured":"Karvonen, A. (2025). Revisiting end-to-end sparse autoencoder training: A short finetune is all you need."},{"key":"ref4","article-title":"Measuring progress in dictionary learning for language model interpretability with board game models","author":"Adam Karvonen","year":"2024","unstructured":"Karvonen, A., Wright, B., Rager, C., Angell, R., Brinkmann, J., Smith, L., Verdun, C. M., Bau, D., and Marks, S. (2024). Measuring progress in dictionary learning for language model interpretability with board game models."},{"key":"ref5","article-title":"Feature-level insights into artificial Text Detection with Sparse Autoencoders","author":"Kristian Kuznetsov","year":"2025","unstructured":"Kuznetsov, K., Kushnareva, L., Druzhinina, P., Razzhigaev, A., Voznyuk, A., Piontkovskaya, I., Burnaev, E., and Barannikov, S. (2025). Feature-level insights into artificial text detection with sparse autoencoders."},{"key":"ref6","article-title":"Can sparse autoencoders be used to decompose and interpret steering vectors?","author":"Harry Mayne","year":"2024","unstructured":"Mayne, H., Yang, Y., and Mahdi, A. (2024). \nCan sparse autoencoders be used to decompose and interpret steering vectors?"},{"key":"ref7","article-title":"Compute optimal inference and provable amortisation gap in sparse autoencoders","author":"Charles O'Neill","year":"2024","unstructured":"O'Neill, C., Gumran, A., and Klindt, D. (2024). Compute optimal inference and provable amortisation gap in sparse autoencoders."},{"key":"ref8","article-title":"Sparse Autoencoders Trained on the Same Data Learn Different Features","author":"Gon\u00e7alo Paulo","year":"2025","unstructured":"Paulo, G. and Belrose, N. (2025). Sparse autoencoders trained on the same data learn different features."},{"key":"ref9","article-title":"Automatically interpreting millions of features in large language models","author":"Goncalo Paulo","year":"2024","unstructured":"Paulo, G., Mallen, A., Juang, C., and Belrose, N. (2024). Automatically interpreting millions of features in large language models."},{"key":"ref10","article-title":"A practical review of mechanistic interpretability for transformer-based language models","author":"Daking Rai","year":"2024","unstructured":"Rai, D., Zhou, Y., Feng, S., Saparov, A., and Yao, Z. (2024). A practical review of mechanistic interpretability for transformer-based language models."},{"key":"ref11","article-title":"Scaling Monosemanticity: Extracting Interpretable Features from Claude 3 Sonnet","author":"Adly Templeton","year":"2024","unstructured":"Templeton, A., Conerly, T., Marcus, J., Lindsey, J., Bricken, T., Chen, B., Pearce, A., Citro, C., Ameisen, E., Jones, A., Cunningham, H., Turner, N. L., McDougall, C., MacDiarmid, M., Freeman, C. D., Sumers, T. R., Rees, E., Batson, J., Jermyn, A., Carter, S., Olah, C., and Henighan, T. (2024). Scaling monosemanticity: Extracting interpretable features from claude 3 sonnet. \nTransformer Circuits Thread."},{"key":"ref12","article-title":"DILA: Dictionary Label Attention for mechanistic interpretability in high-dimensional multi-label medical coding prediction","author":"John Wu","year":"2024","unstructured":"Wu, J., Wu, D., and Sun, J. (2024). DILA: Dictionary label attention for mechanistic interpretability in high-dimensional multi-label medical coding prediction."},{"key":"ref13","article-title":"Projected Gradient Descent Algorithm for Low-Rank Matrix Estimation","author":"Teng Zhang","year":"2024","unstructured":"Zhang, T. and Fan, X. (2024). Projected gradient descent algorithm for low-rank matrix estimation."}],"event":{"name":"18th International Conference on Agents and Artificial Intelligence","location":"Marbella, Spain","start":{"date-parts":[[2026,3,5]]},"end":{"date-parts":[[2026,3,8]]}},"container-title":["Proceedings of the 18th International Conference on Agents and Artificial Intelligence"],"original-title":["A Comparative Study of Feature Identification Methods for Sparse Autoencoders in Board Game Representations"],"deposited":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T21:24:18Z","timestamp":1778707458000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.scitepress.org\/DigitalLibrary\/Link.aspx?doi=10.5220\/0014430200004052"}},"subtitle":[""],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":13,"URL":"https:\/\/doi.org\/10.5220\/0014430200004052","relation":{},"subject":[],"published":{"date-parts":[[2026]]}}}