{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T19:06:09Z","timestamp":1780427169737,"version":"3.54.1"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1007\/s10994-025-06867-1","type":"journal-article","created":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T01:31:50Z","timestamp":1759282310000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["LLM-based feature generation from text for interpretable machine learning"],"prefix":"10.1007","volume":"114","author":[{"given":"Vojt\u011bch","family":"Balek","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Luk\u00e1\u0161","family":"S\u00fdkora","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Vil\u00e9m","family":"Sklen\u00e1k","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tom\u00e1\u0161","family":"Kliegr","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,10,1]]},"reference":[{"key":"6867_CR1","unstructured":"Agrawal, R., & Srikant, R. (1994). Fast algorithms for mining association rules. Proc. 20th int. conf. very large databases, VLDB (Vol. 1215, pp. 487\u2013499). Santiago, Chile: VLDB."},{"key":"6867_CR2","doi-asserted-by":"crossref","unstructured":"Almatarneh, S., Gamallo, P., & Pena, F.J.R. (2019). Citius-cole at semeval-2019 task 5: Combining linguistic features to identify hate speech against immigrants and women on multilingual tweets. Proceedings of the 13th international workshop on semantic evaluation (pp. 387\u2013390).","DOI":"10.18653\/v1\/S19-2068"},{"issue":"5","key":"6867_CR3","doi-asserted-by":"publisher","first-page":"2571","DOI":"10.1007\/s10618-024-01041-y","volume":"38","author":"M Atzmueller","year":"2024","unstructured":"Atzmueller, M., F\u00fcrnkranz, J., Kliegr, T., & Schmid, U. (2024). Explainable and interpretable machine learning and data mining. Data Mining and Knowledge Discovery, 38(5), 2571\u20132595.","journal-title":"Data Mining and Knowledge Discovery"},{"key":"6867_CR4","doi-asserted-by":"crossref","unstructured":"Beltagy, I., Lo, K., & Cohan, A. (2019). Scibert: A pretrained language model for scientific text. Proceedings of the 2019 conference on empirical methods in natural language processing and the 9th international joint conference on natural language processing (emnlp-ijcnlp) (pp. 3615\u20133620).","DOI":"10.18653\/v1\/D19-1371"},{"issue":"5","key":"6867_CR5","doi-asserted-by":"publisher","first-page":"2313","DOI":"10.1007\/s11192-022-04314-9","volume":"127","author":"L Beranov\u00e1","year":"2022","unstructured":"Beranov\u00e1, L., Joachimiak, M. P., Kliegr, T., Rabby, G., & Sklen\u00e1k, V. (2022). Why was this cited? Explainable machine learning applied to COVID-19 research literature. Scientometrics, 127(5), 2313\u20132349. https:\/\/doi.org\/10.1007\/s11192-022-04314-9","journal-title":"Scientometrics"},{"key":"6867_CR6","unstructured":"Cap, N.B. (2024). Research article quality prediction [Thesis (in Czech)]. Retrieved from https:\/\/theses.cz\/id\/pd9t84\/"},{"key":"6867_CR7","doi-asserted-by":"crossref","unstructured":"Casanueva, I., Tem\u010dinas, T., Gerz, D., Henderson, M., & Vuli\u0107, I. (2020). Efficient intent detection with dual sentence encoders. arXiv preprint arXiv:2003.04807","DOI":"10.18653\/v1\/2020.nlp4convai-1.5"},{"key":"6867_CR8","volume-title":"Mathematical methods of statistics","author":"H Cram\u00e9r","year":"1946","unstructured":"Cram\u00e9r, H. (1946). Mathematical methods of statistics. Princeton University Press."},{"key":"6867_CR9","doi-asserted-by":"crossref","unstructured":"De Gibert, O., Perez, N., Garc\u00eda-Pablos, A., & Cuadros, M. (2018). Hate speech dataset from a white supremacy forum. arXiv preprint arXiv:1809.04444","DOI":"10.18653\/v1\/W18-5102"},{"key":"6867_CR10","unstructured":"Devlin, J., Chang, M.-W., Lee, K., & Toutanova, K. (2019). BERT: Pre-training of deep bidirectional transformers for language understanding. J. Burstein, C. Doran, & T. Solorio (Eds.), Proceedings of the 2019 conference of the north American chapter of the association for computational linguistics: Human language technologies, volume 1 (pp. 4171\u20134186). Minneapolis, Minnesota: Association for Computational Linguistics."},{"key":"6867_CR11","doi-asserted-by":"crossref","unstructured":"Dvorackova, L., Joachimiak, M.P., Cerny, M., Kubecova, A., Sklenak, V., & Kliegr, T. (2024). Explaining word embeddings with perfect fidelity: Case study in research impact prediction. arXiv preprint arXiv:2409.15912.","DOI":"10.1007\/s10994-025-06870-6"},{"issue":"1","key":"6867_CR12","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1007\/s11192-005-0208-0","volume":"63","author":"P Glenisson","year":"2005","unstructured":"Glenisson, P., Gl\u00e4nzel, W., & Persson, O. (2005). Combining full-text analysis and bibliometric indicators. A pilot study. Scientometrics, 63(1), 163\u2013180.","journal-title":"Scientometrics"},{"key":"6867_CR13","doi-asserted-by":"publisher","DOI":"10.1016\/j.infsof.2023.107268","volume":"162","author":"Y Gong","year":"2023","unstructured":"Gong, Y., Liu, G., Xue, Y., Li, R., & Meng, L. (2023). A survey on dataset quality in machine learning. Information and Software Technology, 162, Article 107268.","journal-title":"Information and Software Technology"},{"key":"6867_CR14","unstructured":"He, W., Dai, Y., Hui, B., Yang, M., Cao, Z., Dong, J., & Li, Y. (2022). Space-2: Tree-structured semi-supervised contrastive pre-training for task-oriented dialog understanding. arXiv preprint arXiv:2209.06638"},{"key":"6867_CR15","doi-asserted-by":"publisher","first-page":"368","DOI":"10.1016\/j.jbusres.2022.04.064","volume":"148","author":"CY Heo","year":"2022","unstructured":"Heo, C. Y., Kim, B., Park, K., & Back, R. M. (2022). A comparison of best-worst scaling and Likert scale methods on peer-to-peer accommodation attributes. Journal of Business Research, 148, 368\u2013377.","journal-title":"Journal of Business Research"},{"key":"6867_CR16","unstructured":"LanguageTool (2025). Languagetool: Ai-based grammar checker. Retrieved from https:\/\/languagetool.org\/ (Accessed: 2025-04-04)"},{"key":"6867_CR17","unstructured":"Laughlin, G.H.M. (1969). Smog grading-a new readability formula. Journal of Reading,12(8), 639\u2013646, Retrieved 2025-04-04, from http:\/\/www.jstor.org\/stable\/40011226"},{"key":"6867_CR18","unstructured":"Loukas, L., Stogiannidis, I., Malakasiotis, P., & Vassos, S. (2023). Breaking the bank with chatgpt: Few-shot text classification for finance. arXiv preprint arXiv:2308.14634"},{"key":"6867_CR19","unstructured":"Lundberg, S.M., & Lee, S.-I. (2017). A unified approach to interpreting model predictions. I. Guyon et al. (Eds.), Advances in neural information processing systems 30 (pp. 4765\u20134774). Curran Associates, Inc."},{"key":"6867_CR20","doi-asserted-by":"crossref","unstructured":"Markov, T., Zhang, C., Agarwal, S., Nekoul, F.E., Lee, T., Adler, S., & Weng, L. (2023). A holistic approach to undesired content detection in the real world. Proceedings of the aaai conference on artificial intelligence (Vol. 37, pp. 15009\u201315018).","DOI":"10.1609\/aaai.v37i12.26752"},{"key":"6867_CR21","unstructured":"Morris, J. (2025). language tool python: A free python grammar checker. Retrieved from https:\/\/github.com\/jxmorris12\/language_tool_python (Accessed: 2025-04- 04)"},{"key":"6867_CR22","doi-asserted-by":"crossref","unstructured":"OECD (2015). Frascati manual 2015. Retrieved from https:\/\/www.oecd-ilibrary.org\/content\/publication\/9789264239012-en","DOI":"10.1787\/9789264239012-en"},{"key":"6867_CR23","doi-asserted-by":"crossref","unstructured":"Pasquier, N., Bastide, Y., Taouil, R., & Lakhal, L. (1999). Discovering frequent closed itemsets for association rules. Database theory\u2014ICDT\u201999: 7th International Conference Jerusalem, Israel, January 10\u201312 (pp. 398\u2013416).","DOI":"10.1007\/3-540-49257-7_25"},{"key":"6867_CR24","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., & Duchesnay, E. (2011). Scikit-learn: Machine learning in Python. Journal of Machine Learning Research, 12, 2825\u20132830.","journal-title":"Journal of Machine Learning Research"},{"key":"6867_CR25","unstructured":"Radcliffe, N. (2007). Using control groups to target on predicted lift: Building and assessing uplift model. Direct Marketing Analytics Journal, pp. 14\u201321."},{"key":"6867_CR26","doi-asserted-by":"crossref","unstructured":"Ras, Z.W., & Wieczorkowska, A. (2000). Action-rules: How to increase profit of a company. European conference on principles of data mining and knowledge discovery (pp. 587\u2013592). Springer.","DOI":"10.1007\/3-540-45372-5_70"},{"key":"6867_CR27","doi-asserted-by":"crossref","unstructured":"Shome, A., Cruz, L., & Van Deursen, A. (2022). Data smells in public datasets. Proceedings of the 1st international conference on ai engineering: Software engineering for ai (pp. 205\u2013216).","DOI":"10.1145\/3522664.3528621"},{"key":"6867_CR28","doi-asserted-by":"crossref","unstructured":"Sykora, L., & Kliegr, T. (2023). Apriori modified for action rules mining. Proceedings of the 12th Knowledge Capture Conference 2023 (pp. 30\u201334). ACM.","DOI":"10.1145\/3587259.3627569"},{"key":"6867_CR29","doi-asserted-by":"publisher","first-page":"1195","DOI":"10.1007\/s11192-016-1889-2","volume":"107","author":"I Tahamtan","year":"2016","unstructured":"Tahamtan, I., Safipour Afshar, A., & Ahamdzadeh, K. (2016). Factors affecting number of citations: A comprehensive review of the literature. Scientometrics, 107, 1195\u20131225.","journal-title":"Scientometrics"},{"key":"6867_CR30","unstructured":"Tang, Z., Fang, H., Zhou, S., Yang, T., Zhong, Z., Hu, T., & Karypis, G. (2024). AutoGluon-multimodal (AutoMM): Supercharging multimodal AutoML with foundation models. International conference on automated machine learning (AutoML)."},{"issue":"2","key":"6867_CR31","doi-asserted-by":"publisher","first-page":"547","DOI":"10.1162\/qss_a_00258","volume":"4","author":"M Thelwall","year":"2023","unstructured":"Thelwall, M., Kousha, K., Wilson, P., Makita, M., Abdoli, M., & Stuart, E., & Cancellieri, M. (2023). Predicting article quality scores with machine learning: The U.K. research excellence framework. Quantitative Science Studies, 4(2), 547\u2013573","journal-title":"Quantitative Science Studies"},{"key":"6867_CR32","doi-asserted-by":"publisher","unstructured":"Touvron, H., Martin, L., Stone, K., Albert, P., Almahairi, A., Babaei, Y., & Scialom, T. (2023). Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 , https:\/\/doi.org\/10.48550\/arXiv.2307.09288","DOI":"10.48550\/arXiv.2307.09288"},{"key":"6867_CR33","doi-asserted-by":"publisher","unstructured":"Wolf, T., Debut, L., Sanh, V., Chaumond, J., Delangue, C., Moi, A., & Rush, A. M. (2019). Huggingface\u2019s transformers: State-of-the-art natural language processing. arXiv preprint arXiv:1910.03771 , https:\/\/doi.org\/10.48550\/arXiv.1910.03771","DOI":"10.48550\/arXiv.1910.03771"},{"key":"6867_CR34","doi-asserted-by":"publisher","unstructured":"Zhang, X., Zhang, J., Rekabdar, B., Zhou, Y., Wang, P., & Liu, K. (2024). Dynamic and adaptive feature generation with llm. arXiv preprint arXiv:2406.03505 , https:\/\/doi.org\/10.48550\/arXiv.2406.03505","DOI":"10.48550\/arXiv.2406.03505"},{"key":"6867_CR35","doi-asserted-by":"crossref","unstructured":"Zhou, L., Farag, Y., & Vlachos, A. (2024). An llm feature-based framework for dialogue constructiveness assessment. arXiv preprint arXiv:2406.14760","DOI":"10.18653\/v1\/2024.emnlp-main.308"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-025-06867-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-025-06867-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-025-06867-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,2]],"date-time":"2025-12-02T14:29:45Z","timestamp":1764685785000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-025-06867-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,1]]},"references-count":35,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2025,11]]}},"alternative-id":["6867"],"URL":"https:\/\/doi.org\/10.1007\/s10994-025-06867-1","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,1]]},"assertion":[{"value":"15 August 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 June 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 August 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 October 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"During the preparation of this work, the authors used Grammarly, Gemini and ChatGPT in order to improve the readability and language of the manuscript. After using this tool\/service, the authors reviewed and edited the content as needed.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declaration of generative AI and AI-assisted technologies in the writing process"}}],"article-number":"241"}}