{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T11:40:10Z","timestamp":1759059610392,"version":"3.44.0"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032061171","type":"print"},{"value":"9783032061188","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T00:00:00Z","timestamp":1759104000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T00:00:00Z","timestamp":1759104000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-06118-8_6","type":"book-chapter","created":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T11:23:35Z","timestamp":1759058615000},"page":"89-103","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Iterative Corpus Refinement for\u00a0Materials Property Prediction Based on\u00a0Scientific Texts"],"prefix":"10.1007","author":[{"given":"Lei","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Markus","family":"Stricker","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,29]]},"reference":[{"key":"6_CR1","doi-asserted-by":"publisher","unstructured":"Banko, L., Krysiak, O., Schumann, W., Ludwig, A.: Electrochemical activity of several compositions in the system AG-PD-PT-RU for the oxygen reduction reaction in 0.05 m koh solution ph 12.5 (2024). https:\/\/doi.org\/10.5281\/zenodo.13992986","DOI":"10.5281\/zenodo.13992986"},{"issue":"3","key":"6_CR2","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1016\/j.joule.2018.12.015","volume":"3","author":"TA Batchelor","year":"2019","unstructured":"Batchelor, T.A., Pedersen, J.K., Winther, S.H., Castelli, I.E., Jacobsen, K.W., Rossmeisl, J.: High-entropy alloys as a discovery platform for electrocatalysis. Joule 3(3), 834\u2013845 (2019). https:\/\/doi.org\/10.1016\/j.joule.2018.12.015","journal-title":"Joule"},{"key":"6_CR3","doi-asserted-by":"publisher","unstructured":"Bilgin, M., Sent\u00fcrk, I.F.: Sentiment analysis on twitter data with semi-supervised doc2vec. In: 2017 International Conference on Computer Science and Engineering (UBMK), pp. 661\u2013666 (2017). https:\/\/doi.org\/10.1109\/UBMK.2017.8093492","DOI":"10.1109\/UBMK.2017.8093492"},{"issue":"2","key":"6_CR4","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1016\/S0924-0136(02)01042-7","volume":"134","author":"E Ezugwu","year":"2003","unstructured":"Ezugwu, E., Bonney, J., Yamane, Y.: An overview of the machinability of aeroengine alloys. J. Mater. Process. Technol. 134(2), 233\u2013253 (2003). https:\/\/doi.org\/10.1016\/S0924-0136(02)01042-7","journal-title":"J. Mater. Process. Technol."},{"key":"6_CR5","doi-asserted-by":"publisher","unstructured":"Ferreira, P., et al.: Instability of Pt\/C electrocatalysts in proton exchange membrane fuel cells: a mechanistic investigation. J. Electrochem. Soci. 152(11), A2256\u2013A2271 (2005). https:\/\/doi.org\/10.1149\/1.2050347","DOI":"10.1149\/1.2050347"},{"issue":"1","key":"6_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/S1389-5567(00)00002-2","volume":"1","author":"A Fujishima","year":"2000","unstructured":"Fujishima, A., Rao, T.N., Tryk, D.A.: Titanium dioxide photocatalysis. J. Photochem. Photobiol., C 1(1), 1\u201321 (2000). https:\/\/doi.org\/10.1016\/S1389-5567(00)00002-2","journal-title":"J. Photochem. Photobiol., C"},{"issue":"3","key":"6_CR7","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1038\/nmat1849","volume":"6","author":"A Geim","year":"2007","unstructured":"Geim, A., Novoselov, K.: The rise of graphene. Nat. Mater. 6(3), 183\u2013191 (2007). https:\/\/doi.org\/10.1038\/nmat1849","journal-title":"Nat. Mater."},{"key":"6_CR8","unstructured":"Goldberg, Y., Levy, O.: word2vec explained: deriving Mikolov et al.\u2019s negative-sampling word-embedding method (2014). https:\/\/arxiv.org\/abs\/1402.3722"},{"issue":"1\u20132","key":"6_CR9","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1002\/adem.200300507","volume":"6","author":"PK Huang","year":"2004","unstructured":"Huang, P.K., Yeh, J.W., Shun, T.T., Chen, S.K.: Multi-principal-element alloys with improved oxidation and wear resistance for thermal spray coating. Adv. Eng. Mater. 6(1\u20132), 74\u201378 (2004). https:\/\/doi.org\/10.1002\/adem.200300507","journal-title":"Adv. Eng. Mater."},{"key":"6_CR10","doi-asserted-by":"publisher","unstructured":"Jain, A., Shin, Y., Persson, K.A.: Computational predictions of energy materials using density functional theory. Nature Rev. Mater. 1(1) (2016). https:\/\/doi.org\/10.1038\/natrevmats.2015.4","DOI":"10.1038\/natrevmats.2015.4"},{"issue":"1","key":"6_CR11","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1038\/nchem.2085","volume":"7","author":"D Larcher","year":"2015","unstructured":"Larcher, D., Tarascon, J.M.: Towards greener and more sustainable batteries for electrical energy storage. Nat. Chem. 7(1), 19\u201329 (2015). https:\/\/doi.org\/10.1038\/nchem.2085","journal-title":"Nat. Chem."},{"key":"6_CR12","unstructured":"Le, Q., Mikolov, T.: Distributed representations of sentences and documents (2014)"},{"issue":"3","key":"6_CR13","doi-asserted-by":"publisher","first-page":"159","DOI":"10.1016\/j.jmat.2017.08.002","volume":"3","author":"Y Liu","year":"2017","unstructured":"Liu, Y., Zhao, T., Ju, W., Shi, S.: Materials discovery and design using machine learning. J. Materiomics 3(3), 159\u2013177 (2017). https:\/\/doi.org\/10.1016\/j.jmat.2017.08.002","journal-title":"J. Materiomics"},{"key":"6_CR14","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J., Sutskever, L., Zweig, G.: word2vec 22, 795 (2013). https:\/\/code.google.com\/p\/word2vec"},{"key":"6_CR15","doi-asserted-by":"publisher","unstructured":"Miracle, D., Senkov, O.: A critical review of high entropy alloys and related concepts. Acta Materialia 122, 448 \u2013 511 (2017). https:\/\/doi.org\/10.1016\/j.actamat.2016.08.081","DOI":"10.1016\/j.actamat.2016.08.081"},{"issue":"7601","key":"6_CR16","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1038\/nature17439","volume":"533","author":"P Raccuglia","year":"2016","unstructured":"Raccuglia, P., et al.: Machine-learning-assisted materials discovery using failed experiments. Nature 533(7601), 73\u201376 (2016). https:\/\/doi.org\/10.1038\/nature17439","journal-title":"Nature"},{"issue":"5760","key":"6_CR17","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1126\/science.1114736","volume":"311","author":"AJ Ragauskas","year":"2006","unstructured":"Ragauskas, A.J., et al.: The path forward for biofuels and biomaterials. Science 311(5760), 484\u2013489 (2006). https:\/\/doi.org\/10.1126\/science.1114736","journal-title":"Science"},{"issue":"11","key":"6_CR18","doi-asserted-by":"publisher","first-page":"1501","DOI":"10.1007\/s11837-013-0755-4","volume":"65","author":"JE Saal","year":"2013","unstructured":"Saal, J.E., Kirklin, S., Aykol, M., Meredig, B., Wolverton, C.: Materials design and discovery with high-throughput density functional theory: the open quantum materials database (OQMD). JOM 65(11), 1501\u20131509 (2013). https:\/\/doi.org\/10.1007\/s11837-013-0755-4","journal-title":"JOM"},{"key":"6_CR19","doi-asserted-by":"publisher","DOI":"10.1016\/j.mtphys.2024.101560","volume":"48","author":"J Schmidt","year":"2024","unstructured":"Schmidt, J., et al.: Improving machine-learning models in materials science through large datasets. Mater. Today Phys. 48, 101560 (2024). https:\/\/doi.org\/10.1016\/j.mtphys.2024.101560","journal-title":"Mater. Today Phys."},{"issue":"8022","key":"6_CR20","doi-asserted-by":"publisher","first-page":"755","DOI":"10.1038\/s41586-024-07566-y","volume":"631","author":"I Shumailov","year":"2024","unstructured":"Shumailov, I., Shumaylov, Z., Zhao, Y., Papernot, N., Anderson, R., Gal, Y.: AI models collapse when trained on recursively generated data. Nature 631(8022), 755\u2013759 (2024). https:\/\/doi.org\/10.1038\/s41586-024-07566-y","journal-title":"Nature"},{"key":"6_CR21","doi-asserted-by":"publisher","unstructured":"Thelen, F., Zehl, R., Limani, N., Schuhmann, W., Ludwig, A.: High-throughput SECCM and EDX data for the hydrogen evolution reaction in ag-au-pd-pt-ru and ag-au-pd-pt-rh thin-film materials libraries (2025). https:\/\/doi.org\/10.5281\/zenodo.14959252","DOI":"10.5281\/zenodo.14959252"},{"key":"6_CR22","doi-asserted-by":"publisher","unstructured":"Thelen, F., Zehl, R., Zerdoumi, R., B\u00fcrgel, J.L., Schuhmann, W., Ludwig, A.: Dataset - accelerating combinatorial electrocatalyst discovery with Bayesian optimization: a case study in the quaternary system ni-pd-pt-ru for the oxygen evolution reaction (2025). https:\/\/doi.org\/10.5281\/zenodo.14891704","DOI":"10.5281\/zenodo.14891704"},{"issue":"4","key":"6_CR23","doi-asserted-by":"publisher","DOI":"10.1016\/j.patter.2022.100488","volume":"3","author":"A Trewartha","year":"2022","unstructured":"Trewartha, A., et al.: Quantifying the advantage of domain-specific pre-training on named entity recognition tasks in materials science. Patterns 3(4), 100488 (2022). https:\/\/doi.org\/10.1016\/j.patter.2022.100488","journal-title":"Patterns"},{"issue":"7763","key":"6_CR24","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1038\/s41586-019-1335-8","volume":"571","author":"V Tshitoyan","year":"2019","unstructured":"Tshitoyan, V., et al.: Unsupervised word embeddings capture latent knowledge from materials science literature. Nature 571(7763), 95\u201398 (2019). https:\/\/doi.org\/10.1038\/s41586-019-1335-8","journal-title":"Nature"},{"issue":"5","key":"6_CR25","doi-asserted-by":"publisher","first-page":"299","DOI":"10.1002\/adem.200300567","volume":"6","author":"JW Yeh","year":"2004","unstructured":"Yeh, J.W., et al.: Nanostructured high-entropy alloys with multiple principal elements: novel alloy design concepts and outcomes. Adv. Eng. Mater. 6(5), 299\u2013303 (2004). https:\/\/doi.org\/10.1002\/adem.200300567","journal-title":"Adv. Eng. Mater."},{"key":"6_CR26","doi-asserted-by":"publisher","DOI":"10.1016\/j.coelec.2024.101590","volume":"48","author":"R Zerdoumi","year":"2024","unstructured":"Zerdoumi, R., Ludwig, A., Schuhmann, W.: High entropy intermetallic compounds: a discovery platform for structure\u2013property correlations and materials design principles in electrocatalysis. Curr. Opin. Electrochem. 48, 101590 (2024). https:\/\/doi.org\/10.1016\/j.coelec.2024.101590","journal-title":"Curr. Opin. Electrochem."},{"key":"6_CR27","doi-asserted-by":"publisher","unstructured":"Zhang, L., Stricker, M.: MatNexus: a comprehensive text mining and analysis suite for materials discovery. SoftwareX 26, 101654 (2024). https:\/\/doi.org\/10.1016\/j.softx.2024.101654","DOI":"10.1016\/j.softx.2024.101654"},{"key":"6_CR28","unstructured":"Zhang, L., Stricker, M.: Code for \u201citerative corpus refinement for materials property prediction based on scientific texts\u201d (2025). https:\/\/github.com\/lab-mids\/word_embedding_paper_selection"},{"key":"6_CR29","unstructured":"Zhang, L., Stricker, M.: Electrocatalyst discovery through text mining and multi-objective optimization (2025). https:\/\/arxiv.org\/abs\/2502.20860"},{"key":"6_CR30","unstructured":"Zhang, Y., et al.: Siren\u2019s song in the AI ocean: a survey on hallucination in large language models. arXiv:2309.01219 (2023)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Applied Data Science Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-06118-8_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T11:23:38Z","timestamp":1759058618000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-06118-8_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,29]]},"ISBN":["9783032061171","9783032061188"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-06118-8_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,29]]},"assertion":[{"value":"29 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Porto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecmlpkdd.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}