{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T04:49:54Z","timestamp":1747975794256,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":31,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819605699"},{"type":"electronic","value":"9789819605705"}],"license":[{"start":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T00:00:00Z","timestamp":1732924800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T00:00:00Z","timestamp":1732924800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-0570-5_32","type":"book-chapter","created":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T00:46:02Z","timestamp":1732927562000},"page":"441-456","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Nob-MIAs: Non-biased Membership Inference Attacks Assessment on\u00a0Large Language Models with\u00a0Ex-Post Dataset Construction"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3026-1749","authenticated-orcid":false,"given":"C\u00e9dric","family":"Eichler","sequence":"first","affiliation":[]},{"given":"Nathan","family":"Champeil","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7537-295X","authenticated-orcid":false,"given":"Nicolas","family":"Anciaux","sequence":"additional","affiliation":[]},{"given":"Alexandra","family":"Bensamoun","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8059-7094","authenticated-orcid":false,"given":"H\u00e9ber","family":"H. Arcolezi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4023-3197","authenticated-orcid":false,"given":"Jos\u00e9 Maria","family":"De Fuentes","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,30]]},"reference":[{"key":"32_CR1","unstructured":"Biderman, S., et al.: Pythia: a suite for analyzing large language models across training and scaling. In: Proceedings of the 40th International Conference on Machine Learning. ICML 2023, JMLR.org (2023)"},{"key":"32_CR2","unstructured":"Carlini, N., et\u00a0al.: Extracting training data from large language models. In: 30th USENIX Security Symposium (USENIX Security 2021), pp. 2633\u20132650 (2021)"},{"key":"32_CR3","doi-asserted-by":"crossref","unstructured":"Chang, K.K., Cramer, M., Soni, S., Bamman, D.: Speak, memory: an archaeology of books known to chatgpt\/gpt-4. arXiv preprint arXiv:2305.00118 (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.453"},{"key":"32_CR4","unstructured":"Cheng, J., Marone, M., Weller, O., Lawrie, D., Khashabi, D., Van\u00a0Durme, B.: Dated data: Tracing knowledge cutoffs in large language models. arXiv preprint arXiv:2403.12958 (2024)"},{"key":"32_CR5","unstructured":"Computer, T.: Redpajama-data: An open source recipe to reproduce llama training dataset (2023). https:\/\/github.com\/togethercomputer\/RedPajama-Data"},{"key":"32_CR6","unstructured":"Das, D., Zhang, J., Tram\u00e8r, F.: Blind baselines beat membership inference attacks for foundation models. arXiv preprint arXiv:2406.16201 (2024)"},{"key":"32_CR7","unstructured":"Duan, M., et al.: Do membership inference attacks work on large language models? arXiv preprint arXiv:2402.07841 (2024)"},{"key":"32_CR8","unstructured":"Gailly, J.l., Adler, M.: Zlib compression library (2004)"},{"key":"32_CR9","doi-asserted-by":"crossref","unstructured":"Galli, F., Melis, L., Cucinotta, T.: Noisy neighbors: Efficient membership inference attacks against llms. arXiv preprint arXiv:2406.16565 (2024)","DOI":"10.18653\/v1\/2024.privatenlp-1.1"},{"key":"32_CR10","unstructured":"Gao, L., et al.: The pile: An 800gb dataset of diverse text for language modeling (2020)"},{"key":"32_CR11","unstructured":"Geng, X., Liu, H.: Openllama: An open reproduction of llama (May 2023). https:\/\/github.com\/openlm-research\/open_llama"},{"key":"32_CR12","unstructured":"Groeneveld, D., Ha, C., Magnusson, I.: Bff: The big friendly filter (2023). https:\/\/github.com\/allenai\/bff"},{"key":"32_CR13","doi-asserted-by":"crossref","unstructured":"Jedrzejewski, F.V., Thode, L., Fischbach, J., Gorschek, T., Mendez, D., Lavesson, N.: Adversarial machine learning in industry: a systematic literature review. Comput. Sec., 103988 (2024)","DOI":"10.1016\/j.cose.2024.103988"},{"key":"32_CR14","unstructured":"Kaneko, M., Ma, Y., Wata, Y., Okazaki, N.: Sampling-based pseudo-likelihood for membership inference attacks. arXiv preprint arXiv:2404.11262 (2024)"},{"key":"32_CR15","unstructured":"Li, H., et al.: Digger: Detecting copyright content mis-usage in large language model training. arXiv preprint arXiv:2401.00676 (2024)"},{"key":"32_CR16","unstructured":"Lin, C.Y.: Rouge: a package for automatic evaluation of summaries. In: Text summarization branches out, pp. 74\u201381 (2004)"},{"key":"32_CR17","doi-asserted-by":"crossref","unstructured":"Liu, X., et al.: Shield: Evaluation and defense strategies for copyright compliance in llm text generation. arXiv preprint arXiv:2406.12975 (2024)","DOI":"10.18653\/v1\/2024.emnlp-main.98"},{"key":"32_CR18","unstructured":"Maini, P., Jia, H., Papernot, N., Dziedzic, A.: Llm dataset inference: Did you train on my dataset? arXiv preprint arXiv:2406.06443 (2024)"},{"key":"32_CR19","unstructured":"Meeus, M., Jain, S., Rei, M., de\u00a0Montjoye, Y.: Did the neurons read your book? document-level membership inference for large language models. In: Balzarotti, D., Xu, W. (eds.) 33rd USENIX Security Symposium, USENIX Security 2024, Philadelphia, PA, USA, 14-16 August 2024. USENIX Association (2024)"},{"key":"32_CR20","unstructured":"Meeus, M., Jain, S., Rei, M., de\u00a0Montjoye, Y.A.: Inherent challenges of post-hoc membership inference for large language models. arXiv preprint arXiv:2406.17975 (2024)"},{"key":"32_CR21","unstructured":"Meeus, M., Shilov, I., Faysse, M., de\u00a0Montjoye, Y.A.: Copyright traps for large language models. In: 41st International Conference on Machine Learning (2024)"},{"key":"32_CR22","unstructured":"Panaitescu-Liess, M.A., et al.: Can watermarking large language models prevent copyrighted text generation and hide training data? arXiv preprint arXiv:2407.17417 (2024)"},{"key":"32_CR23","unstructured":"Rae, J.W., Potapenko, A., Jayakumar, S.M., Lillicrap, T.P.: Compressive transformers for long-range sequence modelling. arXiv preprint arXiv:1911.05507 (2019)"},{"key":"32_CR24","unstructured":"Reuel, A., et\u00a0al.: Open problems in technical ai governance. arXiv preprint arXiv:2407.14981 (2024)"},{"key":"32_CR25","unstructured":"Shi, W., et al.: Detecting pretraining data from large language models. In: The Twelfth International Conference on Learning Representations (2024)"},{"key":"32_CR26","doi-asserted-by":"crossref","unstructured":"Shokri, R., Stronati, M., Song, C., Shmatikov, V.: Membership inference attacks against machine learning models. In: 2017 IEEE Symposium on Security and Privacy (SP), pp. 3\u201318. IEEE (2017)","DOI":"10.1109\/SP.2017.41"},{"key":"32_CR27","unstructured":"Sonkar, S., Baraniuk, R.G.: Many-shot regurgitation (msr) prompting. arXiv preprint arXiv:2405.08134 (2024)"},{"key":"32_CR28","unstructured":"Touvron, H., et\u00a0al.: Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)"},{"key":"32_CR29","doi-asserted-by":"crossref","unstructured":"Wei, J.T.Z., Wang, R.Y., Jia, R.: Proving membership in llm pretraining data via data watermarks. arXiv preprint arXiv:2402.10892 (2024)","DOI":"10.18653\/v1\/2024.findings-acl.788"},{"key":"32_CR30","unstructured":"Yan, B., et al.: On protecting the data privacy of large language models (llms): A survey. arXiv preprint arXiv:2403.05156 (2024)"},{"key":"32_CR31","doi-asserted-by":"crossref","unstructured":"Yeom, S., Giacomelli, I., Fredrikson, M., Jha, S.: Privacy risk in machine learning: Analyzing the connection to overfitting. In: 2018 IEEE 31st Computer Security Foundations Symposium (CSF), pp. 268\u2013282. IEEE (2018)","DOI":"10.1109\/CSF.2018.00027"}],"container-title":["Lecture Notes in Computer Science","Web Information Systems Engineering \u2013 WISE 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-0570-5_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T01:08:42Z","timestamp":1732928922000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-0570-5_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,30]]},"ISBN":["9789819605699","9789819605705"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-0570-5_32","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,30]]},"assertion":[{"value":"30 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"WISE","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Web Information Systems Engineering","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Doha","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Qatar","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"wise2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/wise2024-qatar.com\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}