{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T13:10:55Z","timestamp":1774703455101,"version":"3.50.1"},"publisher-location":"Cham","reference-count":40,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031773662","type":"print"},{"value":"9783031773679","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T00:00:00Z","timestamp":1731628800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T00:00:00Z","timestamp":1731628800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-77367-9_27","type":"book-chapter","created":{"date-parts":[[2024,11,16]],"date-time":"2024-11-16T04:59:22Z","timestamp":1731733162000},"page":"351-366","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["VickreyFeedback: Cost-Efficient Data Construction for\u00a0Reinforcement Learning from\u00a0Human Feedback"],"prefix":"10.1007","author":[{"given":"Guoxi","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiuding","family":"Duan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,15]]},"reference":[{"key":"27_CR1","unstructured":"Bai, Y., et\u00a0al.: Training a helpful and harmless assistant with reinforcement learning from human feedback. arXiv preprint arXiv:2204.05862 (2022)"},{"key":"27_CR2","unstructured":"Bartolome, A., Martin, G., Vila, D.: Notus (2023). https:\/\/github.com\/argilla-io\/notus"},{"issue":"3","key":"27_CR3","doi-asserted-by":"publisher","first-page":"467","DOI":"10.1287\/mnsc.1040.0329","volume":"51","author":"RR Chen","year":"2005","unstructured":"Chen, R.R., Roundy, R.O., Zhang, R.Q., Janakiraman, G.: Efficient auction mechanisms for supply chain procurement. Manage. Sci. 51(3), 467\u2013482 (2005)","journal-title":"Manage. Sci."},{"key":"27_CR4","unstructured":"Cui, G., et al.: UltraFeedback: boosting language models with high-quality feedback (2024)"},{"key":"27_CR5","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"840","DOI":"10.1007\/978-3-319-57529-2_65","volume-title":"Advances in Knowledge Discovery and Data Mining","author":"J Duan","year":"2017","unstructured":"Duan, J., Li, J., Baba, Y., Kashima, H.: A generalized model for multidimensional intransitivity. In: Kim, J., Shim, K., Cao, L., Lee, J.-G., Lin, X., Moon, Y.-S. (eds.) PAKDD 2017. LNCS (LNAI), vol. 10235, pp. 840\u2013852. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-57529-2_65"},{"key":"27_CR6","doi-asserted-by":"crossref","unstructured":"Duetting, P., Mirrokni, V., Paes\u00a0Leme, R., Xu, H., Zuo, S.: Mechanism design for large language models. In: Proceedings of the ACM on Web Conference 2024, pp. 144\u2013155 (2024)","DOI":"10.1145\/3589334.3645511"},{"key":"27_CR7","doi-asserted-by":"crossref","unstructured":"Fu, T., Cai, D., Liu, L., Shi, S., Yan, R.: Disperse-then-merge: pushing the limits of instruction tuning via alignment tax reduction. arXiv preprint arXiv:2405.13432 (2024)","DOI":"10.18653\/v1\/2024.findings-acl.175"},{"key":"27_CR8","unstructured":"Hong, J., Bhatia, K., Dragan, A.: On the sensitivity of reward inference to misspecified human models. arXiv preprint arXiv:2212.04717 (2022)"},{"key":"27_CR9","unstructured":"Hu, E.J., et al.: LoRA: low-rank adaptation of large language models. In: International Conference on Learning Representations (2022)"},{"key":"27_CR10","unstructured":"Hu, S., Lu, C., Clune, J.: Automated design of agentic systems. arXiv preprint arXiv:2408.08435 (2024)"},{"key":"27_CR11","unstructured":"Hu, Y., et al.: Towards comprehensive preference data collection for reward modeling. arXiv preprint arXiv:2406.16486 (2024)"},{"key":"27_CR12","unstructured":"Ji, J., et\u00a0al.: AI alignment: a comprehensive survey. arXiv preprint arXiv:2310.19852 (2023)"},{"key":"27_CR13","unstructured":"Jiang, R., et al.: A survey on human preference learning for large language models. arXiv preprint arXiv:2406.11191 (2024)"},{"key":"27_CR14","unstructured":"Lee, A., Miranda, B., Sundar, S., Koyejo, S.: Beyond scale: the diversity coefficient as a data quality metric demonstrates LLMs are pre-trained on formally diverse data. arXiv preprint arXiv:2306.13840 (2023)"},{"key":"27_CR15","doi-asserted-by":"crossref","unstructured":"Li, D., Wang, Z., Chen, Y., Jiang, R., Ding, W., Okumura, M.: A survey on deep active learning: Recent advances and new frontiers. IEEE Trans. Neural Netw. Learn. Syst. (2024)","DOI":"10.1109\/TNNLS.2024.3396463"},{"key":"27_CR16","doi-asserted-by":"crossref","unstructured":"Li, J.: A comparative study on annotation quality of crowdsourcing and LLM via label aggregation. In: ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6525\u20136529. IEEE (2024)","DOI":"10.1109\/ICASSP48485.2024.10447803"},{"key":"27_CR17","unstructured":"Lin, Y., et al.: Mitigating the alignment tax of RLHF (2024). https:\/\/arxiv.org\/abs\/2309.06256"},{"key":"27_CR18","unstructured":"Longpre, S., et al.: The flan collection: designing data and methods for effective instruction tuning. In: Proceedings of the Fortieth International Conference on Machine Learning, pp. 22631\u201322648. Proceedings of Machine Learning Research, PMLR (2023)"},{"key":"27_CR19","unstructured":"Lu, C., Lu, C., Lange, R.T., Foerster, J., Clune, J., Ha, D.: The AI scientist: towards fully automated open-ended scientific discovery. arXiv preprint arXiv:2408.06292 (2024)"},{"key":"27_CR20","unstructured":"Mangrulkar, S., Gugger, S., Debut, L., Belkada, Y., Paul, S., Bossan, B.: PEFT: state-of-the-art parameter-efficient fine-tuning methods (2022). https:\/\/github.com\/huggingface\/peft"},{"key":"27_CR21","doi-asserted-by":"publisher","first-page":"102831","DOI":"10.1016\/j.jmateco.2023.102831","volume":"106","author":"H Matsushima","year":"2023","unstructured":"Matsushima, H., Noda, S.: Mechanism design with general ex-ante investments. J. Math. Econ. 106, 102831 (2023)","journal-title":"J. Math. Econ."},{"key":"27_CR22","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1613\/jair.2046","volume":"29","author":"N Nisan","year":"2007","unstructured":"Nisan, N., Ronen, A.: Computationally feasible VCG mechanisms. J. Artif. Intell. Res. 29, 19\u201347 (2007)","journal-title":"J. Artif. Intell. Res."},{"key":"27_CR23","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. Adv. Neural. Inf. Process. Syst. 35, 27730\u201327744 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"27_CR24","unstructured":"Rafailov, R., Sharma, A., Mitchell, E., Manning, C.D., Ermon, S., Finn, C.: Direct preference optimization: your language model is secretly a reward model. In: Advances in Neural Information Processing Systems, pp. 53728\u201353741. Curran Associates, Inc. (2023)"},{"key":"27_CR25","unstructured":"Rosset, C., Cheng, C.A., Mitra, A., Santacroce, M., Awadallah, A., Xie, T.: Direct nash optimization: teaching language models to self-improve with general preferences. arXiv preprint arXiv:2404.03715 (2024)"},{"key":"27_CR26","doi-asserted-by":"crossref","unstructured":"Shi, C., Liang, P., Wu, Y., Zhan, T., Jin, Z.: Maximizing user experience with LLMOps-driven personalized recommendation systems. arXiv preprint arXiv:2404.00903 (2024)","DOI":"10.54254\/2755-2721\/64\/20241353"},{"key":"27_CR27","first-page":"3008","volume":"33","author":"N Stiennon","year":"2020","unstructured":"Stiennon, N., et al.: Learning to summarize with human feedback. Adv. Neural. Inf. Process. Syst. 33, 3008\u20133021 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"27_CR28","unstructured":"Sun, H., Chen, Y., Wang, S., Chen, W., Deng, X.: Mechanism design for LLM fine-tuning with multiple reward models. arXiv preprint arXiv:2405.16276 (2024)"},{"key":"27_CR29","unstructured":"Touvron, H., et\u00a0al.: LLaMA 2: open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)"},{"issue":"1","key":"27_CR30","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1111\/j.1540-6261.1961.tb02789.x","volume":"16","author":"W Vickrey","year":"1961","unstructured":"Vickrey, W.: Counterspeculation, auctions, and competitive sealed tenders. J. Financ. 16(1), 8\u201337 (1961)","journal-title":"J. Financ."},{"key":"27_CR31","unstructured":"Wang, B., et\u00a0al.: Secrets of RLHF in large language models part II: reward modeling. arXiv preprint arXiv:2401.06080 (2024)"},{"key":"27_CR32","unstructured":"Wang, Z., et al.: UNA: unifying alignments of RLHF\/PPO, DPO and KTO by a generalized implicit reward function. arXiv preprint arXiv:2408.15339 (2024)"},{"key":"27_CR33","unstructured":"von Werra, L., et al.: TRL: transformer reinforcement learning (2020). https:\/\/github.com\/huggingface\/trl"},{"key":"27_CR34","unstructured":"Xu, C., et al.: WizardLM: empowering large pre-trained language models to follow complex instructions. In: Proceedings of the Twelfth International Conference on Learning Representations (2024)"},{"key":"27_CR35","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1007\/978-3-031-26412-2_3","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"G Zhang","year":"2022","unstructured":"Zhang, G., Kashima, H.: Batch reinforcement learning from crowds. In: Amini, M.R., Canu, S., Fischer, A., Guns, T., Kralj Novak, P., Tsoumakas, G. (eds.) ECML PKDD 2022. LNCS, vol. 13716, pp. 38\u201351. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-26412-2_3"},{"key":"27_CR36","unstructured":"Zhang, H., Dong, Y., Xiao, C., Oyamada, M.: Jellyfish: a large language model for data preprocessing. arXiv preprint arXiv:2312.01678 (2023)"},{"key":"27_CR37","unstructured":"Zheng, L., et al.: Judging LLM-as-a-judge with MT-bench and chatbot arena (2023)"},{"key":"27_CR38","unstructured":"Zheng, R., et\u00a0al.: Secrets of RLHF in large language models part I: PPO. arXiv preprint arXiv:2307.04964 (2023)"},{"key":"27_CR39","unstructured":"Zhou, C., et\u00a0al.: LIMA: less is more for alignment. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"27_CR40","doi-asserted-by":"crossref","unstructured":"Zhou, W., et al.: WPO: enhancing RLHF with weighted preference optimization. arXiv preprint arXiv:2406.11827 (2024)","DOI":"10.18653\/v1\/2024.emnlp-main.475"}],"container-title":["Lecture Notes in Computer Science","PRIMA 2024: Principles and Practice of Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-77367-9_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T11:43:27Z","timestamp":1733053407000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-77367-9_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,15]]},"ISBN":["9783031773662","9783031773679"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-77367-9_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,15]]},"assertion":[{"value":"15 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRIMA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Principles and Practice of Multi-Agent Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kyoto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 November 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 November 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"prima2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}