{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,25]],"date-time":"2026-02-25T18:13:11Z","timestamp":1772043191413,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":16,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819699001","type":"print"},{"value":"9789819699018","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-9901-8_36","type":"book-chapter","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T07:58:40Z","timestamp":1753257520000},"page":"435-447","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Fine-Grained Annotation and Multi-objective Optimization Based RLHF"],"prefix":"10.1007","author":[{"given":"Junjiang","family":"Wu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhe","family":"Xue","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yawen","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yudian","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingzhao","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junping","family":"Du","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,7,24]]},"reference":[{"key":"36_CR1","unstructured":"Bai, Y., Jones, A., Ndousse, K., et al.: Training a helpful and harmless assistant with reinforcement learning from human feedback. arXiv preprint arXiv:2204.05862 (2022)"},{"key":"36_CR2","unstructured":"Dai, J., Pan, X., Sun, R., et al.: Safe RLHF: safe reinforcement learning from human feedback. arXiv preprint arXiv:2310.12773 (2023)"},{"key":"36_CR3","doi-asserted-by":"crossref","unstructured":"Xie, R., Zhang, S., Wang, R., et al.: Hierarchical reinforcement learning for integrated recommendation. In: Proceedings of the AAAI Conference on Artificial Intelligence. 35(5), 4521\u20134528 (2021)","DOI":"10.1609\/aaai.v35i5.16580"},{"key":"36_CR4","first-page":"3008","volume":"33","author":"N Stiennon","year":"2020","unstructured":"Stiennon, N., Ouyang, L., Wu, J., et al.: Learning to summarize with human feedback. Adv. Neural. Inf. Process. Syst. 33, 3008\u20133021 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"36_CR5","unstructured":"Ganguli, D., Huang, S., Lovitt, L., et al.: Collective constitutional AI: aligning a language model with public input (2024). Accessed February 2023"},{"key":"36_CR6","first-page":"81773","volume":"37","author":"C Ye","year":"2024","unstructured":"Ye, C., Xiong, W., Zhang, Y., et al.: Online iterative reinforcement learning from human feedback with general preference model. Adv. Neural. Inf. Process. Syst. 37, 81773\u201381807 (2024)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"36_CR7","first-page":"59008","volume":"36","author":"Z Wu","year":"2023","unstructured":"Wu, Z., Hu, Y., Shi, W., et al.: Fine-grained human feedback gives better rewards for language model training. Adv. Neural. Inf. Process. Syst. 36, 59008\u201359033 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"36_CR8","unstructured":"Cui, G., Yuan, L., Ding, N., et al.: UltraFeedback: boosting language models with high-quality feedback (2023)"},{"key":"36_CR9","doi-asserted-by":"crossref","unstructured":"Bradley, R.A., Terry, M.E.: Rank analysis of incomplete block designs: I. The method of paired comparisons. Biometrika 39(3\/4), 324\u2013345 (1952)","DOI":"10.1093\/biomet\/39.3-4.324"},{"key":"36_CR10","unstructured":"Jaques, N., Ghandeharioun, A., Shen, J.H., et al.: Way off-policy batch deep reinforcement learning of implicit human preferences in dialog. arXiv preprint arXiv:1907.00456 (2019)"},{"key":"36_CR11","unstructured":"Laidlaw, C., Singhal, S., Dragan, A.: Preventing reward hacking with occupancy measure regularization (2023)"},{"key":"36_CR12","unstructured":"Lee, Y., Lee, K., Park, S., et al:. QASA: advanced question answering on scientific articles. In: International Conference on Machine Learning, pp. 19036\u201319052. PMLR (2023)"},{"key":"36_CR13","unstructured":"Wang, Z., Dong, Y., Zeng, J., et al.: HelpSteer: multi-dimension helpfulness dataset for SteerLM. arXiv preprint arXiv:2311.09528 (2023)"},{"key":"36_CR14","doi-asserted-by":"crossref","unstructured":"Jiang, D., Ren, X., Lin, B.Y.: LLM-blender: ensembling large language models with pairwise ranking and generative fusion. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp.14165\u201314178 (2023)","DOI":"10.18653\/v1\/2023.acl-long.792"},{"key":"36_CR15","unstructured":"Lambert, N., Morrison, J., Pyatkin, V., et al.: Tulu 3: pushing frontiers in open language model post-training. arXiv preprint arXiv:2411.15124 (2024)"},{"key":"36_CR16","unstructured":"Wang, B., Zheng, R., Chen, L., et al.: Secrets of rlhf in large language models part II: reward modeling. arXiv preprint arXiv:2401.06080 (2024)"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-9901-8_36","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T22:17:41Z","timestamp":1753309061000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-9901-8_36"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819699001","9789819699018"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-9901-8_36","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"24 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ningbo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/icg\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}