{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:04:45Z","timestamp":1742911485786,"version":"3.40.3"},"publisher-location":"Cham","reference-count":26,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031723490"},{"type":"electronic","value":"9783031723506"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-72350-6_29","type":"book-chapter","created":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T12:14:50Z","timestamp":1726661690000},"page":"423-432","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Enhancing LM\u2019s Task Adaptability: Powerful Post-training Framework with\u00a0Reinforcement Learning from\u00a0Model Feedback"],"prefix":"10.1007","author":[{"given":"Fuju","family":"Rong","sequence":"first","affiliation":[]},{"given":"Weihao","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Zhuo","family":"Deng","sequence":"additional","affiliation":[]},{"given":"Zheng","family":"Gong","sequence":"additional","affiliation":[]},{"given":"Chucheng","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Wenze","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Zhiyuan","family":"Niu","sequence":"additional","affiliation":[]},{"given":"Fang","family":"Li","sequence":"additional","affiliation":[]},{"given":"Lan","family":"Ma","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,17]]},"reference":[{"doi-asserted-by":"publisher","unstructured":"Cer, D., Diab, M., Agirre, E., Lopez-Gazpio, I., Specia, L.: SemEval-2017 task 1: semantic textual similarity multilingual and cross lingual focused evaluation. In: Proceedings of the 11th International Workshop on Semantic Evaluation (SemEval-2017), pp. 1\u201314. Association for Computational Linguistics, Vancouver, Canada (2017). https:\/\/doi.org\/10.18653\/v1\/S17-2001, https:\/\/aclanthology.org\/S17-2001","key":"29_CR1","DOI":"10.18653\/v1\/S17-2001"},{"key":"29_CR2","doi-asserted-by":"publisher","first-page":"2400","DOI":"10.1109\/TASLP.2020.3013392","volume":"28","author":"Z Chen","year":"2020","unstructured":"Chen, Z., Chen, L., Liu, X., Yu, K.: Distributed structured actor-critic reinforcement learning for universal dialogue management. IEEE\/ACM Trans. Audio, Speech Language Process. 28, 2400\u20132411 (2020). https:\/\/doi.org\/10.1109\/TASLP.2020.3013392","journal-title":"IEEE\/ACM Trans. Audio, Speech Language Process."},{"key":"29_CR3","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1007\/11736790_9","volume-title":"Machine Learning Challenges. Evaluating Predictive Uncertainty, Visual Object Classification, and Recognising Tectual Entailment","author":"I Dagan","year":"2006","unstructured":"Dagan, I., Glickman, O., Magnini, B.: The PASCAL recognising textual entailment challenge. In: Qui\u00f1onero-Candela, J., Dagan, I., Magnini, B., d\u2019Alch\u00e9-Buc, F. (eds.) Machine Learning Challenges. Evaluating Predictive Uncertainty, Visual Object Classification, and Recognising Tectual Entailment, pp. 177\u2013190. Springer, Berlin, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11736790_9"},{"doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Burstein, J., Doran, C., Solorio, T. (eds.) Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019, Minneapolis, MN, USA, June 2-7, 2019, Volume 1 (Long and Short Papers), pp. 4171\u20134186. Association for Computational Linguistics (2019). https:\/\/doi.org\/10.18653\/v1\/n19-1423, https:\/\/doi.org\/10.18653\/v1\/n19-1423","key":"29_CR4","DOI":"10.18653\/v1\/n19-1423"},{"unstructured":"Dolan, W.B., Brockett, C.: Automatically constructing a corpus of sentential paraphrases. In: Proceedings of the Third International Workshop on Paraphrasing (IWP2005) (2005). https:\/\/aclanthology.org\/I05-5002","key":"29_CR5"},{"doi-asserted-by":"crossref","unstructured":"Gell\u00e9rt, Weisz, Pawe\u0142, Budzianowski, Pei-Hao, Su, Milica, Ga\u0161i\u0107: Sample efficient deep reinforcement learning for dialogue systems with large action spaces. IEEE\/ACM Trans. Audio Speech Lang. Process. (2018)","key":"29_CR6","DOI":"10.1109\/TASLP.2018.2851664"},{"doi-asserted-by":"crossref","unstructured":"Han, J., Hong, T., Kim, B., Ko, Y., Seo, J.: Fine-grained post-training for improving retrieval-based dialogue systems. In: Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (2021)","key":"29_CR7","DOI":"10.18653\/v1\/2021.naacl-main.122"},{"unstructured":"Iyer, S., Kota, S., Boyd-Graber, J.: First quora dataset release: Question pairs. https:\/\/data.quora.com\/First-Quora-Dataset-Release-Question-Pairs (2017)","key":"29_CR8"},{"unstructured":"Jang, Y., Lee, J., Kim, K.E.: GPT-Critic: offline reinforcement learning for end-to-end task-oriented dialogue systems. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=qaxhBG1UUaS","key":"29_CR9"},{"unstructured":"Maas, A.L., Daly, R.E., Pham, P.T., Huang, D., Ng, A.Y., Potts, C.: Learning word vectors for sentiment analysis. In: Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies, pp. 142\u2013150. Association for Computational Linguistics, Portland, Oregon, USA (2011). https:\/\/aclanthology.org\/P11-1015","key":"29_CR10"},{"unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback. In: NeurIPS (2022). http:\/\/papers.nips.cc\/paper_files\/paper\/2022\/hash\/b1efde53be364a73914f58805a001731-Abstract-Conference.html","key":"29_CR11"},{"unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. CoRR abs\/1707.06347 (2017). http:\/\/arxiv.org\/abs\/1707.06347","key":"29_CR12"},{"unstructured":"Socher, R., et al.: Recursive deep models for semantic compositionality over a sentiment treebank. In: Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing, pp. 1631\u20131642. Association for Computational Linguistics, Seattle, Washington, USA (2013). https:\/\/aclanthology.org\/D13-1170","key":"29_CR13"},{"issue":"2","key":"29_CR14","doi-asserted-by":"publisher","first-page":"1543","DOI":"10.1007\/s10462-022-10205-5","volume":"56","author":"V Uc-Cetina","year":"2023","unstructured":"Uc-Cetina, V., Navarro-Guerrero, N., Martin-Gonzalez, A., Weber, C., Wermter, S.: Survey on reinforcement learning for language processing. Artif. Intell. Rev. 56(2), 1543\u20131575 (2023)","journal-title":"Artif. Intell. Rev."},{"unstructured":"Wang, W., Bansal, M.: Multi-task learning for multiple choice question answering. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 41\u201351 (2018)","key":"29_CR15"},{"key":"29_CR16","doi-asserted-by":"publisher","first-page":"625","DOI":"10.1162\/tacl_a_00290","volume":"7","author":"A Warstadt","year":"2019","unstructured":"Warstadt, A., Singh, A., Bowman, S.R.: Neural network acceptability judgments. Trans. Assoc. Comput. Linguist. 7, 625\u2013641 (2019)","journal-title":"Trans. Assoc. Comput. Linguist."},{"doi-asserted-by":"crossref","unstructured":"Whang, T., Lee, D., Lee, C., Yang, K., Oh, D., Lim, H.: An effective domain adaptive post-training method for BERT in response selection. In: Conference of the International Speech Communication Association (2020)","key":"29_CR17","DOI":"10.21437\/Interspeech.2020-2153"},{"doi-asserted-by":"crossref","unstructured":"Williams, A., Nangia, N., Bowman, S.R.: A broad-coverage challenge corpus for sentence understanding through inference (2017)","key":"29_CR18","DOI":"10.18653\/v1\/N18-1101"},{"doi-asserted-by":"publisher","unstructured":"Xing, Y., Bian, C.: Text similarity based on post-training BERT. In: 2021 International Conference on Intelligent Computing, Automation and Applications (ICAA), pp. 279\u2013282 (2021). https:\/\/doi.org\/10.1109\/ICAA53760.2021.00058","key":"29_CR19","DOI":"10.1109\/ICAA53760.2021.00058"},{"unstructured":"Xu, H., Liu, B., Shu, L., Yu, P.S.: Bert post-training for review reading comprehension and aspect-based sentiment analysis (2019)","key":"29_CR20"},{"issue":"2","key":"29_CR21","doi-asserted-by":"publisher","first-page":"733","DOI":"10.1007\/s11280-022-01083-6","volume":"26","author":"M Xu","year":"2022","unstructured":"Xu, M., Peng, M., Liu, F.: Text style transfer between classical and modern Chinese through prompt-based reinforcement learning. World Wide Web 26(2), 733\u2013750 (2022)","journal-title":"World Wide Web"},{"doi-asserted-by":"crossref","unstructured":"Xu, R., Tao, C., Jiang, D., Zhao, X., Zhao, D., Yan, R.: Learning an effective context-response matching model with self-supervised tasks for retrieval-based dialogues (2020)","key":"29_CR22","DOI":"10.1609\/aaai.v35i16.17666"},{"unstructured":"Yang, K., Lee, D., Whang, T., Lee, S., Lim, H.: EmotionX-KU: BERT-max based contextual emotion classifier (2019)","key":"29_CR23"},{"doi-asserted-by":"crossref","unstructured":"Ye, D., et al.: Mastering complex control in MOBA games with deep reinforcement learning. In: The Thirty-Fourth AAAI Conference on Artificial Intelligence, AAAI 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, IAAI 2020, The Tenth AAAI Symposium on Educational Advances in Artificial Intelligence, EAAI 2020, New York, NY, USA, February 7-12, 2020, pp. 6672\u20136679. AAAI Press (2020). https:\/\/ojs.aaai.org\/index.php\/AAAI\/article\/view\/6144","key":"29_CR24","DOI":"10.1609\/aaai.v34i04.6144"},{"unstructured":"Zhang, X., Zhao, J., Lecun, Y.: Character-level convolutional networks for text classification. MIT Press (2015)","key":"29_CR25"},{"unstructured":"Zhuang, L., Wayne, L., Ya, S., Jun, Z.: A robustly optimized BERT pre-training approach with post-training. In: Proceedings of the 20th Chinese National Conference on Computational Linguistics, pp. 1218\u20131227. Chinese Information Processing Society of China, Huhhot, China (2021). https:\/\/aclanthology.org\/2021.ccl-1.108","key":"29_CR26"}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks and Machine Learning \u2013 ICANN 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72350-6_29","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T12:20:00Z","timestamp":1726662000000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72350-6_29"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031723490","9783031723506"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72350-6_29","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"17 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICANN","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Neural Networks","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lugano","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Switzerland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"33","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icann2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}