{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,17]],"date-time":"2026-02-17T08:18:03Z","timestamp":1771316283627,"version":"3.50.1"},"reference-count":34,"publisher":"Informa UK Limited","issue":"16","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62133011"],"award-info":[{"award-number":["62133011"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62373288"],"award-info":[{"award-number":["62373288"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["www.tandfonline.com"],"crossmark-restriction":true},"short-container-title":["International Journal of Production Research"],"published-print":{"date-parts":[[2025,8,18]]},"DOI":"10.1080\/00207543.2025.2467448","type":"journal-article","created":{"date-parts":[[2025,2,19]],"date-time":"2025-02-19T04:03:11Z","timestamp":1739937791000},"page":"6037-6055","update-policy":"https:\/\/doi.org\/10.1080\/tandf_crossmark_01","source":"Crossref","is-referenced-by-count":0,"title":["A novel hybrid intelligent scheduling: integrating human feedback into reinforcement learning for adaptive preference objectives"],"prefix":"10.1080","volume":"63","author":[{"given":"Chen","family":"Ding","sequence":"first","affiliation":[{"name":"Tongji University","place":["Shanghai, People\u2019s Republic of China"]}]},{"given":"Fei","family":"Qiao","sequence":"additional","affiliation":[{"name":"Tongji University","place":["Shanghai, People\u2019s Republic of China"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0400-221X","authenticated-orcid":false,"given":"Dongyuan","family":"Wang","sequence":"additional","affiliation":[{"name":"Tongji University","place":["Shanghai, People\u2019s Republic of China"]}]},{"given":"Juan","family":"Liu","sequence":"additional","affiliation":[{"name":"Tongji University","place":["Shanghai, People\u2019s Republic of China"]}]}],"member":"301","published-online":{"date-parts":[[2025,2,19]]},"reference":[{"key":"e_1_3_4_2_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2024.2357740"},{"key":"e_1_3_4_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cie.2020.106778"},{"key":"e_1_3_4_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2023.05.017"},{"key":"e_1_3_4_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3208942"},{"key":"e_1_3_4_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2022.3209290"},{"key":"e_1_3_4_7_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2023.2246783"},{"key":"e_1_3_4_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cie.2023.109255"},{"issue":"1","key":"e_1_3_4_9_1","first-page":"2094","article-title":"Deep Reinforcement Learning with Double Q-Learning","volume":"30","author":"Hado V. H.","year":"2016","unstructured":"Hado, V. H., A. Guez, and D. Silver. 2016. \u201cDeep Reinforcement Learning with Double Q-Learning.\u201d In Thirtieth AAAI Conference on Artificial Intelligence 30 (1): 2094\u20132100.","journal-title":"In Thirtieth AAAI Conference on Artificial Intelligence"},{"key":"e_1_3_4_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10845-021-01847-3"},{"key":"e_1_3_4_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-02675-6_46"},{"key":"e_1_3_4_12_1","first-page":"6152","article-title":"PEBBLE: Feedback-Efficient Interactive Reinforcement Learning via Relabeling Experience and Unsupervised Pre-Training","volume":"139","author":"Lee K.","year":"2022","unstructured":"Lee, K., L. Smith, and P. Abbeel. 2022. \u201cPEBBLE: Feedback-Efficient Interactive Reinforcement Learning via Relabeling Experience and Unsupervised Pre-Training.\u201d International Conference on Machine Learning 139:6152\u20136163.","journal-title":"International Conference on Machine Learning"},{"key":"e_1_3_4_13_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2022.2058432"},{"key":"e_1_3_4_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cor.2023.106294"},{"key":"e_1_3_4_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cie.2021.107489"},{"key":"e_1_3_4_16_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2018.1524165"},{"key":"e_1_3_4_17_1","article-title":"Playing Atari with Deep Reinforcement Learning","author":"Mnih V.","year":"2013","unstructured":"Mnih, V., K. Kavukcuoglu, D. Silver, A. Graves, et al. 2013. \u201cPlaying Atari with Deep Reinforcement Learning.\u201d Computer Science.","journal-title":"Computer Science"},{"issue":"1","key":"e_1_3_4_18_1","first-page":"1","article-title":"Deep Reinforcement Learning for Solving Steelmaking-Continuous Casting Scheduling Problems Under Time-of-Use Tariffs","volume":"62","author":"Pan R.","year":"2023","unstructured":"Pan, R., Q. Wang, J. Cao, and C. Zhou. 2023. \u201cDeep Reinforcement Learning for Solving Steelmaking-Continuous Casting Scheduling Problems Under Time-of-Use Tariffs.\u201d International Journal of Production Research 62\u00a0(1): 1\u201317.","journal-title":"International Journal of Production Research"},{"key":"e_1_3_4_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2022.105277"},{"key":"e_1_3_4_20_1","first-page":"2085","volume-title":"Proceedings of the 17th International Conference on Autonomous Agents and Multi-Agent Systems","author":"Peter S.","year":"2018","unstructured":"Peter, S., L. Guy, G. Audrunas, C. W. Marian, et al. 2018. \u201cValue-decomposition Networks for Cooperative Multi-Agent Learning Based on Team Reward.\u201d Proceedings of the 17th International Conference on Autonomous Agents and Multi-Agent Systems, 2085\u20132087."},{"key":"e_1_3_4_21_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2021.1968526"},{"key":"e_1_3_4_22_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2022.2138611"},{"key":"e_1_3_4_23_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2024.2335663"},{"key":"e_1_3_4_24_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2023.2245918"},{"key":"e_1_3_4_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10845-018-1454-3"},{"key":"e_1_3_4_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.120268"},{"key":"e_1_3_4_27_1","first-page":"1083","article-title":"Composite Rules Selection Using Reinforcement Learning for Dynamic Job-Shop Scheduling","volume":"1","author":"Wei Y.","year":"2004","unstructured":"Wei, Y., and M. Zhao. 2004. \u201cComposite Rules Selection Using Reinforcement Learning for Dynamic Job-Shop Scheduling.\u201d IEEE Conference on Robotics, Automation and Mechatronics 1:1083\u20131088.","journal-title":"IEEE Conference on Robotics, Automation and Mechatronics"},{"key":"e_1_3_4_28_1","unstructured":"Wu J. L. Ouyang D. M. Ziegler N. Stiennon R. Lowe J. Leike and P. Christiano. 2021. \"Recursively Summarizing Books with Human Feedback.\" arXiv preprint arXiv:2109.10862."},{"key":"e_1_3_4_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2021.107815"},{"key":"e_1_3_4_30_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2021.1943037"},{"key":"e_1_3_4_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jmsy.2023.09.009"},{"key":"e_1_3_4_32_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2012.751509"},{"key":"e_1_3_4_33_1","first-page":"6339","article-title":"Leveraging Human Guidance for Deep Reinforcement Learning Tasks","author":"Zhang R.","year":"2020","unstructured":"Zhang, R., F. Torabi, L. Guan, D. H. Ballard, and P. Stone. 2020. \u201cLeveraging Human Guidance for Deep Reinforcement Learning Tasks.\u201d In International Joint Conference on Artificial Intelligence, 6339\u20136346.","journal-title":"In International Joint Conference on Artificial Intelligence"},{"key":"e_1_3_4_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.rcim.2022.102412"},{"key":"e_1_3_4_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.111843"}],"container-title":["International Journal of Production Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.tandfonline.com\/doi\/pdf\/10.1080\/00207543.2025.2467448","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,17]],"date-time":"2026-02-17T07:50:09Z","timestamp":1771314609000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.tandfonline.com\/doi\/full\/10.1080\/00207543.2025.2467448"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,19]]},"references-count":34,"journal-issue":{"issue":"16","published-print":{"date-parts":[[2025,8,18]]}},"alternative-id":["10.1080\/00207543.2025.2467448"],"URL":"https:\/\/doi.org\/10.1080\/00207543.2025.2467448","relation":{},"ISSN":["0020-7543","1366-588X"],"issn-type":[{"value":"0020-7543","type":"print"},{"value":"1366-588X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,19]]},"assertion":[{"value":"The publishing and review policy for this title is described in its Aims & Scope.","order":1,"name":"peerreview_statement","label":"Peer Review Statement"},{"value":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tprs20","URL":"http:\/\/www.tandfonline.com\/action\/journalInformation?show=aimsScope&journalCode=tprs20","order":2,"name":"aims_and_scope_url","label":"Aim & Scope"},{"value":"2024-06-29","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-02-05","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-02-19","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}