{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,20]],"date-time":"2026-07-20T20:04:12Z","timestamp":1784577852101,"version":"3.55.0"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2025,3,21]],"date-time":"2025-03-21T00:00:00Z","timestamp":1742515200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,3,21]],"date-time":"2025-03-21T00:00:00Z","timestamp":1742515200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Sci. China Inf. Sci."],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1007\/s11432-023-4282-8","type":"journal-article","created":{"date-parts":[[2025,3,29]],"date-time":"2025-03-29T15:19:43Z","timestamp":1743261583000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["One model, two skills: active vision and action learning model for robotic manipulation"],"prefix":"10.1007","volume":"68","author":[{"given":"Guokang","family":"Wang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yanhong","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Huaping","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,3,21]]},"reference":[{"key":"4282_CR1","first-page":"13438","volume-title":"Proceedings of IEEE International Conference on Robotics and Automation, Xi\u2019an","author":"M Sundermeyer","year":"2021","unstructured":"Sundermeyer M, Mousavian A, Triebel R, et al. Contact-GraspNet: efficient 6-DoF grasp generation in cluttered scenes. In: Proceedings of IEEE International Conference on Robotics and Automation, Xi\u2019an, 2021. 13438\u201313444"},{"key":"4282_CR2","volume-title":"Proceedings of Robotics: Science and Systems XVII","author":"H Zhang","year":"2021","unstructured":"Zhang H, Lu Y, Yu C, et al. INVIGORATE: interactive visual grounding and grasping in clutter. In: Proceedings of Robotics: Science and Systems XVII, 2021"},{"key":"4282_CR3","first-page":"7286","volume-title":"Proceedings of IEEE International Conference on Robotics and Automation, Paris","author":"L Yen-Chen","year":"2020","unstructured":"Yen-Chen L, Zeng A, Song S, et al. Learning to see before learning to act: visual pre-training for manipulation. In: Proceedings of IEEE International Conference on Robotics and Automation, Paris, 2020. 7286\u20137293"},{"key":"4282_CR4","volume-title":"Proceedings of Robotics: Science and Systems XIV, Pittsburgh","author":"D Morrison","year":"2018","unstructured":"Morrison D, Corke P, Leitner J. Closing the loop for robotic grasping: a real-time, generative grasp synthesis approach. In: Proceedings of Robotics: Science and Systems XIV, Pittsburgh, 2018"},{"key":"4282_CR5","first-page":"4497","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"K Ehsani","year":"2021","unstructured":"Ehsani K, Han W, Herrasti A, et al. ManipulaTHOR: a framework for visual object manipulation. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021. 4497\u20134506"},{"key":"4282_CR6","first-page":"3629","volume-title":"Proceedings of International Conference on Robotics and Automation, Montreal","author":"H Liang","year":"2019","unstructured":"Liang H, Ma X, Li S, et al. PointNetGPD: detecting grasp configurations from point sets. In: Proceedings of International Conference on Robotics and Automation, Montreal, 2019. 3629\u20133635"},{"key":"4282_CR7","first-page":"13474","volume-title":"Proceedings of IEEE International Conference on Robotics and Automation, Xi\u2019an","author":"B Zhao","year":"2021","unstructured":"Zhao B, Zhang H, Lan X, et al. REGNet: region-based grasp network for end-to-end grasp detection in point clouds. In: Proceedings of IEEE International Conference on Robotics and Automation, Xi\u2019an, 2021. 13474\u201313480"},{"key":"4282_CR8","unstructured":"Zhang J, Bai C, He H, et al. SAM-E: leveraging visual foundation model with sequence imitation for embodied manipulation. 2024. ArXiv:2405.19586"},{"key":"4282_CR9","first-page":"1113","volume-title":"Proceedings of Conference on Robot Learning, Osaka","author":"C Lynch","year":"2020","unstructured":"Lynch C, Khansari M, Xiao T, et al. Learning latent plans from play. In: Proceedings of Conference on Robot Learning, Osaka, 2020. 1113\u20131132"},{"key":"4282_CR10","doi-asserted-by":"publisher","first-page":"3046","DOI":"10.1109\/LRA.2022.3144512","volume":"7","author":"R Jangir","year":"2022","unstructured":"Jangir R, Hansen N, Ghosal S, et al. Look closer: bridging egocentric and third-person views with transformers for robotic manipulation. IEEE Robot Autom Lett, 2022, 7: 3046\u20133053","journal-title":"IEEE Robot Autom Lett"},{"key":"4282_CR11","first-page":"2849","volume-title":"Proceedings of Conference on Robot Learning, Atlanta","author":"X Zhang","year":"2023","unstructured":"Zhang X, Wang D, Han S, et al. Affordance-driven next-best-view planning for robotic grasping. In: Proceedings of Conference on Robot Learning, Atlanta, 2023. 2849\u20132862"},{"key":"4282_CR12","first-page":"1411","volume-title":"Proceedings of International Conference on Intelligent Robots and Systems, Kyoto","author":"M Breyer","year":"2022","unstructured":"Breyer M, Ott L, Siegwart R, et al. Closed-loop next-best-view planning for target-driven grasping. In: Proceedings of International Conference on Intelligent Robots and Systems, Kyoto, 2022. 1411\u20131416"},{"key":"4282_CR13","first-page":"8762","volume-title":"Proceedings of International Conference on Robotics and Automation, Montreal","author":"D Morrison","year":"2019","unstructured":"Morrison D, Corke P, Leitner J. Multi-view picking: next-best-view reaching for improved grasping in clutter. In: Proceedings of International Conference on Robotics and Automation, Montreal, 2019. 8762\u20138768"},{"key":"4282_CR14","unstructured":"Zaky Y, Paruthi G, Tripp B, et al. Active perception and representation for robotic manipulation. 2020. ArXiv:2003.06734"},{"key":"4282_CR15","first-page":"422","volume-title":"Proceedings of Conference on Robot Learning, Switzerland","author":"R Cheng","year":"2018","unstructured":"Cheng R, Agarwal A, Fragkiadaki K. Reinforcement learning of active vision for manipulating objects under occlusions. In: Proceedings of Conference on Robot Learning, Switzerland, 2018. 422\u2013431"},{"key":"4282_CR16","first-page":"6023","volume-title":"Proceedings of International Conference on Robotics and Automation, Montreal","author":"T Johannink","year":"2019","unstructured":"Johannink T, Bahl S, Nair A, et al. Residual reinforcement learning for robot control. In: Proceedings of International Conference on Robotics and Automation, Montreal, 2019. 6023\u20136029"},{"key":"4282_CR17","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1145\/1553374.1553380","volume-title":"Proceedings of the 26th Annual International Conference on Machine Learning, Montreal","author":"Y Bengio","year":"2009","unstructured":"Bengio Y, Louradour J, Collobert R, et al. Curriculum learning. In: Proceedings of the 26th Annual International Conference on Machine Learning, Montreal, 2009. 41\u201348"},{"key":"4282_CR18","first-page":"2786","volume-title":"Proceedings of IEEE International Conference on Robotics and Automation, Singapore","author":"C Finn","year":"2017","unstructured":"Finn C, Levine S. Deep visual foresight for planning robot motion. In: Proceedings of IEEE International Conference on Robotics and Automation, Singapore, 2017. 2786\u20132793"},{"key":"4282_CR19","volume-title":"Proceedings of Robotics: Science and Systems XIV, Pittsburgh","author":"Y Zhu","year":"2018","unstructured":"Zhu Y, Wang Z, Merel J, et al. Reinforcement and imitation learning for diverse visuomotor skills. In: Proceedings of Robotics: Science and Systems XIV, Pittsburgh, 2018"},{"key":"4282_CR20","first-page":"9258","volume-title":"Proceedings of International Conference on Robotics and Automation, Philadelphia","author":"Y Wang","year":"2022","unstructured":"Wang Y, Wang K, Wang Y, et al. Audio-visual grounding referring expression for robotic manipulation. In: Proceedings of International Conference on Robotics and Automation, Philadelphia, 2022. 9258\u20139264"},{"key":"4282_CR21","first-page":"9800","volume-title":"Proceedings of IEEE International Conference on Robotics and Automation, London","author":"W Wang","year":"2023","unstructured":"Wang W, Li X, Dong Y, et al. Natural language instruction understanding for robotic manipulation: a multisensory perception approach. In: Proceedings of IEEE International Conference on Robotics and Automation, London, 2023. 9800\u20139806"},{"key":"4282_CR22","doi-asserted-by":"publisher","first-page":"560","DOI":"10.1109\/LRA.2022.3227873","volume":"8","author":"P M Scheikl","year":"2022","unstructured":"Scheikl P M, Tagliabue E, Gyenes B, et al. Sim-to-real transfer for visual reinforcement learning of deformable object manipulation for robot-assisted surgery. IEEE Robot Autom Lett, 2022, 8: 560\u2013567","journal-title":"IEEE Robot Autom Lett"},{"key":"4282_CR23","first-page":"8298","volume-title":"Proceedings of International Conference on Robotics and Automation, Philadelphia","author":"J Hansen","year":"2022","unstructured":"Hansen J, Hogan F, Rivkin D, et al. Visuotactile-RL: learning multimodal manipulation policies with deep reinforcement learning. In: Proceedings of International Conference on Robotics and Automation, Philadelphia, 2022. 8298\u20138304"},{"key":"4282_CR24","unstructured":"Zhang F, Leitner J, Milford M, et al. Toward vision-based deep reinforcement learning for robotic motion control. 2015. ArXiv:1511.03791"},{"key":"4282_CR25","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1109\/TMECH.2017.2717461","volume":"23","author":"Z Li","year":"2017","unstructured":"Li Z, Zhao T, Chen F, et al. Reinforcement learning of manipulation and grasping using dynamical movement primitives for a humanoidlike mobile manipulator. IEEE ASME Trans Mechatron, 2017, 23: 121\u2013131","journal-title":"IEEE ASME Trans Mechatron"},{"key":"4282_CR26","first-page":"9209","volume-title":"Proceedings of Advances in Neural Information Processing Systems, Montreal","author":"A V Nair","year":"2018","unstructured":"Nair A V, Pong V, Dalal M, et al. Visual reinforcement learning with imagined goals. In: Proceedings of Advances in Neural Information Processing Systems, Montreal, 2018. 9209\u20139220"},{"key":"4282_CR27","first-page":"8943","volume-title":"Proceedings of International Conference on Robotics and Automation, Montreal","author":"M A Lee","year":"2019","unstructured":"Lee M A, Zhu Y, Srinivasan K, et al. Making sense of vision and touch: self-supervised learning of multimodal representations for contact-rich tasks. In: Proceedings of International Conference on Robotics and Automation, Montreal, 2019. 8943\u20138950"},{"key":"4282_CR28","doi-asserted-by":"publisher","first-page":"6145","DOI":"10.1109\/LRA.2020.3010461","volume":"5","author":"A Church","year":"2020","unstructured":"Church A, Lloyd J, Hadsell R, et al. Deep reinforcement learning for tactile robotics: learning to type on a Braille keyboard. IEEE Robot Autom Lett, 2020, 5: 6145\u20136152","journal-title":"IEEE Robot Autom Lett"},{"key":"4282_CR29","doi-asserted-by":"publisher","first-page":"85265","DOI":"10.1109\/ACCESS.2021.3082947","volume":"9","author":"Y Luo","year":"2021","unstructured":"Luo Y, Dong K, Zhao L, et al. Calibration-free monocular vision-based robot manipulations with occlusion awareness. IEEE Access, 2021, 9: 85265\u201385276","journal-title":"IEEE Access"},{"key":"4282_CR30","first-page":"770","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Las Vegas","author":"K He","year":"2016","unstructured":"He K, Zhang X, Ren S, et al. Deep residual learning for image recognition. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Las Vegas, 2016. 770\u2013778"},{"key":"4282_CR31","volume-title":"Proceedings of International Conference on Learning Representations, Puerto Rico","author":"T P Lillicrap","year":"2016","unstructured":"Lillicrap T P, Hunt J J, Pritzel A, et al. Continuous control with deep reinforcement learning. In: Proceedings of International Conference on Learning Representations, Puerto Rico, 2016"},{"key":"4282_CR32","volume-title":"Proceedings of International Conference on Learning Representations, Austria","author":"D Yarats","year":"2021","unstructured":"Yarats D, Kostrikov I, Fergus R. Image augmentation is all you need: regularizing deep reinforcement learning from pixels. In: Proceedings of International Conference on Learning Representations, Austria, 2021"},{"key":"4282_CR33","doi-asserted-by":"publisher","first-page":"1612","DOI":"10.1109\/LRA.2022.3140817","volume":"7","author":"S James","year":"2022","unstructured":"James S, Davison A J. Q-attention: enabling efficient learning for vision-based robotic manipulation. IEEE Robot Autom Lett, 2022, 7: 1612\u20131619","journal-title":"IEEE Robot Autom Lett"},{"key":"4282_CR34","first-page":"13729","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, New Orleans","author":"S James","year":"2022","unstructured":"James S, Wada K, Laidlow T, et al. Coarse-to-fine Q-attention: efficient learning for visual robotic manipulation via discretisation. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, New Orleans, 2022. 13729\u201313738"},{"key":"4282_CR35","first-page":"785","volume-title":"Proceedings of the Conference on Robot Learning, Auckland","author":"M Shridhar","year":"2022","unstructured":"Shridhar M, Manuelli L, Fox D. Perceiver-actor: a multi-task transformer for robotic manipulation. In: Proceedings of the Conference on Robot Learning, Auckland, 2022. 785\u2013799"},{"key":"4282_CR36","volume-title":"Proceedings of Robotics: Science and Systems XIX, Daegu","author":"A Brohan","year":"2023","unstructured":"Brohan A, Brown N, Carbajal J, et al. RT-1: robotics transformer for real-world control at scale. In: Proceedings of Robotics: Science and Systems XIX, Daegu, 2023"},{"key":"4282_CR37","first-page":"2165","volume-title":"Proceedings of the Conference on Robot Learning, Atlanta","author":"B Zitkovich","year":"2023","unstructured":"Zitkovich B, Yu T, Xu S, et al. RT-2: vision-language-action models transfer web knowledge to robotic control. In: Proceedings of the Conference on Robot Learning, Atlanta, 2023. 2165\u20132183"},{"key":"4282_CR38","unstructured":"Reed S E, Zolna K, Parisotto E, et al. A generalist agent. 2022. ArXiv:2205.06175"},{"key":"4282_CR39","first-page":"1861","volume-title":"Proceedings of International Conference on Machine Learning, Stockholm","author":"T Haarnoja","year":"2018","unstructured":"Haarnoja T, Zhou A, Abbeel P, et al. Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: Proceedings of International Conference on Machine Learning, Stockholm, 2018. 1861\u20131870"},{"key":"4282_CR40","first-page":"3611","volume-title":"Proceedings of IEEE International Conference on Robotics and Automation, Paris","author":"X Chen","year":"2020","unstructured":"Chen X, Ye Z, Sun J, et al. Transferable active grasping and real embodied dataset. In: Proceedings of IEEE International Conference on Robotics and Automation, Paris, 2020. 3611\u20133618"},{"key":"4282_CR41","volume-title":"Proceedings of Robotics: Science and Systems XIX, Daegu","author":"J Lv","year":"2023","unstructured":"Lv J, Feng Y, Zhang C, et al. SAM-RL: sensing-aware model-based reinforcement learning via differentiable physics-based simulation and rendering. In: Proceedings of Robotics: Science and Systems XIX, Daegu, 2023"},{"key":"4282_CR42","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1016\/j.ins.2012.07.049","volume":"222","author":"Y Wu","year":"2013","unstructured":"Wu Y, Zhou Y, Saveriades G, et al. Local Shannon entropy measure with statistical tests for image randomness. Inf Sci, 2013, 222: 323\u2013342","journal-title":"Inf Sci"},{"key":"4282_CR43","volume-title":"Proceedings of the 4th International Conference on Learning Representations, San Juan","author":"T Schaul","year":"2016","unstructured":"Schaul T, Quan J, Antonoglou I, et al. Prioritized experience replay. In: Proceedings of the 4th International Conference on Learning Representations, San Juan, 2016"},{"key":"4282_CR44","unstructured":"Amodei D, Olah C, Steinhardt J, et al. Concrete problems in AI safety. 2016. ArXiv:1606.06565"},{"key":"4282_CR45","volume-title":"Proceedings of International Conference on Learning Representations, Addis Ababa","author":"S S Du","year":"2020","unstructured":"Du S S, Kakade S M, Wang R, et al. Is a good representation sufficient for sample efficient reinforcement learning? In: Proceedings of International Conference on Learning Representations, Addis Ababa, 2020"}],"container-title":["Science China Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-023-4282-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11432-023-4282-8","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-023-4282-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,7,20]],"date-time":"2026-07-20T19:33:50Z","timestamp":1784576030000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11432-023-4282-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,21]]},"references-count":45,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,6]]}},"alternative-id":["4282"],"URL":"https:\/\/doi.org\/10.1007\/s11432-023-4282-8","relation":{},"ISSN":["1674-733X","1869-1919"],"issn-type":[{"value":"1674-733X","type":"print"},{"value":"1869-1919","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3,21]]},"assertion":[{"value":"9 November 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 April 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 June 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 March 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"162202"}}