{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,12]],"date-time":"2024-09-12T19:17:36Z","timestamp":1726168656505},"publisher-location":"Berlin, Heidelberg","reference-count":19,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642051807"},{"type":"electronic","value":"9783642051814"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010]]},"DOI":"10.1007\/978-3-642-05181-4_13","type":"book-chapter","created":{"date-parts":[[2009,12,30]],"date-time":"2009-12-30T03:17:27Z","timestamp":1262143047000},"page":"293-309","source":"Crossref","is-referenced-by-count":11,"title":["Motor Learning at Intermediate Reynolds Number: Experiments with Policy Gradient on the Flapping Flight of a Rigid Wing"],"prefix":"10.1007","author":[{"given":"John W.","family":"Roberts","sequence":"first","affiliation":[]},{"given":"Lionel","family":"Moret","sequence":"additional","affiliation":[]},{"given":"Jun","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Russ","family":"Tedrake","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"13_CR1","doi-asserted-by":"publisher","first-page":"11163","DOI":"10.1073\/pnas.0505064102","volume":"102","author":"S. Alben","year":"2005","unstructured":"Alben, S., Shelley, M.: Coherent locomotion as an attracting state for a free flapping body. Proceedings of the National Academy of Science\u00a0102, 11163\u201311166 (2005)","journal-title":"Proceedings of the National Academy of Science"},{"key":"13_CR2","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1162\/089976698300017746","volume":"10","author":"S. Amari","year":"1998","unstructured":"Amari, S.: Natural gradient works efficiently in learning. Neural Computation\u00a010, 251\u2013276 (1998)","journal-title":"Neural Computation"},{"key":"13_CR3","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1016\/S0954-1810(01)00028-0","volume":"15","author":"J. Baxter","year":"2001","unstructured":"Baxter, J., Bartlett, P.: Infinite-horizon policy-gradient estimation. Journal of Artificial Intelligence Research\u00a015, 319\u2013350 (2001)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"13_CR4","doi-asserted-by":"crossref","unstructured":"Bennis, A., Leeser, M., Tadmor, G., Tedrake, R.: Implementation of a highly parameterized digital PIV system on reconfigurable hardware. In: Proceedings of the Twelfth Annual Workshop on High Performance Embedded Computing (HPEC), Lexington, MA (2008)","DOI":"10.1109\/ASAP.2009.20"},{"key":"13_CR5","doi-asserted-by":"publisher","first-page":"1082","DOI":"10.1126\/science.1107799","volume":"307","author":"S.H. Collins","year":"2005","unstructured":"Collins, S.H., Ruina, A., Tedrake, R., Wisse, M.: Efficient bipedal robots based on passive-dynamic walkers. Science\u00a0307, 1082\u20131085 (2005)","journal-title":"Science"},{"key":"13_CR6","first-page":"1471","volume":"5","author":"E. Greensmith","year":"2004","unstructured":"Greensmith, E., Bartlett, P.L., Baxter, J.: Variance reduction techniques for gradient estimates in reinforcement learning. Journal of Machine Learning Research\u00a05, 1471\u20131530 (2004)","journal-title":"Journal of Machine Learning Research"},{"key":"13_CR7","series-title":"SCI","doi-asserted-by":"crossref","first-page":"253","DOI":"10.1007\/978-3-642-05181-4_12","volume-title":"From Motor Learning to Interaction Learning in Robots","author":"M. Howard","year":"2010","unstructured":"Howard, M., Klanke, S., Gienger, M., Goerick, C., Vijayakumar, S.: Methods for learning control policies from variable-constraint demonstrations. In: Sigaud, O., Peters, J. (eds.) From Motor Learning to Interaction Learning in Robots. SCI, vol.\u00a0264, pp. 253\u2013291. Springer, Heidelberg (2010)"},{"key":"13_CR8","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1109\/72.105429","volume":"3","author":"M. Jabri","year":"1992","unstructured":"Jabri, M., Flower, B.: Weight perturbation: An optimal architecture and learning technique for analog VLSI feedforward and recurrent multilayer networks. IEEE Trans. Neural Netw.\u00a03, 154\u2013157 (1992)","journal-title":"IEEE Trans. Neural Netw."},{"key":"13_CR9","doi-asserted-by":"crossref","unstructured":"Kaelbling, L.P., Littman, M.L., Cassandra, A.R.: Planning and acting in partially observable stochastic domains. Artificial Intelligence, 101 (1998)","DOI":"10.1016\/S0004-3702(98)00023-X"},{"key":"13_CR10","series-title":"SCI","doi-asserted-by":"crossref","first-page":"209","DOI":"10.1007\/978-3-642-05181-4_10","volume-title":"From Motor Learning to Interaction Learning in Robots","author":"J. Kober","year":"2010","unstructured":"Kober, J., Mohler, B., Peters, J.: Imitation and reinforcement learning for motor primitives with perceptual coupling. In: Sigaud, O., Peters, J. (eds.) From Motor Learning to Interaction Learning in Robots. SCI, vol.\u00a0264, pp. 209\u2013225. Springer, Heidelberg (2010)"},{"key":"13_CR11","unstructured":"Meuleau, N., Peshkin, L., Kaelbling, L.P., Kim, K.-E.: Off-policy policy search. In: NIPS (2000)"},{"key":"13_CR12","unstructured":"Peters, J., Vijayakumar, S., Schaal, S.: Policy gradient methods for robot control (Technical Report CS-03-787). University of Southern California (2003)"},{"key":"13_CR13","unstructured":"Roberts, J.W., Tedrake, R.: Signal-to-noise ratio analysis of policy gradient algorithms. In: Advances of Neural Information Processing Systems (NIPS), vol.\u00a021, p. 8 (2009)"},{"key":"13_CR14","unstructured":"Shelley, M.: Personal Communication (2007)"},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"Tedrake, R., Zhang, T.W., Seung, H.S.: Stochastic policy gradient reinforcement learning on a simple 3D biped. In: Proceedings of the IEEE International Conference on Intelligent Robots and Systems (IROS), Sendai, Japan, pp. 2849\u20132854 (2004)","DOI":"10.1109\/IROS.2004.1389841"},{"key":"13_CR16","doi-asserted-by":"crossref","unstructured":"Vandenberghe, N., Childress, S., Zhang, J.: On unidirectional flight of a free flapping wing. Physics of Fluids, 18 (2006)","DOI":"10.1063\/1.2148989"},{"key":"13_CR17","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1017\/S0022112004008468","volume":"506","author":"N. Vandenberghe","year":"2004","unstructured":"Vandenberghe, N., Zhang, J., Childress, S.: Symmetry breaking leads to forward flapping flight. Journal of Fluid Mechanics\u00a0506, 147\u2013155 (2004)","journal-title":"Journal of Fluid Mechanics"},{"key":"13_CR18","doi-asserted-by":"crossref","unstructured":"Williams, J.L., Fisher III, J.W., Willsky, A.S.: Importance sampling actor-critic algorithms. In: Proceedings of the 2006 American Control Conference (2006)","DOI":"10.1109\/ACC.2006.1656451"},{"key":"13_CR19","first-page":"229","volume":"8","author":"R. Williams","year":"1992","unstructured":"Williams, R.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine Learning\u00a08, 229\u2013256 (1992)","journal-title":"Machine Learning"}],"container-title":["Studies in Computational Intelligence","From Motor Learning to Interaction Learning in Robots"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-05181-4_13.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,23]],"date-time":"2020-11-23T21:49:00Z","timestamp":1606168140000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-05181-4_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010]]},"ISBN":["9783642051807","9783642051814"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-05181-4_13","relation":{},"ISSN":["1860-949X","1860-9503"],"issn-type":[{"type":"print","value":"1860-949X"},{"type":"electronic","value":"1860-9503"}],"subject":[],"published":{"date-parts":[[2010]]}}}