{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T22:41:57Z","timestamp":1743115317098,"version":"3.40.3"},"publisher-location":"Boston, MA","reference-count":10,"publisher":"Springer US","isbn-type":[{"type":"print","value":"9780387307688"},{"type":"electronic","value":"9780387301648"}],"license":[{"start":{"date-parts":[[2011,1,1]],"date-time":"2011-01-01T00:00:00Z","timestamp":1293840000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/2.zoppoz.workers.dev:443\/https\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2011,1,1]],"date-time":"2011-01-01T00:00:00Z","timestamp":1293840000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/2.zoppoz.workers.dev:443\/https\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011]]},"DOI":"10.1007\/978-0-387-30164-8_468","type":"book-chapter","created":{"date-parts":[[2010,12,29]],"date-time":"2010-12-29T17:28:18Z","timestamp":1293643698000},"page":"595-600","source":"Crossref","is-referenced-by-count":0,"title":["Least-Squares Reinforcement Learning Methods"],"prefix":"10.1007","author":[{"given":"Michail G.","family":"Lagoudakis","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"468_CR1_468","first-page":"49","volume-title":"Proceedings of the Sixteenth International Conference on Machine Learning","author":"J. A Boyan","year":"1999","unstructured":"Boyan, J. A. (1999). Least-squares temporal difference learning. Proceedings of the Sixteenth International Conference on Machine Learning, Bled, Slovenia, pp. 49\u201356."},{"key":"468_CR2_468","first-page":"33","volume":"22","author":"SJ Bradtke","year":"1996","unstructured":"Bradtke, S. J., & Barto, A. G. (1996). Linear least-squares algorithms for temporal difference learning. Machine Learning, 22, 33\u201357.","journal-title":"Machine Learning"},{"key":"468_CR3_468","first-page":"503","volume":"6","author":"D Ernst","year":"2005","unstructured":"Ernst, D., Geurts, P., & Wehenkel, L. (2005). Tree-based batch mode reinforcement learning. Journal of Machine Learning Research, 6, 503\u2013556.","journal-title":"Journal of Machine Learning Research"},{"issue":"2\u20133","key":"468_CR4_468","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1007\/s10994-009-5128-4","volume":"76","author":"J Johns","year":"2009","unstructured":"Johns, J., Petrik, M., & Mahadevan, S. (2009). Hybrid least-squares algorithms for approximate policy evaluation. Machine Learning, 76(2\u20133), 243\u2013256.","journal-title":"Machine Learning"},{"key":"468_CR5_468","first-page":"326","volume-title":"Proceedings of the Sixteenth Conference on Uncertainty in Artificial Intelligence","author":"D Koller","year":"2000","unstructured":"Koller, D., & Parr, R. (2000). Policy iteration for factored MDPs. Proceedings of the Sixteenth Conference on Uncertainty in Artificial Intelligence, Stanford, CA, USA, pp. 326\u2013334."},{"key":"468_CR6_468","first-page":"1107","volume":"4","author":"MG Lagoudakis","year":"2003","unstructured":"Lagoudakis, M. G., Parr, R. (2003). Least-squares policy iteration. Journal of Machine Learning Research, 4, 1107\u20131149.","journal-title":"Journal of Machine Learning Research"},{"issue":"1\u20132","key":"468_CR7_468","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1023\/A:1022192903948","volume":"13","author":"A Nedi\u0107","year":"2003","unstructured":"Nedi\u0107, A., & Bertsekas, D. P. (2003). Least-squares policy evaluation algorithms with linear function approximation. Discrete Event Dynamic Systems: Theory and Applications, 13(1\u20132), 79\u2013110.","journal-title":"Discrete Event Dynamic Systems: Theory and Applications"},{"key":"468_CR8_468","doi-asserted-by":"crossref","unstructured":"Parr, R., Li, L., Taylor, G., Painter-Wakefield, C., & Littman, M. L. (2008). An analysis of linear models, linear value-function approximation, and feature selection for reinforcement learning, Proceedings of the twenty-fifth international conference on machine learning, Helsinki, Finland, pp.\u00a0752\u2013759.","DOI":"10.1145\/1390156.1390251"},{"issue":"6","key":"468_CR9_468","doi-asserted-by":"crossref","first-page":"568","DOI":"10.1016\/0022-247X(85)90317-8","volume":"110","author":"PJ Schweitzer","year":"1985","unstructured":"Schweitzer, P. J., & Seidmann, A. (1985). Generalized polynomial approximations in Markovian decision processes. Journal of Mathematical Analysis and Applications, 110(6), 568\u2013582.","journal-title":"Journal of Mathematical Analysis and Applications"},{"key":"468_CR10_468","doi-asserted-by":"crossref","first-page":"259","DOI":"10.1613\/jair.946","volume":"16","author":"X Xu","year":"2002","unstructured":"Xu, X., He, H. G., & Hu, D. (2002). Efficient reinforcement learning using recursive least-squares methods. Journal of Artificial Intelligence Research, 16, 259\u2013292.","journal-title":"Journal of Artificial Intelligence Research"}],"container-title":["Encyclopedia of Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/2.zoppoz.workers.dev:443\/https\/link.springer.com\/content\/pdf\/10.1007\/978-0-387-30164-8_468","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,22]],"date-time":"2023-12-22T02:45:20Z","timestamp":1703213120000},"score":1,"resource":{"primary":{"URL":"https:\/\/2.zoppoz.workers.dev:443\/https\/link.springer.com\/10.1007\/978-0-387-30164-8_468"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011]]},"ISBN":["9780387307688","9780387301648"],"references-count":10,"URL":"https:\/\/2.zoppoz.workers.dev:443\/https\/doi.org\/10.1007\/978-0-387-30164-8_468","relation":{},"subject":[],"published":{"date-parts":[[2011]]}}}