Content deleted Content added
m Open access bot: doi updated in citation with #oabot. |
|||
Line 73:
<ref name="francoislavet2018">{{Cite journal|last1=Francois-Lavet|first1=Vincent|last2=Henderson|first2=Peter|last3=Islam|first3=Riashat|last4=Bellemare|first4=Marc G.|last5=Pineau|first5=Joelle|date=2018|title=An Introduction to Deep Reinforcement Learning|journal=Foundations and Trends in Machine Learning|volume=11|issue=3–4|pages=219–354|arxiv=1811.12560|bibcode=2018arXiv181112560F|doi=10.1561/2200000071|issn=1935-8237|s2cid=54434537}}</ref>
<ref name="Hassabis">{{cite speech |last1=Hassabis |first1=Demis | date=March 11, 2016 |title= Artificial Intelligence and the Future. |url= https://www.youtube.com/watch?v=8Z2eLTSCuBk}}</ref>
<ref name="TD-Gammon">{{cite journal | url=http://www.bkgm.com/articles/tesauro/tdl.html | title=Temporal Difference Learning and TD-Gammon | date=March 1995 | last=Tesauro | first=Gerald | journal=Communications of the ACM | volume=38 | issue=3 | doi=10.1145/203330.203343 | pages=58–68 | s2cid=8763243 | access-date=2017-03-10 | archive-url=https://web.archive.org/web/20100209103427/http://www.bkgm.com/articles/tesauro/tdl.html | archive-date=2010-02-09 | url-status=dead | doi-access=free }}</ref>
<ref name="sutton1996">{{cite book |last1=Sutton |first1=Richard |last2=Barto |first2=Andrew |date=1998 |title=Reinforcement Learning: An Introduction |publisher=MIT Press}}</ref>
<ref name="tsitsiklis1996">{{cite book |last1=Bertsekas |first1=Dimitri |last2=Tsitsiklis |first2=John |date=September 1996 |title=Neuro-Dynamic Programming |url=http://athenasc.com/ndpbook.html |publisher=Athena Scientific |isbn=1-886529-10-8}}</ref>
|