Created using Colab
This commit is contained in:
@@ -4,7 +4,6 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"colab": {
|
"colab": {
|
||||||
"provenance": [],
|
"provenance": [],
|
||||||
"authorship_tag": "ABX9TyOlD6kmCxX3SKKuh3oJikKA",
|
|
||||||
"include_colab_link": true
|
"include_colab_link": true
|
||||||
},
|
},
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
@@ -406,6 +405,10 @@
|
|||||||
" state_values_new[state] = 3.0\n",
|
" state_values_new[state] = 3.0\n",
|
||||||
" break\n",
|
" break\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" # TODO -- Write this function (from equation 19.11, but bear in mind policy is deterministic here)\n",
|
||||||
|
" # Replace this line\n",
|
||||||
|
" state_values_new[state] = 0\n",
|
||||||
|
"\n",
|
||||||
" return state_values_new\n",
|
" return state_values_new\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Greedily choose the action that maximizes the value for each state.\n",
|
"# Greedily choose the action that maximizes the value for each state.\n",
|
||||||
|
|||||||
Reference in New Issue
Block a user