fix: correct terminal state
This commit is contained in:
@@ -437,7 +437,7 @@
|
|||||||
" new_state = np.random.choice(a=np.arange(0,transition_probabilities_given_action.shape[0]),p = transition_probabilities_given_action[:,state,action])\n",
|
" new_state = np.random.choice(a=np.arange(0,transition_probabilities_given_action.shape[0]),p = transition_probabilities_given_action[:,state,action])\n",
|
||||||
" # Return the reward\n",
|
" # Return the reward\n",
|
||||||
" reward = reward_structure[new_state]\n",
|
" reward = reward_structure[new_state]\n",
|
||||||
" is_terminal = new_state in [terminal_states]\n",
|
" is_terminal = new_state in terminal_states\n",
|
||||||
"\n",
|
"\n",
|
||||||
" return new_state, reward, action, is_terminal"
|
" return new_state, reward, action, is_terminal"
|
||||||
]
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user