diff --git a/Sprint-Challenge/Taxi RL.ipynb b/Sprint-Challenge/Taxi RL.ipynb new file mode 100644 index 0000000..fc7f2ab --- /dev/null +++ b/Sprint-Challenge/Taxi RL.ipynb @@ -0,0 +1,1429 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import gym\n", + "import numpy as np\n", + "import random\n", + "from collections import defaultdict" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : :\u001b[43m \u001b[0m|\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + "\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : :\u001b[43m \u001b[0m|\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : :\u001b[43m \u001b[0m|\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Dropoff)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : :\u001b[43m \u001b[0m|\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : :\u001b[43m \u001b[0m: |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: 
|\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "|\u001b[43m \u001b[0m: : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "|\u001b[43m \u001b[0m: : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Dropoff)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| |\u001b[43m \u001b[0m: | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| |\u001b[43m \u001b[0m: | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | :\u001b[43m \u001b[0m| : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : 
:\u001b[43m \u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | :\u001b[43m \u001b[0m| : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Dropoff)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Dropoff)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", 
+ "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m: : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m: : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Dropoff)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m| : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m| : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m| : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35m\u001b[43mY\u001b[0m\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m| : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m| : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m| : | : 
|\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Dropoff)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m| : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m| : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35m\u001b[43mY\u001b[0m\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m| : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m: : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "|\u001b[43m \u001b[0m: : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m: : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | 
: :G|\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m: : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m: : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | :\u001b[43m \u001b[0m| : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| :\u001b[43m \u001b[0m|B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| :\u001b[43m \u001b[0m|B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| :\u001b[43m \u001b[0m|B: |\n", + "+---------+\n", + " (Dropoff)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| :\u001b[43m \u001b[0m|B: |\n", + 
"+---------+\n", + " (Dropoff)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| :\u001b[43m \u001b[0m|B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| :\u001b[43m \u001b[0m|B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | :\u001b[43m \u001b[0m| : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | :\u001b[43m \u001b[0m| : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : :\u001b[43m \u001b[0m: |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : :\u001b[43m \u001b[0m|\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | :\u001b[43m \u001b[0m|\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : :\u001b[43m \u001b[0m|\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : : : :\u001b[43m 
\u001b[0m|\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :\u001b[43mG\u001b[0m|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :\u001b[43mG\u001b[0m|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | :\u001b[43m \u001b[0m:G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: |\u001b[43m \u001b[0m: :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | :\u001b[43m \u001b[0m:G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: |\u001b[43m \u001b[0m: :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Dropoff)\n", + 
"+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m:\u001b[43m \u001b[0m| : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m:\u001b[43m \u001b[0m| : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m:\u001b[43m \u001b[0m| : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[34;1m\u001b[43mR\u001b[0m\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[42mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[42mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[42mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[42mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[42mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + 
"|\u001b[42mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[42mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|R: | : :G|\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|R: | : :G|\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :G|\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Dropoff)\n", + "+---------+\n", + "|R: | : :G|\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Dropoff)\n", + "+---------+\n", + "|R: | : :G|\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Dropoff)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : 
|B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :G|\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Dropoff)\n", + "+---------+\n", + "|R:\u001b[42m_\u001b[0m| : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[42mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Dropoff)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :G|\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :G|\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : :\u001b[42m_\u001b[0m: : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : 
:\u001b[42m_\u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : :\u001b[42m_\u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Dropoff)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : :\u001b[42m_\u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : :\u001b[42m_\u001b[0m: |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : :\u001b[42m_\u001b[0m: |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Dropoff)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : |\u001b[42m_\u001b[0m: |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : |\u001b[42m_\u001b[0m: |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Dropoff)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : :\u001b[42m_\u001b[0m: |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : |\u001b[42m_\u001b[0m: |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | :\u001b[42m_\u001b[0m|\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + 
"+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | :\u001b[42m_\u001b[0m|\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : |\u001b[42m_\u001b[0m: |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | :\u001b[42m_\u001b[0m|\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : |\u001b[42m_\u001b[0m: |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | :\u001b[42m_\u001b[0m|\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : : :\u001b[42m_\u001b[0m|\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : : :\u001b[42m_\u001b[0m|\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : :\u001b[42m_\u001b[0m: |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : :\u001b[42m_\u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : :\u001b[42m_\u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| 
: |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|R: | : :G|\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : :\u001b[42m_\u001b[0m: : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :G|\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R:\u001b[42m_\u001b[0m| : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R:\u001b[42m_\u001b[0m| : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R:\u001b[42m_\u001b[0m| : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R:\u001b[42m_\u001b[0m| : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|R:\u001b[42m_\u001b[0m| : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|\u001b[42mR\u001b[0m: | : :G|\n", + "| : : : : |\n", + "| : : : 
: |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :G|\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "|\u001b[42m_\u001b[0m| : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35m\u001b[42mY\u001b[0m\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35m\u001b[42mY\u001b[0m\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35m\u001b[42mY\u001b[0m\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : : : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35m\u001b[42mY\u001b[0m\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : 
# Taxi RL: random-agent baseline followed by tabular Q-learning.
#
# Relies on names imported earlier in the notebook: gym, np (numpy),
# random, and collections.defaultdict.

# --- Random agent: one fully rendered episode (not a very good result) ---
env = gym.make('Taxi-v2')
state = env.reset()
env.render()

total_reward = 0
done = False
while not done:
    # Act uniformly at random until the environment reports the episode over.
    state, reward, done, info = env.step(env.action_space.sample())
    total_reward += reward
    env.render()

print('Total reward:', total_reward)

# --- Random agent: average return over many step-capped episodes ---
episodes = 1000
rewards = []
max_steps = 99

for episode in range(episodes):
    state = env.reset()
    total_rewards = 0

    for step in range(max_steps):
        # TODO Add policy here!!! Goal 1 - Beat Random
        action = env.action_space.sample()
        # Step with the action chosen above (the original drew a second,
        # unrelated sample here, leaving `action` unused).
        state, reward, done, info = env.step(action)
        total_rewards += reward
        if done:
            break
    rewards.append(total_rewards)

print('Average score over time:', sum(rewards) / episodes)

# --- Q-table: state -> array of action values, created lazily as zeros ---
# Size the value array from the environment instead of hard-coding 6.
n_actions = env.action_space.n
qtable_dict = defaultdict(lambda: np.zeros(n_actions))
print(qtable_dict)

# --- Hyperparameters ---
total_episodes = 100000        # Total episodes
learning_rate = 0.1            # Learning rate
max_steps = 99                 # Max steps per episode
# Discounting rate. It must be close to 1 so that reward obtained late in an
# episode can influence the values of earlier state/action pairs; at 0.01 the
# bootstrapped term gamma * max Q(s', a') is almost entirely ignored.
gamma = 0.95

# Exploration parameters
epsilon = 1.0                  # Exploration rate
max_epsilon = 1.0              # Exploration probability at start
min_epsilon = 0.01             # Minimum exploration probability
decay_rate = 0.01              # Exponential decay rate for exploration prob

# --- Q-learning training loop ---
rewards = []

for episode in range(total_episodes):
    state = env.reset()
    done = False
    total_rewards = 0

    for step in range(max_steps):
        # Epsilon-greedy action selection: explore with probability epsilon,
        # otherwise exploit the best known action for this state.
        if random.uniform(0, 1) < epsilon:
            action = env.action_space.sample()
        else:
            action = np.argmax(qtable_dict[state])

        # Take the action, observe the outcome and reward.
        new_state, reward, done, info = env.step(action)

        # Q(s,a) := Q(s,a) + lr * [R(s,a) + gamma * max Q(s',a') - Q(s,a)]
        # The original multiplied gamma by (max Q(s') - max Q(s)), which
        # discounts the wrong quantity and never subtracts Q(s,a) itself.
        td_target = reward + gamma * np.max(qtable_dict[new_state])
        td_error = td_target - qtable_dict[state][action]
        qtable_dict[state][action] += learning_rate * td_error

        total_rewards += reward
        state = new_state
        if done:
            break

    # Reduce epsilon (explore less) exponentially toward min_epsilon.
    epsilon = (min_epsilon +
               (max_epsilon - min_epsilon) * np.exp(-decay_rate * episode))
    rewards.append(total_rewards)

print('Score over time:', sum(rewards) / total_episodes)
def reward(R, gamma):
    """Return the infinite-horizon discounted sum of a constant reward.

    Evaluates the geometric series ``sum(gamma**k * R for k = 0, 1, 2, ...)``
    in closed form as ``R / (1 - gamma)`` instead of accumulating terms
    until they fall below a cutoff, so the result is exact (up to floating
    point) and is also correct for negative discount factors.

    Args:
        R: Reward received at every time step.
        gamma: Discount factor; must satisfy ``-1 < gamma < 1`` for the
            series to converge.

    Returns:
        The discounted total reward.

    Raises:
        ValueError: If ``gamma`` is outside the open interval (-1, 1),
            where the series does not converge.
    """
    if not -1.0 < gamma < 1.0:
        raise ValueError(
            "gamma must be in the open interval (-1, 1), got %r" % (gamma,)
        )
    return R / (1.0 - gamma)