一、游戏环境:
玩家从左上角绿色位置跑到右下角黄色位置,每次只能选择上、下、左、右走一步。深绿色是安全区,紫色的位置是陷阱,掉下去就没命。玩家事先并不知道陷阱在哪里,他需要不断地从失败中学习、积累知识,最后轻盈地走到黄色奖品区。
环境代码:
import numpy as np
from matplotlib import pyplot as plt
import random
from itertools import count
class Env:
    """Grid world in which an agent walks from the top-left to the goal.

    The board is a 4x12 array: 0 marks a safe cell, -1 a trap (bottom row,
    excluding both corners) and 1 the goal (bottom-right corner). An agent
    must be attached with ``register`` before ``step``, ``play``, ``render``
    or ``reset`` is called.
    """

    def __init__(self):
        self.action_space = []
        self.agent = None
        self.env = np.zeros((4, 12))
        self.env[-1, 1:-1] = -1  # traps
        self.env[-1, -1] = 1  # goal
        # Separate copy used for drawing, so the reward map stays untouched.
        self.env_show = self.env.copy()

    def step(self, a=(0, 0), ai=False):
        """Apply move ``a`` to the registered agent and return ``(s_, r)``.

        ``a`` is a ``(d_row, d_col)`` offset. A move that stays on the board
        updates the agent's position and yields that cell's value as reward.
        A move that would leave the board keeps the agent in place and
        yields -1. Either way the resulting position and the
        ``(position, action, reward)`` record are appended to the agent's
        ``s_chain`` and ``sar_chain``. ``ai`` is accepted but unused.
        """
        a = np.asarray(a)
        s_ = np.asarray(self.agent.s) + a
        # Derive the bounds from the board instead of hard-coding 3 and 11.
        n_rows, n_cols = self.env.shape
        if 0 <= s_[0] < n_rows and 0 <= s_[1] < n_cols:
            self.agent.s = s_
            r = self.env[s_[0], s_[1]]
        else:
            s_ = self.agent.s
            r = -1
        self.agent.s_chain = np.vstack([self.agent.s_chain, s_])
        self.agent.sar_chain = np.vstack(
            [self.agent.sar_chain, np.hstack([s_, a, r])]
        )
        return s_, r

    def play(self, render=True):
        """Run one episode and return ``(t, r)``.

        The episode starts from a reset state and ends on the first step
        whose reward is -1 or 1. ``t`` is the zero-based index of that last
        step and ``r`` its reward. When ``render`` is true (the default) the
        walked path is drawn once the episode is over.
        """
        # Use this instance and its own agent rather than module globals,
        # so the method works for any Env object.
        self.reset()
        for t in count():
            a = self.agent.chose_action()
            _, r = self.step(a)
            if r in (-1, 1):
                break
        if render:
            self.render()
        return t, r

    def render(self):
        """Mark every cell in the agent's ``s_chain`` and show the board."""
        for i, j in self.agent.s_chain:
            self.env_show[i, j] = 0.5
        plt.imshow(self.env_show)

    def reset(self):
        """Reset the registered agent and clear the drawn path."""
        self.agent.reset()
        self.env_show = self.env.copy()

    def register(self, agent):
        """Attach ``agent`` as the player driven by this environment."""
        self.agent = agent
环境中必备 step 函数:输入每一步的动作,返回新位置与奖励值。这里普通存活的奖励为 0,掉入陷阱或走出边界返回 -1,到达终点返回 1。
二、随机到处乱跑的人工智能
class Agent:
    """Random-walk player that records every position and transition.

    Attributes set by ``reset``:
        action_space: the four ``(d_row, d_col)`` moves, one per array row.
        s: current ``(row, col)`` position, starting at ``(0, 0)``.
        s_chain: visited positions, one per row.
        sar_chain: five-column records, one per row; starts with a single
            all-zero row.
    """

    def __init__(self):
        # reset() establishes the complete initial state, so construction
        # and resetting cannot drift apart.
        self.reset()

    def chose_action(self, ai=False):
        """Return one row of ``action_space`` picked uniformly at random.

        ``ai`` is accepted but unused.
        """
        # Draw an index and index the array directly, instead of handing a
        # 2-D ndarray to random.choice, whose emptiness check on the
        # sequence is not guaranteed to accept multi-element arrays.
        idx = random.randrange(len(self.action_space))
        return self.action_space[idx]

    def learn(self):
        """Placeholder: the random agent does not learn."""
        ...

    def reset(self):
        """Restore the start position and clear the recorded history."""
        self.action_space = np.array([[0, 1], [-1, 0], [0, -1], [1, 0]])
        self.s = np.array([0, 0])
        self.s_chain = np.expand_dims(self.s, 0)
        self.sar_chain = np.zeros((1, 5), dtype=int)
# Create the grid world and a player, then attach the player to the world.
env = Env()
agent = Agent()
env.register(agent)
三、开始游戏
给环境添加 play_until_success 函数:
def play_until_success(self):
    """Play episode after episode until one ends with reward 1.

    Intended to be added to the environment class as a method. Each
    iteration calls ``self.play()``; a progress line is printed for every
    20000th episode index. When an episode returns reward 1, its
    zero-based index is printed, ``self.render()`` is called and the loop
    stops. Returns None.
    """
    for t in count():
        _, r = self.play()
        if t % 20000 == 0:
            print(f"playing {t} times!")
        if r == 1:
            print(f"{t} times success!")
            self.render()
            break
run一下:
playing 0 times!
playing 20000 times!
playing 40000 times!
42777 times success!
经过 4.2 万轮瞎猫碰死老鼠式的尝试,笨笨终于成功抵达终点!
笨笨和乱撞的无头苍蝇唯一的区别是:他会记录自己的每一步,成功之后可以复盘查看路径。