# graphicsCrawlerDisplay.py


# graphicsCrawlerDisplay.py
# -------------------------
# Licensing Information: Please do not distribute or publish solutions to this
# project. You are free to use and extend these projects for educational
# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by
# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# For more info, see http://inst.eecs.berkeley.edu/~cs188/sp09/pacman.html

import Tkinter
import qlearningAgents
import time
import threading
import sys
import crawler
#import pendulum
import math
from math import pi as PI

# Selects which robot/environment pair Application builds at start-up.
# Application.__init__ recognizes 'crawler' and 'pendulum'; any other value
# makes it raise.
robotType = 'crawler'

class Application:
    """Tk front end that drives a Q-learning agent on a simulated robot.

    The constructor builds the control widgets and the drawing canvas,
    creates the robot, its environment and the learner, and then starts a
    background thread (``run``) that repeatedly calls ``step``.  The Tk main
    loop itself is entered through ``start``.

    Epsilon, discount and learning rate are each stored twice: as an
    unbounded "knob" value (``ep``, ``ga``, ``al``) changed by the +/-
    buttons, and as the squashed value in (0, 1) (``epsilon``, ``gamma``,
    ``alpha``) that is handed to the learner.
    """

    def sigmoid(self, x):
        """Map any real ``x`` into (0, 1) with a base-2 logistic curve."""
        return 1.0 / (1.0 + 2.0 ** (-x))

    def incrementSpeed(self, inc):
        """Multiply the per-step delay by ``inc`` and refresh its label."""
        self.tickTime *= inc
        self.speed_label['text'] = 'Step Delay: %.5f' % (self.tickTime)

    def incrementEpsilon(self, inc):
        """Shift the epsilon knob by ``inc``; update learner and label."""
        self.ep += inc
        self.epsilon = self.sigmoid(self.ep)
        self.learner.setEpsilon(self.epsilon)
        self.epsilon_label['text'] = 'Epsilon: %.3f' % (self.epsilon)

    def incrementGamma(self, inc):
        """Shift the discount knob by ``inc``; update learner and label."""
        self.ga += inc
        self.gamma = self.sigmoid(self.ga)
        self.learner.setDiscount(self.gamma)
        self.gamma_label['text'] = 'Discount: %.3f' % (self.gamma)

    def incrementAlpha(self, inc):
        """Shift the learning-rate knob by ``inc``; update learner and label."""
        self.al += inc
        self.alpha = self.sigmoid(self.al)
        self.learner.setLearningRate(self.alpha)
        self.alpha_label['text'] = 'Learning Rate: %.3f' % (self.alpha)

    def __initGUI(self, win):
        """Lay out the control rows and the canvas inside ``win``."""
        ## Window ##
        self.win = win

        ## Initialize Frame ##
        win.grid()
        self.dec = -.5   # knob change applied by the "-" buttons
        self.inc = .5    # knob change applied by the "+" buttons
        self.tickTime = 0.1

        ## Speed Button + Label ##
        self.setupSpeedButtonAndLabel(win)

        ## Epsilon Button + Label ##
        self.setupEpsilonButtonAndLabel(win)

        ## Gamma Button + Label ##
        self.setUpGammaButtonAndLabel(win)

        ## Alpha Button + Label ##
        self.setupAlphaButtonAndLabel(win)

        ## Canvas ##
        # Parent the canvas on the window we were given rather than on a
        # module-level global, so the class works with any Tk container.
        self.canvas = Tkinter.Canvas(win, height=200, width=1000)
        self.canvas.grid(row=2, columnspan=10)

    def _addAdjuster(self, win, row, column, text, onMinus, onPlus,
                     **buttonGrid):
        """Create a "-" button, a label and a "+" button in adjacent columns.

        ``buttonGrid`` holds extra ``grid`` options applied to the two
        buttons only.  Returns ``(minusButton, label, plusButton)``.
        """
        minus = Tkinter.Button(win, text="-", command=onMinus)
        minus.grid(row=row, column=column, **buttonGrid)

        label = Tkinter.Label(win, text=text)
        label.grid(row=row, column=column + 1)

        plus = Tkinter.Button(win, text="+", command=onPlus)
        plus.grid(row=row, column=column + 2, **buttonGrid)
        return minus, label, plus

    def setupAlphaButtonAndLabel(self, win):
        """Create the learning-rate controls (row 1, columns 3-5)."""
        self.alpha = self.sigmoid(self.al)
        widgets = self._addAdjuster(
            win, 1, 3, 'Learning Rate: %.3f' % (self.alpha),
            lambda: self.incrementAlpha(self.dec),
            lambda: self.incrementAlpha(self.inc),
            padx=10)
        self.alpha_minus, self.alpha_label, self.alpha_plus = widgets

    def setUpGammaButtonAndLabel(self, win):
        """Create the discount controls (row 1, columns 0-2)."""
        self.gamma = self.sigmoid(self.ga)
        widgets = self._addAdjuster(
            win, 1, 0, 'Discount: %.3f' % (self.gamma),
            lambda: self.incrementGamma(self.dec),
            lambda: self.incrementGamma(self.inc),
            padx=10)
        self.gamma_minus, self.gamma_label, self.gamma_plus = widgets

    def setupEpsilonButtonAndLabel(self, win):
        """Create the epsilon controls (row 0, columns 3-5)."""
        self.epsilon = self.sigmoid(self.ep)
        widgets = self._addAdjuster(
            win, 0, 3, 'Epsilon: %.3f' % (self.epsilon),
            lambda: self.incrementEpsilon(self.dec),
            lambda: self.incrementEpsilon(self.inc))
        self.epsilon_minus, self.epsilon_label, self.epsilon_plus = widgets

    def setupSpeedButtonAndLabel(self, win):
        """Create the step-delay controls (row 0, columns 0-2).

        "-" halves the delay and "+" doubles it.
        """
        widgets = self._addAdjuster(
            win, 0, 0, 'Step Delay: %.5f' % (self.tickTime),
            lambda: self.incrementSpeed(.5),
            lambda: self.incrementSpeed(2))
        self.speed_minus, self.speed_label, self.speed_plus = widgets

    def skip5kSteps(self):
        """Request 5000 skipped steps by setting ``stepsToSkip``.

        NOTE(review): ``run`` recomputes ``stepsToSkip`` on every loop
        iteration, so this request is likely overwritten before it is used.
        """
        self.stepsToSkip = 5000

    def __init__(self, win):
        """Build the GUI in ``win``, create robot and learner, start stepping.

        Raises:
            Exception: if the module-level ``robotType`` is not recognized.
        """
        # Knob values; the widgets show sigmoid(knob).
        self.ep = 0
        self.ga = 2
        self.al = 2
        self.stepCount = 0

        ## Init Gui
        self.__initGUI(win)

        # Init environment
        if robotType == 'crawler':
            self.robot = crawler.CrawlingRobot(self.canvas)
            self.robotEnvironment = crawler.CrawlingRobotEnvironment(self.robot)
        elif robotType == 'pendulum':
            # The file-level import of pendulum is commented out, so import
            # it here, only when this branch is actually selected.
            import pendulum
            self.robot = pendulum.PendulumRobot(self.canvas)
            self.robotEnvironment = \
                pendulum.PendulumRobotEnvironment(self.robot)
        else:
            raise Exception("Unknown RobotType")

        # Init Agent
        actionFn = lambda state: \
            self.robotEnvironment.getPossibleActions(state)
        self.learner = qlearningAgents.QLearningAgent(actionFn=actionFn)

        self.learner.setEpsilon(self.epsilon)
        self.learner.setLearningRate(self.alpha)
        self.learner.setDiscount(self.gamma)

        # Start the stepping thread.
        self.running = True
        self.stopped = False
        self.stepsToSkip = 0
        self.thread = threading.Thread(target=self.run)
        # Daemon thread: if the main thread dies without calling exit(),
        # the stepping loop must not keep the process alive forever.
        self.thread.daemon = True
        self.thread.start()

    def exit(self):
        """Ask the stepping thread to stop, destroy the window and exit.

        Waits up to about 0.5 s (five 0.1 s polls) for the thread to
        acknowledge before tearing the window down regardless.
        """
        self.running = False
        for _ in range(5):
            if self.stopped:
                break
            time.sleep(0.1)
        self.win.destroy()
        sys.exit(0)

    def step(self):
        """Run one learner/environment interaction.

        Resets the environment first when the current state has no possible
        actions.

        Raises:
            Exception: if the learner returns ``None`` as its action.
        """
        self.stepCount += 1

        state = self.robotEnvironment.getCurrentState()
        actions = self.robotEnvironment.getPossibleActions(state)
        if len(actions) == 0:
            self.robotEnvironment.reset()
            state = self.robotEnvironment.getCurrentState()
            actions = self.robotEnvironment.getPossibleActions(state)
            # Parenthesized single argument prints identically on Python 2
            # and is also valid Python 3 syntax.
            print('Reset!')
        action = self.learner.getAction(state)
        if action is None:
            raise Exception('None action returned: Code Not Complete')
        nextState, reward = self.robotEnvironment.doAction(action)
        self.learner.observeTransition(state, action, nextState, reward)

    def animatePolicy(self):
        """Draw the greedy action per (angle, velocity) cell on the canvas.

        The angle and velocity ranges reported by the robot are each split
        into 100 steps.  Every cell whose state the learner has seen is
        filled blue (kickLeft), red (kickRight) or white (doNothing)
        according to the action with the highest Q-value; unseen cells are
        left untouched.

        Raises:
            Exception: if the module-level ``robotType`` is not 'pendulum'.
        """
        if robotType != 'pendulum':
            raise Exception('Only pendulum can animatePolicy')

        totWidth = self.canvas.winfo_reqwidth()
        totHeight = self.canvas.winfo_reqheight()

        length = 0.48 * min(totWidth, totHeight)
        x, y = totWidth - length - 30, length + 10

        angleMin, angleMax = self.robot.getMinAndMaxAngle()
        velMin, velMax = self.robot.getMinAndMaxAngleVelocity()

        # The frame and legend are drawn only on the first call.
        if not hasattr(self, 'animatePolicyBox'):
            self.canvas.create_line(x, y, x + length, y)
            self.canvas.create_line(x + length, y, x + length, y - length)
            self.canvas.create_line(x + length, y - length, x, y - length)
            self.canvas.create_line(x, y - length, x, y)
            self.animatePolicyBox = 1
            self.canvas.create_text(x + length / 2, y + 10, text='angle')
            self.canvas.create_text(x - 30, y - length / 2, text='velocity')
            self.canvas.create_text(x - 60, y - length / 4,
                                    text='Blue = kickLeft')
            self.canvas.create_text(x - 60, y - length / 4 + 20,
                                    text='Red = kickRight')
            self.canvas.create_text(x - 60, y - length / 4 + 40,
                                    text='White = doNothing')

        # Remove the cells from the previous call; otherwise every call
        # stacks up to 10,000 more rectangles on the canvas.
        self.canvas.delete('animatePolicyCell')

        # Float divisors keep the step sizes exact even for integer bounds.
        angleDelta = (angleMax - angleMin) / 100.0
        velDelta = (velMax - velMin) / 100.0
        dx = length / 100.0
        dy = length / 100.0
        colors = {'kickLeft': 'blue', 'kickRight': 'red',
                  'doNothing': 'white'}

        for i in range(100):
            angle = angleMin + i * angleDelta
            for j in range(100):
                vel = velMin + j * velDelta
                state = self.robotEnvironment.getState(angle, vel)
                if not self.learner.seenState(state):
                    continue
                bestValue, bestAction = None, None
                for action in ('kickLeft', 'kickRight', 'doNothing'):
                    qVal = self.learner.getQValue(state, action)
                    if bestValue is None or qVal > bestValue:
                        bestValue, bestAction = qVal, action
                x0, y0 = x + i * dx, y - j * dy
                self.canvas.create_rectangle(x0, y0, x0 + dx, y0 + dy,
                                             fill=colors[bestAction],
                                             tags='animatePolicyCell')

    def run(self):
        """Stepping loop executed on the background thread.

        Sleeps for the current step delay (at least 0.01 s) and then calls
        ``step``.  When the requested delay is shorter than the minimum
        sleep, extra steps are run per wake-up to compensate.  The loop ends
        once ``running`` is false; the learner's episode is then closed and
        ``stopped`` is set so ``exit`` can stop waiting.
        """
        self.stepCount = 0
        self.learner.startEpisode()
        minSleep = .01
        try:
            while True:
                # Read the delay once so a concurrent button press cannot
                # change it between the sleep and the catch-up computation.
                tickTime = self.tickTime
                tm = max(minSleep, tickTime)
                time.sleep(tm)
                self.stepsToSkip = int(tm / tickTime) - 1

                if not self.running:
                    break
                for _ in range(self.stepsToSkip):
                    self.step()
                self.stepsToSkip = 0
                self.step()
            self.learner.stopEpisode()
        finally:
            # Set even when step() raises, so exit() does not wait on a
            # thread that has already died.
            self.stopped = True

    def start(self):
        """Enter the Tk main loop; returns when the window is closed."""
        self.win.mainloop()





def run():
    """Create the Tk window, start the Application and block in its main loop.

    A redraw callback re-schedules itself every 10 ms to repaint the robot
    from the current step count and step delay.  Closing the window is
    routed to ``Application.exit``.
    """
    # Stays module-global: other code in this file may look up ``root``.
    global root
    root = Tkinter.Tk()
    root.title('Crawler GUI')
    root.resizable(0, 0)

    app = Application(root)

    def refresh():
        # Repaint now, then queue the next repaint on the Tk event loop.
        app.robot.draw(app.stepCount, app.tickTime)
        root.after(10, refresh)

    refresh()

    root.protocol('WM_DELETE_WINDOW', app.exit)
    app.start()