iwannamassage.py v0.4
# Fetch and parse an HTML page into a DOM-like tree of elements.
# Uses my new favorite XML parser, ElementTree by the effbot.
# Rather than use ElementTree's interface to the Tidy command
# (which runs Tidy in a subshell), I'm using M.-A. Lemburg's version
# that turns Tidy into a Python extension.

from __future__ import generators

import sys
import os
import types
import tempfile
from urllib2 import urlopen, URLError

from mx.Tidy import Tidy
from mx import DateTime
# I'm using ElementTree 1.2a, which supports limited XPath
from elementtree import ElementTree, HTMLTreeBuilder

from fileadaptor import FileAdaptor
from toolbox import enumerate


class TidyError(Exception):
    """Raised when Tidy reports one or more errors for the input HTML."""
    pass


class TidyFilter:
    """File-like wrapper that runs its source through Tidy.

    The whole conversion happens in the constructor; afterwards read()
    returns the cleaned-up XHTML from a temporary file.
    """

    def __init__(self, src):
        """Convert the HTML in 'src' to XHTML.

        src -- a file object, or any object FileAdaptor can turn into one.

        Raises TidyError (carrying Tidy's error data) if Tidy reports
        errors.
        """
        self.src = src

        # mx.Tidy.tidy must have a true file object for input and output.
        # If the data source is not a true file object, use FileAdaptor to
        # turn it into one.
        if isinstance(self.src, types.FileType):
            self.inFileObj = self.src
        else:
            self.inFileObj = FileAdaptor(self.src).file()

        # Set up a temp file for output.
        self.outFileObj = os.tmpfile()

        # Use mx.Tidy.tidy to convert our input HTML into clean
        # output XHTML.
        nerrors, nwarnings, outputdata, errordata = Tidy.tidy(
            self.inFileObj, self.outFileObj, output_xhtml=1)

        # Raise an exception if Tidy got an error, passing along whatever
        # diagnostics Tidy returned so the failure can be investigated.
        if nerrors:
            raise TidyError(errordata)

        # Make sure the temp output file is ready for input.
        self.outFileObj.flush()
        self.outFileObj.seek(0, 0)

    def read(self, size=-1):
        """Return up to 'size' bytes of the tidied XHTML.

        A negative size (the default) reads everything that is left.
        The default is -1 rather than None because file.read() requires
        an integer argument.
        """
        return self.outFileObj.read(size)


class ApptSched:
    """Initialize an instance of this class with the object returned by
    ElementTree.  It will extract the schedule into a searchable form.
    It provides methods for getting the next available appointment slot."""

    def __init__(self, treeObj, year=2003):
        """Extract dates, times and slot states from the parsed page.

        treeObj -- the parsed element tree of the schedule page.
        year    -- year to assume for the slots, since the page's date
                   labels carry no year information (default 2003).

        Raises IndexError if the page contains no 'tbody' element.
        """
        self.year = year

        # Create a dictionary to map a month name to a month number.
        # When I can switch to Python 2.3, this can be done using dict
        # initialization.
        monthNames = ["jan", "feb", "mar", "apr", "may", "jun",
                      "jul", "aug", "sep", "oct", "nov", "dec"]
        self.monthNameToNumMap = {}
        for monthIndex, monthName in enumerate(monthNames):
            self.monthNameToNumMap[monthName] = monthIndex + 1

        # First, do the very data-specific stuff needed to find the
        # appointment schedule table element.  This was determined
        # empirically.  ElementTree 1.2a supports a limited form of XPath,
        # which is a way of addressing a particular element in an XML file.
        # Here, we are getting the element that contains the body of the
        # table that contains the appointment schedule, which just happens
        # to be the last 'tbody' element on the web page.
        elem = treeObj.findall(".//tbody")[-1]

        # Get a list of all the rows in the table.
        rows = elem.getchildren()

        # Some of the columns in the table contain row labels, which need
        # to be skipped.
        skipCols = (0, 4, 8)

        # Row 0 holds the date labels, and some blanks.
        self.dates = [dateElem[0].text
                      for index, dateElem in enumerate(rows[0])
                      if index not in skipCols]

        # Column 0 contains the time labels.
        self.times = [row[0].text for row in rows[1:]]

        # Now pull out the appointment slots availability info.
        # Each slot element has a 'class' attribute, that can be one of
        # "taken", "avail" or "na" (meaning 'Not Available').
        # Here, we generate a two-dimensional array (actually, a list of
        # lists) to represent the appointment schedule slots.  Each element
        # in the array contains the value of the 'class' attribute (or None
        # when a cell has no such attribute).
        self.slots = [[col.get("class")
                       for colIndex, col in enumerate(row.getchildren())
                       if colIndex not in skipCols]
                      for row in rows[1:]]

    def slotToDateTime(self, timeIndex, dateIndex):
        """Given the indexes into a two-dimension array that identifies a
        slot, this method converts the date and time for that slot into a
        form that can be used to instantiate an mx.DateTime object, and
        returns that object.

        timeIndex -- row index into self.times / self.slots.
        dateIndex -- column index into self.dates / a row of self.slots.
        """
        timeRaw = self.times[timeIndex]
        dateRaw = self.dates[dateIndex]

        # The date label is split on whitespace; the second field is taken
        # as the month name and the third as the day of the month.
        dateSplit = dateRaw.split()
        month = self.monthNameToNumMap[dateSplit[1][:3].lower()]
        day = int(dateSplit[2])

        # The time label is split into an "H:MM" part and an am/pm marker.
        timeSplit = timeRaw.split()
        clockSplit = timeSplit[0].split(":")
        minute = int(clockSplit[1])

        # Convert the 12-hour clock to 24-hour.  Taking the hour modulo 12
        # first makes "12:xx am" come out as hour 0 and "12:xx pm" as hour
        # 12, instead of the invalid hour 24.
        hour = int(clockSplit[0]) % 12
        if timeSplit[1].lower() == "pm":
            hour += 12

        # The appointment schedule dates have no year info, so use the
        # year this schedule was created with.
        return DateTime.DateTime(self.year, month, day, hour, minute)

    def getNextAvail(self):
        """This method is a generator which will return the next available
        appointment slot each time it is called, as an mx.DateTime object.

        Slots are visited row by row (time of day), then column by column
        (date) within each row.
        NOTE(review): this means results are not in chronological order
        across days -- confirm that this is what callers expect.
        """
        for timeIndex, timeSlots in enumerate(self.slots):
            for dateIndex, dateSlot in enumerate(timeSlots):
                # A cell without a 'class' attribute is stored as None;
                # treat it as not available instead of crashing.
                if dateSlot and dateSlot.startswith("avail"):
                    yield self.slotToDateTime(timeIndex, dateIndex)
        return


class MassageAppt:
    """This class will extract the appointment schedule table from the web
    page, and provides methods for getting the next available appointment
    slot, and making an appointment for that slot."""

    def __init__(self, url, year=2003):
        """Fetch and parse the schedule page at 'url'.

        url  -- address of the appointment schedule page.
        year -- year to assume for the schedule's dates (default 2003).

        Raises URLError if the page cannot be opened (after printing a
        message), and TidyError if the page cannot be tidied.
        """
        # Initialize an empty list of tuples of DateTime pairs used to
        # exclude date/time ranges when finding available appointment
        # slots.
        self.excludes = []

        # Get an ElementTree HTML parser.
        htmlParserObj = HTMLTreeBuilder.TreeBuilder()

        # Open the URL, getting a file-like URL object.
        try:
            urlObj = urlopen(url)
        except URLError:
            sys.stdout.write("Error opening URL '%s'\n" % url)
            raise

        # Tell ElementTree to parse our input XHTML source, using an
        # HTML parser.  This will give us a top-level ElementTree object.
        # The URL object is closed whether or not parsing succeeds.
        try:
            treeObj = ElementTree.parse(TidyFilter(urlObj),
                                        parser=htmlParserObj)
        finally:
            urlObj.close()

        # Use an instance of the ApptSched class to extract the appointment
        # schedule data from the tree of elements.
        self.apptSched = ApptSched(treeObj, year)

    def _isExcluded(self, slotDT):
        """Return true if slotDT lies within any excluded range (the range
        end points are inclusive)."""
        for startDT, endDT in self.excludes:
            if (DateTime.cmp(startDT, slotDT) <= 0 and
                    DateTime.cmp(slotDT, endDT) <= 0):
                return 1
        return 0

    def findSlot(self):
        """This method will find the next available appointment slot which
        does not fall within excluded times.  These excluded times are
        specified by a list of tuples of start and end times, given as
        mx.DateTime objects.

        Returns the slot as an mx.DateTime object, or None when every
        available slot is excluded (or there are no available slots).
        """
        # For each available appointment slot, in the order the schedule
        # yields them...
        for slotDT in self.apptSched.getNextAvail():
            # If the available slot falls within an excluded date/time
            # range, skip it; otherwise it is the first slot not excluded.
            if not self._isExcluded(slotDT):
                return slotDT
        return None

    def addExcludeRange(self, startDT, endDT):
        """Add a tuple of DateTime objects that specify a date/time range
        to exclude when finding an available appointment slot."""
        self.excludes.append((startDT, endDT))


if __name__ == "__main__":
    # The schedule page URL is the one required command line argument.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s URL\n" % sys.argv[0])
        sys.exit(2)

    app = MassageAppt(sys.argv[1])

    # For testing purposes, add a couple of excludes.
    app.addExcludeRange(DateTime.DateTime(2003, 6, 24, 13, 0),
                        DateTime.DateTime(2003, 6, 24, 14, 0))

    sys.stdout.write("%s\n" % (app.findSlot(),))
    sys.exit(0)
|
© Copyright
2003
Michael Kent.
Last update:
7/1/2003; 2:36:16 PM.
This theme is based on the SoundWaves
(blue) Manila theme. |
|