#! /usr/bin/env ruby

#Copyright (c) 2010 Ben Weissmann
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in
#all copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#THE SOFTWARE.

# This script grabs the cpw schedule from the website, parses it,
# and outputs an iCalendar file to stdout.
# 
# Please note that the "htmlentities" gem and the "icalendar" gem are required
# to run this script.

require 'date'
require 'net/http'
require 'open-uri'
require 'uri'

require 'htmlentities'
require 'icalendar'

# Converts a schedule timestamp such as "Fri 2:40 PM" into a DateTime.
#
# +str+ holds three space-separated fields: a day abbreviation (Thu, Fri,
# Sat, Sun or Mon), an "H:MM" clock time, and an AM/PM marker in any letter
# case. The day abbreviations map onto April 8-12, 2010.
#
# Returns a DateTime built with DateTime.civil. An unrecognised day
# abbreviation produces no day number, which DateTime.civil rejects.
def parse_time(str)
  weekday, clock, meridiem = str.split(' ')
  meridiem = meridiem.downcase

  april_day = {
    'Thu' => 8,
    'Fri' => 9,
    'Sat' => 10,
    'Sun' => 11,
    'Mon' => 12
  }[weekday]

  hour, min = clock.split(':').map(&:to_i)

  # 12-hour to 24-hour: 12 AM is hour 0, 12 PM stays 12, other PM hours shift.
  if hour == 12 && meridiem == 'am'
    hour = 0
  elsif hour != 12 && meridiem == 'pm'
    hour += 12
  end

  DateTime.civil(2010, 4, april_day, hour, min)
end

class String
  # Returns a copy of this string with every backslash dropped and the result
  # passed through HTMLEntities#decode (from the htmlentities gem). The
  # receiver itself is left unchanged.
  def clean
    without_backslashes = gsub('\\', '')
    HTMLEntities.new.decode(without_backslashes)
  end
end

# Calendar object that the schedule entries are added to further down.
cal = Icalendar::Calendar.new

# Timezone definition for US Eastern time. The identifier is the tz database
# name, which is spelled with an underscore ("America/New_York").
cal.timezone do
  timezone_id             "America/New_York"

  # Daylight time: UTC-5 -> UTC-4 on the second Sunday of March.
  daylight do
    timezone_offset_from  "-0500"
    timezone_offset_to    "-0400"
    timezone_name         "EDT"
    # iCalendar DATE-TIME form is YYYYMMDD "T" HHMMSS.
    dtstart               "19700308T020000"
    add_recurrence_rule   "FREQ=YEARLY;BYMONTH=3;BYDAY=2SU"
  end

  # Standard time: UTC-4 -> UTC-5 on the first Sunday of November.
  standard do
    timezone_offset_from  "-0400"
    timezone_offset_to    "-0500"
    timezone_name         "EST"
    dtstart               "19701101T020000"
    add_recurrence_rule   "FREQ=YEARLY;BYMONTH=11;BYDAY=1SU"
  end
end
# Address of the schedule page. The `data` query parameter is an opaque token
# that is passed through exactly as given.
url = 'http://mitcpw.mit.edu/index.php?data=QlpoNDFBWSZTWZuXz3IAANAfgHBjf/ok5/0gvu/%2BvjABSEaGiJT2qfppNqn6FGmZTyT1GnoQ2mp%2BpPTTSeoNExDQp6jUeoAHpADIAAAaSZRo2miAxNMEMgMTQaaNMG6kwQ3jnf1Qj8WOH2LiQtQwlmqgiiIZY64gAWy%2BmOnZIOIJlTkj2Gk1yzEVEjDTUNXTncBReFEN/V84oUxErbIm5gpi5hCCDpzUaYPR5rhYPet8KCs8AiCNgkCxaNmiYqohoMYuQS9XiLvIEcWCHAKTNCV1FFeSPA5ET1ipwYzEwxkhpIEiU3gQ3zABY8tsnUEAK7vS7dYmSirg2WV1WN7rtr7%2BXKHe93k%2BwMf4rA3FIDKskC3Rn0GHeDCq9eWNdDLQ0rEtMKgCNG0KOFtp5J95wIwkmONCxH2KAc6WBIkHZBBb2JEQg0koUTF109EjCdwaStaThv%2BABcYgB2VnSSjFtcOCWLKqUyAgREDmyLNl3GvMYmhJOh8/ifNk3%2BLuSKcKEhNy%2Be5A'

# Download the page body. Plain Kernel#open treats its argument as a local
# path unless open-uri is loaded, and stopped accepting URLs altogether in
# Ruby 3.0, so go through URI.open (provided by open-uri). The block form
# closes the underlying IO once the body has been read.
html = URI.open(url) { |page| page.read }
# Each schedule row is a <tr class="o"> containing six <td> cells. The cell
# contents are pulled out with a regular expression instead of an HTML parser.
rows = html.scan(/<tr class="o">\s*?<td>(.*?)<\/td>\s*?<td>(.*?)<\/td>\s*?<td>(.*?)<\/td>\s*?<td>(.*?)<\/td>\s*?<td>(.*?)<\/td>\s*?<td>(.*?)<\/td>\s*?<\/tr>/i)

rows.each do |cells|
  # Cell order: start time, end time, two cells that together form the
  # summary, location, description.
  cells.map! { |cell| cell.strip }
  starts_text, ends_text, qualifier, title, place, details = cells

  # Parse both timestamps before touching the calendar so that a malformed
  # row fails before an event is created for it.
  starts_at = parse_time(starts_text)
  ends_at = parse_time(ends_text)

  heading = "#{title} (#{qualifier})"

  entry = cal.event
  entry.summary = heading.clean
  entry.location = place.clean
  entry.start = starts_at
  entry.end = ends_at
  entry.description = details.clean
end

# Write the finished calendar to stdout in iCalendar format.
puts cal.to_ical
