diff --git a/timetabling/README.md b/timetabling/README.md index 8041e03160fcfadc5f00f196b9ed7078bd04c034..605c1ab610fdb0ca7fbf3b959d029a6271d6571d 100644 --- a/timetabling/README.md +++ b/timetabling/README.md @@ -3,7 +3,7 @@ * `timetable-ics.py` -- convert the Timetabling team spreadsheets into ICS format. This is a rough and ready script and will probably need tweaks to your particular spreadsheet. * `get-practical-groups.py` -- find out which lab groups your students belong to on the web timetable. -* `scrape-timetable.py` -- pull ICS files from the timetabling system for departments/modules. Needs some config. +* `scrape-timetable.py` -- pull ICS files from the timetabling system for departments/modules/rooms. Needs some config. Pass "rooms" as a command-line argument to fetch room timetables; otherwise module timetables are fetched. To view the generated .ics files, you might like [ttcal][ttcal] (or just import into your favouring calendar app/website). diff --git a/timetabling/scrape-timetable.py b/timetabling/scrape-timetable.py index bc9d4eed214de016bdbf3fda690f75391d0021d1..ae15fcf15925f420a331ece53004418a45d8a8e5 100644 --- a/timetabling/scrape-timetable.py +++ b/timetabling/scrape-timetable.py @@ -1,5 +1,7 @@ # Scrapes timetable from web timetable system # +# Run with the "rooms" argument to get timetables for rooms on campus +# # Configure: # # * ACADEMIC_YEAR -- e.g. 
202425 @@ -12,8 +14,10 @@ import re import requests import pytz +import sys from bs4 import BeautifulSoup, Tag +from collections import defaultdict from datetime import datetime, timedelta from dataclasses import dataclass from getpass import getpass @@ -32,19 +36,39 @@ DEPARTMENTS = [ "Mathematics and Information Security Group" ] +ROOM_SETS = [ + "Central Booking", + "Computer Science" +] + # Match modules to get tt for # Match module format e.g CS1811 | Object Oriented Programming I MODULES_RE = re.compile("CS.*|IY.*|PC.*|DC.*") -ACTIVITIES = set([ - "Practical", - "Optional_Attendance", +ACTIVITIES = { + "Assessment", "Lecture", "Lecture_-_Online", - "Workshop", + "Optional_Attendance", + "Optional_Attendance_-_Online", + "Practical", "Practical_-_Online", + "Workshop", "Workshop_-_Online" -]) +} + +# td classes that mark a timetable cell as an activity of any kind (ACTIVITIES plus the extras below) +ALL_ACTIVITIES = ACTIVITIES | { + "Tutorial", + "Booking", + "Booking_Accepted", + "Booking_Requested", + "Drop_In", + "Employability", + "Rescheduled", + "Seminar", + "Tutorial", +} # Time delta from midnight TT_BASETIME = timedelta(hours=8) @@ -95,6 +119,11 @@ class Module: mod_code : str tt_mod_id : str +@dataclass(frozen=True) +class Room: + room_name: str + tt_name_id : str + @dataclass(frozen=True) class Fingerprint: mod_code : str @@ -160,6 +189,9 @@ def make_request( return response.text def get_weeks(weeks : str) -> Generator[int, None, None]: + if len(weeks.strip()) == 0: + return + for period in weeks.split(","): bounds = period.strip().split("-") if len(bounds) == 1: @@ -189,6 +221,13 @@ def get_activity(cell : Tag) -> Optional[str]: return next(iter(acts)) return None +def is_activity(cell : Tag) -> bool: + """Any kind of activity not just those in ACTIVITIES""" + if "class" in cell.attrs: + return len(set(cell["class"]) & ALL_ACTIVITIES) > 0 + else: + return False + def is_location(cell : str) -> bool: return LOCATION_RE.match(cell) is not None @@ -202,7 +241,7 @@ def get_day_index(day : str) -> 
int: return idx return -1 -def parse_timetable( +def parse_module_timetable( mod_code : str, timetable : str, cals : Dict[int, FingerCal] @@ -279,7 +318,9 @@ def parse_timetable( "byday": CAL_DAYS[day_idx] }) - fingerprint = Fingerprint(mod_codes, location, start_time, end_time) + fingerprint = Fingerprint( + mod_codes, location, start_time, end_time + ) for year in get_years(module_list): # HACK: do this more efficiently and will @@ -299,32 +340,32 @@ def parse_timetable( def get_years(mod_codes : List[str]) -> Set[int]: return set(int(name[2]) for name in mod_codes) -def get_department_page( +def get_set_page( cookies : requests.cookies.RequestsCookieJar, - department : str, + link_type : str, + data_set_name : str, form_data : Dict[str, str] ) -> BeautifulSoup: - """Returns a soup of the department page + """Returns a soup of the locations page Updates form data for the next request""" - form_data.update({ "__EVENTTARGET": "LinkBtn_module" }) + form_data.update({ "__EVENTTARGET": f"LinkBtn_{link_type}" }) module_page = make_request(form_data, cookies) soup = BeautifulSoup(module_page, "lxml") - dept_id : str = soup.find(id="dlFilter2") \ - .find("option", string=department) \ + data_set_id : str = soup.find(id="dlFilter2") \ + .find("option", string=data_set_name) \ .get("value") # select the department from the departments list (loads module list) form_data.update({ "__EVENTTARGET": "dlFilter2", - "tLinkType": "module", - "dlFilter2": dept_id + "tLinkType": link_type, + "dlFilter2": data_set_id }) - dept_page = make_request(form_data, cookies) - - return BeautifulSoup(dept_page, "lxml") + set_page = make_request(form_data, cookies) + return BeautifulSoup(set_page, "lxml") def get_department_modules( cookies : requests.cookies.RequestsCookieJar, @@ -333,7 +374,7 @@ def get_department_modules( ) -> Set[Module]: """Gets list of module IDs for department""" form_data = dict(init_form_data) - soup = get_department_page(cookies, department, form_data) + soup = 
get_set_page(cookies, "module", department, form_data) return set( Module( # parse e.g. CS1811 | Object Oriented Programming I @@ -355,7 +396,7 @@ def get_module_timetable( The id should come from the timetabling system, not the module code. See get_department_modules""" form_data = dict(init_form_data) - soup = get_department_page(cookies, department, form_data) + soup = get_set_page(cookies, "module", department, form_data) weeks = None for option in soup.find(id="lbWeeks").find_all("option"): @@ -377,9 +418,9 @@ def get_module_timetable( timetable = make_request(form_data, cookies) - parse_timetable(module.mod_code, timetable, cals) + parse_module_timetable(module.mod_code, timetable, cals) -def get_timetables(password : str): +def get_timetables_years(password : str): cals = { year: FingerCal(Calendar(), set()) for year in range(0, 6) } cals.update({ BEDFORD : FingerCal(Calendar(), set()) }) @@ -404,7 +445,7 @@ def get_timetables(password : str): return cals -def save_cals(cals : Dict[int, FingerCal]): +def save_cals_years(cals : Dict[int, FingerCal]): for year in cals: if year != BEDFORD: with open(f"Year{year}.ics", 'wb') as icsfile: @@ -413,8 +454,165 @@ def save_cals(cals : Dict[int, FingerCal]): with open(f"Bedford.ics", 'wb') as icsfile: icsfile.write(cals[BEDFORD].cal.to_ical()) +def parse_room_timetable( + room_name : str, + timetable : str, + cals : Dict[str, Calendar] +): + soup = BeautifulSoup(timetable, "lxml") + + capacity = int(soup.body.find("span", { "class": "header-1-0-1" }).text) + + room_title = f"CAP {capacity:03} {room_name}" + + for ttable in soup.body.findChildren("table", recursive=False): + if "class" not in ttable.attrs: + continue + table_type = ttable["class"] + if "grid-border-args" in table_type: + tt_rows = ttable.findChildren("tr", recursive=False) + weeks = "" + day_idx = 0 # Monday + for (row_idx, tt_row) in enumerate(tt_rows): + if row_idx == 1: + weeks = tt_row.find("td", { "class": "row-label-two" }).text + + day_label = 
tt_row.find("td", { "class": "row-label-one" }) + if day_label: + day_idx = get_day_index(day_label.text) + + cell_pos = 0 + for cell in tt_row.findChildren("td", recursive=False): + colspan = int(cell.attrs.get("colspan", "1")) + + if is_activity(cell): + activity_time = TT_BASETIME \ + + cell_pos * timedelta(minutes=30) + length = colspan * timedelta(minutes=30) + description = re.sub(r"\n+", "\n", cell.text) + tt_weeks = list(get_weeks(weeks)) + + start_time = get_time( + tt_weeks[0], day_idx, activity_time + ) + end_time = start_time + length + + year_weeks = list(map( + lambda w: get_time( + w, day_idx, activity_time + ).isocalendar()[1], + tt_weeks + )) + + event = Event() + summary = next( + ( + line + for line in description.split("\n") + if len(line) > 0 + ), + "Unknown Event" + ) + event.add("summary", summary) + event.add("description", description) + event.add("location", f"{room_name} ({capacity})") + event.add("dtstart", start_time) + event.add("dtend", end_time) + event.add("rrule", { + "freq": "YEARLY", + "count": len(tt_weeks), + "byweekno": year_weeks, + "byday": CAL_DAYS[day_idx] + }) + + cals[room_title].add_component(event) + + if not is_row_label(cell): + cell_pos += colspan + +def get_rooms( + cookies : requests.cookies.RequestsCookieJar, + init_form_data : Dict[str, str], + room_set : str, +) -> Set[Room]: + """Gets list of room IDs in set""" + form_data = dict(init_form_data) + soup = get_set_page(cookies, "location", room_set, form_data) + return set( + Room(option.text, option.get("value")) + for option in soup.find(id="dlObject").find_all("option") + ) + +def get_room_timetable( + cookies : requests.cookies.RequestsCookieJar, + init_form_data : Dict[str, str], + room_set : str, + room : Room, + cals : Dict[str, Calendar] +): + form_data = dict(init_form_data) + soup = get_set_page(cookies, "location", room_set, form_data) + + weeks = None + for option in soup.find(id="lbWeeks").find_all("option"): + val_weeks = option.get("value") + 
val_name = option.text + if "all" in val_name.lower(): + weeks = val_weeks + break + + if weeks is not None: + form_data.update({ + "dlObject": room.tt_name_id, + "lbWeeks": weeks, + "lbDays": "1-5", + "dlPeriod": "1-28", + "RadioType": "individual;swsurl;swsurl", + "bGetTimetable": "View Timetable", + }) + + timetable = make_request(form_data, cookies) + + parse_room_timetable(room.room_name, timetable, cals) + +def get_timetables_rooms(password : str): + cals = defaultdict(Calendar) + + form_data = dict() + initialise_form_data(form_data) + cookies = login(form_data, password) + + # need to refresh form data with a blank request + make_request(form_data, cookies) + + initial_form_data = dict(form_data) + + for room_set in ROOM_SETS: + rooms = get_rooms( + cookies, initial_form_data, room_set + ) + for room in rooms: + print("Getting", room.room_name) + get_room_timetable( + cookies, initial_form_data, room_set, room, cals + ) + + return cals + +def save_cals_rooms(cals : Dict[str, Calendar]): + for room in cals: + filename = f"room-{room.replace(" ", "-")}.ics" + with open(filename, 'wb') as icsfile: + icsfile.write(cals[room].to_ical()) + +rooms_mode = "rooms" in sys.argv + password = getpass(f"Password for {USERNAME}: ") -cals = get_timetables(password) -save_cals(cals) +if rooms_mode: + cals = get_timetables_rooms(password) + save_cals_rooms(cals) +else: + cals = get_timetables_years(password) + save_cals_years(cals)