diff --git a/timetabling/scrape-timetable.py b/timetabling/scrape-timetable.py index 2ef3560012fb85f27e5a3fce52e9f081d949f0c7..bc9d4eed214de016bdbf3fda690f75391d0021d1 100644 --- a/timetabling/scrape-timetable.py +++ b/timetabling/scrape-timetable.py @@ -95,6 +95,19 @@ class Module: mod_code : str tt_mod_id : str +@dataclass(frozen=True) +class Fingerprint: + mod_code : str + location : str + dt_start : datetime + dt_end : datetime + +@dataclass +class FingerCal: + """Calendar paired with the set of fingerprints of the events it contains""" + cal : Calendar + fingerprints : Set[Fingerprint] + def update_form_data(response, form_data): soup = BeautifulSoup(response.text, "lxml") @@ -192,7 +205,7 @@ def get_day_index(day : str) -> int: def parse_timetable( mod_code : str, timetable : str, - cals : Dict[int, Calendar] + cals : Dict[int, FingerCal] ): soup = BeautifulSoup(timetable, "lxml") for ttable in soup.body.findChildren("table", recursive=False): @@ -266,11 +279,19 @@ def parse_timetable( "byday": CAL_DAYS[day_idx] }) + fingerprint = Fingerprint(mod_codes, location, start_time, end_time) + for year in get_years(module_list): # HACK: do this more efficiently and will # better duplicate detection - if event not in cals[year].walk(): - cals[year].add_component(event) + if fingerprint not in cals[year].fingerprints: + cals[year].cal.add_component(event) + cals[year].fingerprints.add(fingerprint) + + if "BEDFORD" in location.upper(): + if fingerprint not in cals[BEDFORD].fingerprints: + cals[BEDFORD].cal.add_component(event) + cals[BEDFORD].fingerprints.add(fingerprint) if not is_row_label(cell): cell_pos += colspan @@ -328,7 +349,7 @@ def get_module_timetable( init_form_data : Dict[str, str], department : str, module : Module, - cals : Dict[int, Calendar] + cals : Dict[int, FingerCal] ): """Get the timetable for the module with the given id The id should come from the timetabling system, not the module code. 
@@ -359,8 +380,8 @@ def get_module_timetable( parse_timetable(module.mod_code, timetable, cals) def get_timetables(password : str): - cals = { year: Calendar() for year in range(0, 6) } - cals.update({ BEDFORD : Calendar() }) + cals = { year: FingerCal(Calendar(), set()) for year in range(0, 6) } + cals.update({ BEDFORD : FingerCal(Calendar(), set()) }) form_data = dict() initialise_form_data(form_data) @@ -383,14 +404,14 @@ def get_timetables(password : str): return cals -def save_cals(cals : Dict[int, Calendar]): +def save_cals(cals : Dict[int, FingerCal]): for year in cals: if year != BEDFORD: with open(f"Year{year}.ics", 'wb') as icsfile: - icsfile.write(cals[year].to_ical()) + icsfile.write(cals[year].cal.to_ical()) with open(f"Bedford.ics", 'wb') as icsfile: - icsfile.write(cals[BEDFORD].to_ical()) + icsfile.write(cals[BEDFORD].cal.to_ical()) password = getpass(f"Password for {USERNAME}: ") cals = get_timetables(password)