"""Download a student's Flight Schedule Pro session reports and merge them.

The script logs in to the Flight Schedule Pro web app with Selenium, opens
every session of each requested student, prints the session's printable view
to a PDF in the working directory, and finally merges those PDFs into a single
file per student in the output directory.
"""
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.print_page_options import PrintOptions
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from webdriver_manager.firefox import GeckoDriverManager
from pypdf import PdfWriter
import configparser
import os
import sys
import base64
import errno
import glob
import argparse

GECKO_DRIVER = 'gecko'
CHROME_DRIVER = 'chrome'
DEFAULT_CONFIG_PATH = './config.ini'
DEFAULT_OUTPUT_PATH = './'
DEFAULT_WORKING_DIRECTORY = './tmp'
DEFAULT_APP_URL = 'https://app.flightschedulepro.com'
DEFAULT_TIMEOUT = 10  # seconds
DEFAULT_MAX_SESSIONS = 500

# Locators that are used in more than one place.
ALERTS_XPATH = '//div[@id="alerts"]'
STUDENT_ROW_XPATH = '//tr[contains(@class, "clickable-course")]'
STUDENT_NAME_XPATH = './td/div[@class="student"]/div[@class="bold"]'
SESSIONS_TAB_XPATH = '//span[text()[contains(., "Sessions")]]'
VIEW_BTN_XPATH = '//fsp-ui-button[@Text="View"]'
PRINT_BTN_XPATH = '//fsp-ui-button[@Text="Print"]'
CLOSE_BTN_XPATH = '//fsp-ui-button[@Text="Close"]'

parser = argparse.ArgumentParser(
    prog='FSP Progress Combiner',
    description='Retrieves progress reports for a given student and combines them into a single PDF')
parser.add_argument('-c', '--config', default=DEFAULT_CONFIG_PATH)
parser.add_argument('-o', '--output_dir', default=DEFAULT_OUTPUT_PATH)
parser.add_argument(
    'students', nargs='*',
    help='Name of the student(s) to retrieve sessions for. '
         'If a partial name is provided, the first student with a matching name '
         'will be retrieved (case insensitive)')
# Useful for debugging
parser.add_argument('-m', '--maximum_sessions', default=DEFAULT_MAX_SESSIONS, type=int,
                    help='Limits the number of sessions retrieved per student')
parser.add_argument('-l', '--list_students', help='Outputs names all students', action='store_true')
parser.add_argument('-s', '--show_browser', help='Show browser window', action='store_true')
args = parser.parse_args()

config = configparser.ConfigParser()
# ConfigParser.read() silently skips unreadable files, so check explicitly
# instead of failing later with a bare KeyError on config['main'].
if not config.read(args.config) or not config.has_section('main'):
    print(f'Could not read a [main] section from config file "{args.config}"')
    sys.exit(1)
main_config = config['main']

working_dir = main_config.get('working_directory', DEFAULT_WORKING_DIRECTORY)
app_url = main_config.get('app_url', DEFAULT_APP_URL)
# Config values are strings; WebDriverWait needs a number.
timeout = main_config.getfloat('timeout', DEFAULT_TIMEOUT)
tenant_id = main_config.get('tenant_id')
if not tenant_id:
    print('No Tenant ID configured!')
    sys.exit(1)

# Start from an empty working directory: remove PDFs left by a previous run.
os.makedirs(working_dir, exist_ok=True)
for stale_pdf in glob.glob(os.path.join(working_dir, '*.pdf')):
    os.remove(stale_pdf)

# HACK: maybe Ubuntu specific
if main_config.getboolean('use_working_dir_for_tmp', False):
    os.environ['TMPDIR'] = working_dir

driver_type = main_config.get('driver')
# TODO: only firefox for now
if driver_type == GECKO_DRIVER:
    opt = webdriver.FirefoxOptions()
    # The [firefox] section is optional; fall back to the default binary.
    binary_path = config.get('firefox', 'binary_path', fallback=None)
    if binary_path is not None:
        opt.binary_location = binary_path
    if not args.show_browser:
        opt.add_argument('-headless')
    # Force window size to avoid issues with responsive design
    opt.add_argument('--width=1920')
    opt.add_argument('--height=1080')
    # Disable print dialog. Causes race conditions with window switching
    firefox_profile = FirefoxProfile()
    firefox_profile.set_preference('print.enabled', False)
    opt.profile = firefox_profile
    driver = webdriver.Firefox(options=opt, service=FirefoxService(GeckoDriverManager().install()))
else:
    print(f'unsupported driver type "{driver_type}"')
    sys.exit(1)


def die(message=None, exit_code=0):
    """Optionally print *message*, then exit the script with *exit_code*.

    The browser is shut down by the ``finally`` clause that wraps the main
    flow below, which runs when ``sys.exit`` raises ``SystemExit``.
    """
    if message is not None:
        print(message)
    sys.exit(exit_code)


def wait_for_all(locator):
    """Wait up to ``timeout`` seconds for *locator* and return the matches.

    Raises ``TimeoutException`` if no matching element appears in time.
    """
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_all_elements_located(locator)
    )


def get_student_rows():
    """Navigate to the students page and return its student table rows."""
    driver.get(app_url + '/App/Students')
    return wait_for_all((By.XPATH, STUDENT_ROW_XPATH))


def get_student_name(student_row):
    """Return the student name displayed in a student table row."""
    return student_row.find_element(By.XPATH, STUDENT_NAME_XPATH).get_attribute('innerText')


try:
    driver.get(app_url + '/Account/Login?company=' + tenant_id)

    # Accept the cookie modal if it shows up. This is best effort: a missing
    # or unclickable banner must not abort the run.
    try:
        wait_for_all((By.ID, 'onetrust-accept-btn-handler'))[0].click()
    except WebDriverException:
        pass

    # Login: the username and password are submitted as two separate steps.
    username = driver.find_element(By.ID, 'username')
    username.send_keys(main_config['username'])
    username.submit()
    try:
        password = wait_for_all((By.ID, 'password'))[0]
    except TimeoutException:
        if driver.find_elements(By.XPATH, ALERTS_XPATH):
            die('Bad username', 1)
        die('unknown authentication error', 1)
    password.send_keys(main_config['password'])
    password.submit()

    # Wait for the app to load before navigating to the students "page"
    try:
        wait_for_all((By.CLASS_NAME, 'fsp-main-drawer-content'))
    except TimeoutException:
        if driver.find_elements(By.XPATH, ALERTS_XPATH):
            die('Bad password', 1)
        die('unknown authentication error', 1)

    # Just list the students if the -l switch is passed in
    if args.list_students:
        for row in get_student_rows():
            print(get_student_name(row))
        die()

    os.makedirs(args.output_dir, exist_ok=True)

    for student in args.students:
        # Find the first row whose student name starts with the requested name.
        target = None
        for student_row in get_student_rows():
            student_name = get_student_name(student_row)
            if student_name.lower().startswith(student.lower()):
                print(f'Found matching student name "{student_name}"')
                target = student_row
                break
        if target is None:
            print(f'No student with a name matching "{student}" found')
            continue

        student_name_no_space = student_name.replace(' ', '_')
        target.find_element(By.CLASS_NAME, 'course-td').click()
        wait_for_all((By.XPATH, SESSIONS_TAB_XPATH))[0].click()

        # The sessions table reloads when a modal is opened after the first
        # one, so record the initial session count and re-query the buttons
        # on every iteration instead of holding on to stale elements.
        btn_count = len(wait_for_all((By.XPATH, VIEW_BTN_XPATH)))

        # The printable document opens in another window, so remember the
        # current one in order to come back to it.
        original_window = driver.current_window_handle

        # Paths of the PDFs written for this student, in session order.
        session_pdfs = []
        for i in range(btn_count):
            if i >= args.maximum_sessions:
                print('Reached max sessions')
                break
            print(f'Downloading session {i+1} of {btn_count} for student "{student_name}"')

            # DOM probably reloaded so existing view button elements are stale
            view_btns = wait_for_all((By.XPATH, VIEW_BTN_XPATH))
            if len(view_btns) != btn_count:
                die('session count changed. aborting...', 1)
            view_btns[i].click()
            wait_for_all((By.XPATH, PRINT_BTN_XPATH))[0].click()

            # Wait for the new window or tab, then switch to it
            WebDriverWait(driver, timeout).until(EC.number_of_windows_to_be(2))
            for window_handle in driver.window_handles:
                if window_handle != original_window:
                    driver.switch_to.window(window_handle)
                    break

            # Wait for the new tab to finish loading content
            WebDriverWait(driver, timeout).until(EC.title_is('Print Training Session'))

            # print_page() returns the PDF base64-encoded; decode and save it
            print_options = PrintOptions()
            print_options.orientation = 'portrait'
            pdf = driver.print_page(print_options)
            pdf_path = os.path.join(working_dir, student_name_no_space + '{:03d}'.format(i) + '.pdf')
            with open(pdf_path, 'wb') as file:
                file.write(base64.decodebytes(pdf.encode('utf-8')))
            session_pdfs.append(pdf_path)

            # Close the print window and restore the original one
            driver.close()
            driver.switch_to.window(original_window)
            WebDriverWait(driver, timeout).until(EC.title_is('Flight Schedule Pro'))
            driver.find_element(By.XPATH, CLOSE_BTN_XPATH).click()

        # Merge exactly the session PDFs written above for this student
        output_path = os.path.join(args.output_dir, student_name_no_space + '.pdf')
        print(f'Merging PDFs into "{output_path}"')
        writer = PdfWriter()
        for pdf_path in session_pdfs:
            writer.append(pdf_path)
        writer.write(output_path)
        writer.close()
finally:
    # Always shut the browser down, including on die() and on errors.
    driver.quit()