auto-loading completely done

This commit is contained in:
Sriram Hariharan
2023-03-05 14:34:26 -06:00
parent 2b952d0591
commit 0956525e94
13 changed files with 248 additions and 53 deletions

View File

@@ -1,4 +1,4 @@
import { Course, Instructor, Status, InstructionMode, CourseRow } from 'src/shared/types/Course';
import { Course, Instructor, Status, InstructionMode, ScrapedRow } from 'src/shared/types/Course';
import { CourseSchedule, CourseMeeting } from 'src/shared/types/CourseSchedule';
import { SiteSupport } from 'src/views/lib/getSiteSupport';
@@ -27,7 +27,7 @@ enum DetailsSelector {
}
/**
* A class that allows use to scrape information from UT's course catalog to create our internal representation of a course
* A class that allows us to scrape information from UT's course catalog to create our internal representation of a course
*/
export class CourseCatalogScraper {
support: SiteSupport;
@@ -41,8 +41,8 @@ export class CourseCatalogScraper {
* @param rows the rows of the course catalog table
* @returns an array of course row objects (which contain courses corresponding to the htmltable row)
*/
public scrape(rows: NodeListOf<HTMLTableRowElement>): CourseRow[] {
const courses: CourseRow[] = [];
public scrape(rows: NodeListOf<HTMLTableRowElement> | HTMLTableRowElement[]): ScrapedRow[] {
const courses: ScrapedRow[] = [];
let fullName = this.getFullName();
@@ -94,7 +94,7 @@ export class CourseCatalogScraper {
},
});
courses.push({
rowElement: row,
element: row,
course: newCourse,
});
});

View File

@@ -0,0 +1,11 @@
const TABLE_ROW_SELECTOR = 'table tbody tr';

/**
 * Collects every row of the course table found in the given document.
 * @param doc the document whose course table rows should be collected
 * @returns a plain array (rather than a NodeList) of the matching rows; empty when nothing matches
 */
export default function getCourseTableRows(doc: Document): HTMLTableRowElement[] {
    return Array.from(doc.querySelectorAll<HTMLTableRowElement>(TABLE_ROW_SELECTOR));
}

View File

@@ -0,0 +1,78 @@
import getCourseTableRows from './getCourseTableRows';
// CSS selector used to find the "next page" pagination link
const NEXT_PAGE_BUTTON_SELECTOR = '#next_nav_link';
// CSS selector used to find the "previous page" pagination link
const PREV_PAGE_BUTTON_SELECTOR = '#prev_nav_link';
/**
* Represents all the states that we care about when autoloading the next page of courses.
* One of these values is the first element of the tuple returned by loadNextCourseCatalogPage.
*/
export enum AutoLoadStatus {
// returned when a call arrives while the next page is already being loaded
LOADING = 'LOADING',
// returned when there is no next page to load, or when the next page was loaded successfully
IDLE = 'IDLE',
// returned when loading the next page failed
ERROR = 'ERROR',
}
// set to true when loadNextCourseCatalogPage starts fetching a page; calls made while it is true
// return LOADING without starting another fetch
let isLoading = false;
// URL of the next catalog page, read from the current document's next button when this module is evaluated;
// undefined when the document has no next button
let nextPageURL = getNextButton(document)?.href;
/**
 * Loads the next page of the course catalog, using the URL scraped from the pagination buttons,
 * and returns the table rows found on that page.
 *
 * As long as a next page is known, every call also removes the pagination buttons from the current
 * document so that the same page cannot be loaded twice through them.
 *
 * @returns a tuple of the resulting AutoLoadStatus and the table rows of the next page:
 * - `[IDLE, rows]` when the next page was loaded successfully
 * - `[IDLE, []]` when there is no next page (we have reached the end of the course catalog)
 * - `[LOADING, []]` when a previous call is still loading the next page
 * - `[ERROR, []]` when the request failed or responded with a non-OK HTTP status
 */
export async function loadNextCourseCatalogPage(): Promise<[AutoLoadStatus, HTMLTableRowElement[]]> {
    // without a next page URL we have reached the end of the course catalog, so there is nothing to load
    if (!nextPageURL) {
        return [AutoLoadStatus.IDLE, []];
    }

    // remove the pagination buttons so that we don't load the same page twice
    removePaginationButtons(document);

    // a load is already in flight; don't start a second request for the same page
    if (isLoading) {
        return [AutoLoadStatus.LOADING, []];
    }

    isLoading = true;
    try {
        const response = await fetch(nextPageURL);
        if (!response.ok) {
            // an HTTP error page is not a catalog page; keep nextPageURL untouched so the load can be retried
            console.error(`Failed to load the next course catalog page: ${response.status} ${response.statusText}`);
            return [AutoLoadStatus.ERROR, []];
        }

        const html = await response.text();
        const newDocument = new DOMParser().parseFromString(html, 'text/html');

        // extract the table rows from the document of the next page
        const tableRows = getCourseTableRows(newDocument);

        // remember the following page's URL so the next call continues from there
        nextPageURL = getNextButton(newDocument)?.href;

        return [AutoLoadStatus.IDLE, tableRows];
    } catch (e) {
        console.error(e);
        return [AutoLoadStatus.ERROR, []];
    } finally {
        // always release the flag; otherwise one failed load would make every later call report LOADING
        isLoading = false;
    }
}
/**
 * Looks up the "next page" pagination link in a document.
 * @param doc the document to search
 * @returns the first element matching the next-page selector, or null if there is none
 */
function getNextButton(doc: Document) {
    const nextLink = doc.querySelector<HTMLAnchorElement>(NEXT_PAGE_BUTTON_SELECTOR);
    return nextLink;
}
/**
 * Strips the pagination links (next first, then previous) out of a document so that
 * the same page cannot be loaded twice through them.
 * @param doc the document whose next and previous buttons should be removed
 */
export function removePaginationButtons(doc: Document) {
    for (const selector of [NEXT_PAGE_BUTTON_SELECTOR, PREV_PAGE_BUTTON_SELECTOR]) {
        const matches = doc.querySelectorAll<HTMLAnchorElement>(selector);
        matches.forEach(button => button.remove());
    }
}