CourseScraper completely done
'
This commit is contained in:
@@ -18,12 +18,6 @@ export type Instructor = {
|
||||
*/
|
||||
export type InstructionMode = 'Online' | 'In Person' | 'Hybrid';
|
||||
|
||||
export type Links = {
|
||||
syllabi?: string;
|
||||
textbook?: string;
|
||||
eCIS?: string;
|
||||
};
|
||||
|
||||
export enum Status {
|
||||
OPEN = 'OPEN',
|
||||
CLOSED = 'CLOSED',
|
||||
@@ -34,16 +28,15 @@ export enum Status {
|
||||
export class Course {
|
||||
uniqueId: number;
|
||||
number: string;
|
||||
name: string;
|
||||
fullName: string;
|
||||
courseName: string;
|
||||
department: string;
|
||||
status: Status;
|
||||
instructors: Instructor[];
|
||||
isReserved: boolean;
|
||||
description: string[];
|
||||
description?: string[];
|
||||
schedule: CourseSchedule;
|
||||
currentStatus: string;
|
||||
url: string;
|
||||
links: Links;
|
||||
registerURL?: string;
|
||||
flags: string[];
|
||||
instructionMode: InstructionMode;
|
||||
|
||||
@@ -1,13 +1,23 @@
|
||||
import { Serialized } from 'chrome-extension-toolkit';
|
||||
|
||||
type Day = 'M' | 'T' | 'W' | 'TH' | 'F' | 'S' | 'SU';
|
||||
const dayMap = {
|
||||
M: 'Monday',
|
||||
T: 'Tuesday',
|
||||
W: 'Wednesday',
|
||||
TH: 'Thursday',
|
||||
F: 'Friday',
|
||||
S: 'Saturday',
|
||||
SU: 'Sunday',
|
||||
} as const;
|
||||
|
||||
type Day = typeof dayMap[keyof typeof dayMap];
|
||||
|
||||
type Room = {
|
||||
building: string;
|
||||
number: string;
|
||||
};
|
||||
|
||||
type CourseSection = {
|
||||
export type CourseSection = {
|
||||
day: Day;
|
||||
startTime: number;
|
||||
endTime: number;
|
||||
@@ -21,9 +31,49 @@ export class CourseSchedule {
|
||||
Object.assign(this, courseSchedule);
|
||||
}
|
||||
|
||||
static parse(days: Day[] , times, hours): CourseSchedule {}
|
||||
static parse(dayLine: string, timeLine: string, roomLine: string): CourseSection[] {
|
||||
try {
|
||||
let days: Day[] = dayLine
|
||||
.split('')
|
||||
.map((char, i) => {
|
||||
const nextChar = dayLine.charAt(i + 1);
|
||||
let day = char;
|
||||
if (char === 'T' && nextChar === 'H') {
|
||||
day += nextChar;
|
||||
}
|
||||
if (char === 'S' && nextChar === 'U') {
|
||||
day += nextChar;
|
||||
}
|
||||
return dayMap[day];
|
||||
})
|
||||
.filter(Boolean) as Day[];
|
||||
|
||||
toString(): string {
|
||||
return '';
|
||||
const [startTime, endTime] = timeLine
|
||||
.replaceAll('.', '')
|
||||
.split('-')
|
||||
.map(time => {
|
||||
const [hour, rest] = time.split(':');
|
||||
const [minute, ampm] = rest.split(' ');
|
||||
|
||||
if (ampm === 'pm') {
|
||||
return Number(hour) * 60 + Number(minute) + 12 * 60;
|
||||
}
|
||||
return Number(hour) * 60 + Number(minute);
|
||||
});
|
||||
|
||||
const [building, number] = roomLine.split(' ');
|
||||
|
||||
return days.map(day => ({
|
||||
day,
|
||||
startTime,
|
||||
endTime,
|
||||
room: {
|
||||
building,
|
||||
number,
|
||||
},
|
||||
}));
|
||||
} catch (e) {
|
||||
throw new Error(`Failed to parse schedule: ${dayLine} ${timeLine} ${roomLine}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import React, { useEffect, useMemo, useState } from 'react';
|
||||
import React, { useEffect, useState } from 'react';
|
||||
import ReactDOM from 'react-dom';
|
||||
import { Course, CourseRow, CourseScraper } from 'src/shared/types/Course';
|
||||
import { CourseCatalogDetailsScraper } from 'src/shared/types/CourseCatalogDetailsScraper';
|
||||
import { CourseCatalogRowScraper } from 'src/shared/types/CourseCatalogRowScraper';
|
||||
import { Course, CourseRow } from 'src/shared/types/Course';
|
||||
import useInfiniteScroll from '../hooks/useInfiniteScroll';
|
||||
import { CourseScraper } from '../lib/courseCatalog/CourseScraper';
|
||||
import { populateSearchInputs } from '../lib/courseCatalog/populateSearchInputs';
|
||||
import { SiteSupport } from '../lib/getSiteSupport';
|
||||
import TableHead from './injected/TableHead';
|
||||
@@ -22,7 +21,7 @@ export default function CourseCatalogMain({ support }: Props) {
|
||||
|
||||
const isScrolling = useInfiniteScroll(async () => {
|
||||
console.log('infinite scroll');
|
||||
return false;
|
||||
return true;
|
||||
});
|
||||
|
||||
useEffect(() => {
|
||||
@@ -30,7 +29,9 @@ export default function CourseCatalogMain({ support }: Props) {
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
const rows = scrapeCourseRows(support);
|
||||
const scraper = new CourseScraper(support);
|
||||
const rows = scraper.scrape(document.querySelectorAll<HTMLTableRowElement>('table tbody tr'));
|
||||
console.log('useEffect -> rows:', rows);
|
||||
setRows(rows);
|
||||
}, []);
|
||||
|
||||
@@ -42,44 +43,14 @@ export default function CourseCatalogMain({ support }: Props) {
|
||||
<div>
|
||||
<TableHead>Plus</TableHead>
|
||||
{rows.map(row => (
|
||||
<TableRow element={row.rowElement} support={support} onClick={handleRowButtonClick} />
|
||||
<TableRow
|
||||
element={row.rowElement}
|
||||
course={row.course}
|
||||
support={support}
|
||||
onClick={handleRowButtonClick}
|
||||
/>
|
||||
))}
|
||||
{isScrolling && <div>Scrolling...</div>}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function scrapeCourseRows(support: SiteSupport): CourseRow[] {
|
||||
const rows: CourseRow[] = [];
|
||||
|
||||
let name: string | null = null;
|
||||
if (support === SiteSupport.COURSE_CATALOG_DETAILS) {
|
||||
const header = document.querySelector('#details h2');
|
||||
if (!header?.textContent) {
|
||||
throw new Error('Could not find course name on course details page.');
|
||||
}
|
||||
name = header.textContent.trim();
|
||||
}
|
||||
|
||||
document.querySelectorAll<HTMLTableRowElement>('table tbody tr').forEach(row => {
|
||||
// rows that have a course header are the start of a new section, so save the section name and skip
|
||||
const header = row.querySelector('td.course_header');
|
||||
if (header?.textContent) {
|
||||
name = header.textContent.trim();
|
||||
return;
|
||||
}
|
||||
if (!name) {
|
||||
throw new Error('Could not find any course sections.');
|
||||
}
|
||||
|
||||
const course = scrapeCourseFromRow(name, support, row);
|
||||
});
|
||||
return rows;
|
||||
}
|
||||
|
||||
function scrapeCourseFromRow(name: string, support: SiteSupport, row: HTMLTableRowElement): Course {
|
||||
let url = support === SiteSupport.COURSE_CATALOG_DETAILS ? window.location.href : null;
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
import React, { useEffect, useState } from 'react';
|
||||
import ReactDOM from 'react-dom';
|
||||
import { Course } from 'src/shared/types/Course';
|
||||
import { Course, CourseRow } from 'src/shared/types/Course';
|
||||
import { SiteSupport } from 'src/views/lib/getSiteSupport';
|
||||
import { Button } from '../common/Button/Button';
|
||||
|
||||
interface Props {
|
||||
support: SiteSupport;
|
||||
course: Course;
|
||||
element: HTMLTableRowElement;
|
||||
onClick: (course: Course) => void;
|
||||
onClick: (...args: any[]) => any;
|
||||
}
|
||||
|
||||
/**
|
||||
* This component is injected into each row of the course catalog table.
|
||||
* @returns a react portal to the new td in the column or null if the column has not been created yet.
|
||||
*/
|
||||
export default function TableRow({ support, element, onClick }: Props): JSX.Element | null {
|
||||
export default function TableRow({ support, course, element, onClick }: Props): JSX.Element | null {
|
||||
const [container, setContainer] = useState<HTMLTableCellElement | null>(null);
|
||||
const [course, setCourse] = useState<Course | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
const portalContainer = document.createElement('td');
|
||||
@@ -25,17 +25,9 @@ export default function TableRow({ support, element, onClick }: Props): JSX.Elem
|
||||
setContainer(portalContainer);
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
setCourse(course);
|
||||
}, [element]);
|
||||
|
||||
if (!container || !course) {
|
||||
if (!container) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const handleOnClick = () => {
|
||||
onClick(course);
|
||||
};
|
||||
|
||||
return ReactDOM.createPortal(<Button onClick={handleOnClick}>Plus</Button>, container);
|
||||
return ReactDOM.createPortal(<Button onClick={onClick}>Plus</Button>, container);
|
||||
}
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
import { Instructor, Status } from 'src/shared/types/Course';
|
||||
import { Course, Instructor, Status, InstructionMode, CourseRow } from 'src/shared/types/Course';
|
||||
import { CourseSchedule, CourseSection } from 'src/shared/types/CourseSchedule';
|
||||
import { SiteSupport } from 'src/views/lib/getSiteSupport';
|
||||
|
||||
enum TableDataSelector {
|
||||
COURSE_HEADER = 'td.course_header',
|
||||
UNIQUE_ID = 'td[data-th="Unique"]',
|
||||
REGISTER_URL = 'td[data-th="Add"] a',
|
||||
INSTRUCTORS = 'td[data-th="Instructor"] span',
|
||||
INSTRUCTION_MODE = 'td[data-th="Instruction Mode"]',
|
||||
STATUS = 'td[data-th="Status"]',
|
||||
SCHEDULE_DAYS = 'td[data-th="Days"]>span',
|
||||
SCHEDULE_HOURS = 'td[data-th="Hour"]>span',
|
||||
@@ -12,37 +15,95 @@ enum TableDataSelector {
|
||||
FLAGS = 'td[data-th="Flags"] ul li',
|
||||
}
|
||||
|
||||
enum CatalogDetailsSelector {
|
||||
enum DetailsSelector {
|
||||
COURSE_NAME = '#details h2',
|
||||
COURSE_DESCRIPTION = '#details p',
|
||||
}
|
||||
|
||||
export class CourseScraper {
|
||||
support: SiteSupport;
|
||||
row: HTMLTableRowElement;
|
||||
|
||||
constructor(support: SiteSupport, row: HTMLTableRowElement) {
|
||||
constructor(support: SiteSupport) {
|
||||
this.support = support;
|
||||
this.row = row;
|
||||
}
|
||||
|
||||
scrapeUniqueId(): number {
|
||||
const div = this.row.querySelector(TableDataSelector.UNIQUE_ID);
|
||||
public scrape(rows: NodeListOf<HTMLTableRowElement>): CourseRow[] {
|
||||
const courses: CourseRow[] = [];
|
||||
|
||||
let fullName = this.getFullName();
|
||||
|
||||
rows.forEach(row => {
|
||||
if (this.isHeaderRow(row)) {
|
||||
fullName = this.getFullName(row);
|
||||
return;
|
||||
}
|
||||
// we are now ready to build the course object
|
||||
|
||||
if (!fullName) {
|
||||
throw new Error('Course name not found');
|
||||
}
|
||||
|
||||
fullName = fullName.replace(/\s\s+/g, ' ').trim();
|
||||
|
||||
const [courseName, department, number] = this.separateCourseName(fullName);
|
||||
|
||||
const [status, isReserved] = this.getStatus(row);
|
||||
const newCourse = new Course({
|
||||
fullName,
|
||||
courseName,
|
||||
department,
|
||||
number,
|
||||
status,
|
||||
isReserved,
|
||||
schedule: this.getSchedule(row),
|
||||
registerURL: this.getRegisterURL(row),
|
||||
url: this.getURL(row),
|
||||
flags: this.getFlags(row),
|
||||
uniqueId: this.getUniqueId(row),
|
||||
instructionMode: this.getInstructionMode(row),
|
||||
instructors: this.getInstructors(row),
|
||||
description: this.getDescription(document),
|
||||
});
|
||||
courses.push({
|
||||
rowElement: row,
|
||||
course: newCourse,
|
||||
});
|
||||
});
|
||||
|
||||
return courses;
|
||||
}
|
||||
|
||||
separateCourseName(name: string): [courseName: string, department: string, number: string] {
|
||||
let courseNumberIndex = name.search(/\d/);
|
||||
let department = name.substring(0, courseNumberIndex).trim();
|
||||
let number = name.substring(courseNumberIndex, name.indexOf(' ', courseNumberIndex)).trim();
|
||||
let courseName = name.substring(name.indexOf(' ', courseNumberIndex)).trim();
|
||||
|
||||
return [courseName, department, number];
|
||||
}
|
||||
|
||||
getUniqueId(row: HTMLTableRowElement): number {
|
||||
const div = row.querySelector(TableDataSelector.UNIQUE_ID);
|
||||
if (!div) {
|
||||
throw new Error('Unique ID not found');
|
||||
}
|
||||
return Number(div.textContent);
|
||||
}
|
||||
|
||||
scrapeInstructors(): Instructor[] {
|
||||
const spans = this.row.querySelectorAll(TableDataSelector.INSTRUCTORS);
|
||||
getURL(row: HTMLTableRowElement): string {
|
||||
const div = row.querySelector<HTMLAnchorElement>(`${TableDataSelector.UNIQUE_ID} a`);
|
||||
return div?.href || window.location.href;
|
||||
}
|
||||
|
||||
getInstructors(row: HTMLTableRowElement): Instructor[] {
|
||||
const spans = row.querySelectorAll(TableDataSelector.INSTRUCTORS);
|
||||
const names = Array.from(spans)
|
||||
.map(span => span.textContent || '')
|
||||
.map(name => name.trim())
|
||||
.filter(Boolean);
|
||||
|
||||
return names.map(name => {
|
||||
const [lastName, rest] = name.split(',');
|
||||
const [lastName, rest] = name.split(',').map(s => s.trim());
|
||||
const [firstName, middleInitial] = rest.split(' ');
|
||||
|
||||
return {
|
||||
@@ -54,21 +115,45 @@ export class CourseScraper {
|
||||
});
|
||||
}
|
||||
|
||||
scrapeName(): string {
|
||||
const div = document.querySelector(CatalogDetailsSelector.COURSE_NAME);
|
||||
if (!div) {
|
||||
throw new Error('Course name not found');
|
||||
}
|
||||
return div.textContent || '';
|
||||
isHeaderRow(row: HTMLTableRowElement): boolean {
|
||||
return row.querySelector(TableDataSelector.COURSE_HEADER) !== null;
|
||||
}
|
||||
|
||||
scrapeRegisterURL(): string | undefined {
|
||||
const a = this.row.querySelector<HTMLAnchorElement>(TableDataSelector.REGISTER_URL);
|
||||
getInstructionMode(row: HTMLTableRowElement): InstructionMode {
|
||||
const text = (row.querySelector(TableDataSelector.INSTRUCTION_MODE)?.textContent || '').toLowerCase();
|
||||
|
||||
if (text.includes('internet')) {
|
||||
return 'Online';
|
||||
}
|
||||
if (text.includes('hybrid')) {
|
||||
return 'Hybrid';
|
||||
}
|
||||
return 'In Person';
|
||||
}
|
||||
|
||||
getDescription(document: Document): string[] {
|
||||
const lines = document.querySelectorAll(DetailsSelector.COURSE_DESCRIPTION);
|
||||
return Array.from(lines)
|
||||
.map(line => line.textContent || '')
|
||||
.map(line => line.replace(/\s\s+/g, ' ').trim())
|
||||
.filter(Boolean);
|
||||
}
|
||||
|
||||
getFullName(row?: HTMLTableRowElement): string {
|
||||
if (!row) {
|
||||
return document.querySelector(DetailsSelector.COURSE_NAME)?.textContent || '';
|
||||
}
|
||||
const div = row.querySelector(TableDataSelector.COURSE_HEADER);
|
||||
return div?.textContent || '';
|
||||
}
|
||||
|
||||
getRegisterURL(row: HTMLTableRowElement): string | undefined {
|
||||
const a = row.querySelector<HTMLAnchorElement>(TableDataSelector.REGISTER_URL);
|
||||
return a?.href;
|
||||
}
|
||||
|
||||
scrapeStatus(): [status: Status, isReserved: boolean] {
|
||||
const div = this.row.querySelector(TableDataSelector.STATUS);
|
||||
getStatus(row: HTMLTableRowElement): [status: Status, isReserved: boolean] {
|
||||
const div = row.querySelector(TableDataSelector.STATUS);
|
||||
if (!div) {
|
||||
throw new Error('Status not found');
|
||||
}
|
||||
@@ -93,39 +178,33 @@ export class CourseScraper {
|
||||
throw new Error(`Unknown status: ${text}`);
|
||||
}
|
||||
|
||||
scrapeFlags(): string[] {
|
||||
const lis = this.row.querySelectorAll(TableDataSelector.FLAGS);
|
||||
getFlags(row: HTMLTableRowElement): string[] {
|
||||
const lis = row.querySelectorAll(TableDataSelector.FLAGS);
|
||||
return Array.from(lis).map(li => li.textContent || '');
|
||||
}
|
||||
|
||||
scrapeDescription(): string[] {
|
||||
const lines = document.querySelectorAll(CatalogDetailsSelector.COURSE_DESCRIPTION);
|
||||
return Array.from(lines)
|
||||
.map(line => line.textContent || '')
|
||||
.filter(Boolean);
|
||||
}
|
||||
getSchedule(row: HTMLTableRowElement): CourseSchedule {
|
||||
const dayLines = row.querySelectorAll(TableDataSelector.SCHEDULE_DAYS);
|
||||
const hourLines = row.querySelectorAll(TableDataSelector.SCHEDULE_HOURS);
|
||||
const roomLines = row.querySelectorAll(TableDataSelector.SCHEDULE_ROOM);
|
||||
|
||||
scrapeSchedule(): CourseSchedule {
|
||||
const days = this.row.querySelectorAll(TableDataSelector.SCHEDULE_DAYS);
|
||||
const hours = this.row.querySelectorAll(TableDataSelector.SCHEDULE_HOURS);
|
||||
const rooms = this.row.querySelectorAll(TableDataSelector.SCHEDULE_ROOM);
|
||||
|
||||
if (days.length !== hours.length) {
|
||||
if (dayLines.length !== hourLines.length) {
|
||||
throw new Error('Schedule data is malformed');
|
||||
}
|
||||
|
||||
// const schedule: = [];
|
||||
// for (let i = 0; i < days.length; i++) {
|
||||
// const day = days[i].textContent || '';
|
||||
// const hour = hours[i].textContent || '';
|
||||
// const room = rooms[i].textContent || '';
|
||||
const sections: CourseSection[] = [];
|
||||
|
||||
// schedule.push({
|
||||
// day,
|
||||
// hour,
|
||||
// room,
|
||||
// });
|
||||
// }
|
||||
// return schedule;
|
||||
for (let i = 0; i < dayLines.length; i += 1) {
|
||||
const lineSections = CourseSchedule.parse(
|
||||
dayLines[i].textContent || '',
|
||||
hourLines[i].textContent || '',
|
||||
roomLines[i].textContent || ''
|
||||
);
|
||||
sections.push(...lineSections);
|
||||
}
|
||||
|
||||
return new CourseSchedule({
|
||||
sections,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user