wip scraping infra
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
import React, { useEffect, useMemo, useState } from 'react';
|
||||
import ReactDOM from 'react-dom';
|
||||
import { Course } from 'src/shared/types/Course';
|
||||
import { Course, CourseRow, CourseScraper } from 'src/shared/types/Course';
|
||||
import { CourseCatalogDetailsScraper } from 'src/shared/types/CourseCatalogDetailsScraper';
|
||||
import { CourseCatalogRowScraper } from 'src/shared/types/CourseCatalogRowScraper';
|
||||
import useInfiniteScroll from '../hooks/useInfiniteScroll';
|
||||
import { populateSearchInputs } from '../lib/courseCatalog/populateSearchInputs';
|
||||
import { SiteSupport } from '../lib/getSiteSupport';
|
||||
@@ -15,7 +17,7 @@ interface Props {
|
||||
* This is the top level react component orchestrating the course catalog page.
|
||||
*/
|
||||
export default function CourseCatalogMain({ support }: Props) {
|
||||
const [rows, setRows] = React.useState<HTMLTableRowElement[]>([]);
|
||||
const [rows, setRows] = React.useState<CourseRow[]>([]);
|
||||
const [selectedCourse, setSelectedCourse] = useState<Course | null>(null);
|
||||
|
||||
const isScrolling = useInfiniteScroll(async () => {
|
||||
@@ -28,7 +30,7 @@ export default function CourseCatalogMain({ support }: Props) {
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
const rows = scrapeRowsFromCourseTable();
|
||||
const rows = scrapeCourseRows(support);
|
||||
setRows(rows);
|
||||
}, []);
|
||||
|
||||
@@ -40,23 +42,44 @@ export default function CourseCatalogMain({ support }: Props) {
|
||||
<div>
|
||||
<TableHead>Plus</TableHead>
|
||||
{rows.map(row => (
|
||||
<TableRow row={row} onClick={handleRowButtonClick} />
|
||||
<TableRow element={row.rowElement} support={support} onClick={handleRowButtonClick} />
|
||||
))}
|
||||
{isScrolling && <div>Scrolling...</div>}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function scrapeRowsFromCourseTable(): HTMLTableRowElement[] {
|
||||
const rows = Array.from(document.querySelectorAll('table tbody tr')) as HTMLTableRowElement[];
|
||||
function scrapeCourseRows(support: SiteSupport): CourseRow[] {
|
||||
const rows: CourseRow[] = [];
|
||||
|
||||
return Array.from(rows).filter(row => {
|
||||
if (row.querySelector('th')) {
|
||||
return false;
|
||||
let name: string | null = null;
|
||||
if (support === SiteSupport.COURSE_CATALOG_DETAILS) {
|
||||
const header = document.querySelector('#details h2');
|
||||
if (!header?.textContent) {
|
||||
throw new Error('Could not find course name on course details page.');
|
||||
}
|
||||
if (row.querySelector('td.course_header')) {
|
||||
return false;
|
||||
name = header.textContent.trim();
|
||||
}
|
||||
|
||||
document.querySelectorAll<HTMLTableRowElement>('table tbody tr').forEach(row => {
|
||||
// rows that have a course header are the start of a new section, so save the section name and skip
|
||||
const header = row.querySelector('td.course_header');
|
||||
if (header?.textContent) {
|
||||
name = header.textContent.trim();
|
||||
return;
|
||||
}
|
||||
return true;
|
||||
if (!name) {
|
||||
throw new Error('Could not find any course sections.');
|
||||
}
|
||||
|
||||
const course = scrapeCourseFromRow(name, support, row);
|
||||
});
|
||||
return rows;
|
||||
}
|
||||
|
||||
/**
 * Work-in-progress helper that, judging by its name and signature, is meant to
 * build a `Course` from a single row of the course table.
 *
 * NOTE(review): as visible here the body only initialises `url` and contains no
 * `return`, despite the declared `Course` return type — the implementation
 * appears unfinished.
 *
 * @param name - Name passed in by the caller; `scrapeCourseRows` supplies the
 *   trimmed text of the most recent course header (or of the details-page heading).
 * @param support - Which kind of supported page the scraper is running on.
 * @param row - The table row element to scrape; not yet read by the visible body.
 * @returns Declared as `Course`; nothing is returned by the visible body yet.
 */
function scrapeCourseFromRow(name: string, support: SiteSupport, row: HTMLTableRowElement): Course {
|
||||
// On the course details page the current page address is used as the URL;
// on any other supported page it starts out as null.
let url = support === SiteSupport.COURSE_CATALOG_DETAILS ? window.location.href : null;
|
||||

||||

||||

||||
}
|
||||
|
||||
Reference in New Issue
Block a user