diff --git a/google3e243da49c638b18.html b/docs/google3e243da49c638b18.html
similarity index 100%
rename from google3e243da49c638b18.html
rename to docs/google3e243da49c638b18.html
diff --git a/robots.txt b/docs/robots.txt
similarity index 100%
rename from robots.txt
rename to docs/robots.txt
diff --git a/scripts/check_dupes.py b/scripts/check_dupes.py
new file mode 100644
index 0000000..3db65b2
--- /dev/null
+++ b/scripts/check_dupes.py
@@ -0,0 +1,17 @@
+"""Report duplicate entries in docs/comments.json, keyed on (id, text, timestamp)."""
+import json
+
+with open("docs/comments.json", encoding="utf-8") as f:
+    comments = json.load(f)
+seen, dupes = {}, 0
+
+for i, c in enumerate(comments):
+    # Two comments are duplicates only when id, text and timestamp all match.
+    k = (c.get("id", ""), c.get("text", ""), c.get("timestamp", 0))
+    if k in seen:
+        dupes += 1
+        print(f" Index {i} dupes {seen[k]} | id={k[0]} | ts={k[2]} | text={k[1][:80]}")
+    else:
+        seen[k] = i  # remember the first index at which this key appeared
+
+print(f"Found {dupes} duplicate(s)" if dupes else "No duplicates among us.")
\ No newline at end of file
diff --git a/scripts/check_dupes.rs b/scripts/check_dupes.rs
new file mode 100644
index 0000000..ee9cb14
--- /dev/null
+++ b/scripts/check_dupes.rs
@@ -0,0 +1,41 @@
+//! Reports duplicate entries in `docs/comments.json`.
+//!
+//! Two comments count as duplicates when their `id`, `text` and `timestamp`
+//! fields all match; missing or mistyped fields fall back to `""` / `0`.
+
+use std::{collections::HashMap, fs};
+use serde_json::Value;
+
+fn main() {
+    let raw = fs::read_to_string("docs/comments.json").expect("cannot read docs/comments.json");
+    let comments: Vec<Value> =
+        serde_json::from_str(&raw).expect("docs/comments.json must be a JSON array");
+    // Maps each (id, text, timestamp) key to the index where it was first seen.
+    let mut seen: HashMap<(String, String, i64), usize> = HashMap::new();
+    let mut dupes = 0;
+    for (i, c) in comments.iter().enumerate() {
+        let k = (
+            c["id"].as_str().unwrap_or("").to_owned(),
+            c["text"].as_str().unwrap_or("").to_owned(),
+            c["timestamp"].as_i64().unwrap_or(0),
+        );
+
+        if let Some(&first) = seen.get(&k) {
+            dupes += 1;
+            println!(
+                " Index {} dupes index {} | id={} | ts={} | text={:.80}",
+                i, first, k.0, k.2, k.1
+            );
+        } else {
+            seen.insert(k, i);
+        }
+    }
+    println!(
+        "{}",
+        if dupes > 0 {
+            format!("Found {} duplicate(s)", dupes)
+        } else {
+            format!("No duplicates among {} comments.", comments.len())
+        }
+    );
+}
\ No newline at end of file
diff --git a/extract_comments.py b/scripts/extract_comments.py
similarity index 100%
rename from extract_comments.py
rename to scripts/extract_comments.py