Fixed a certain command; it now scrapes the site because the API is closed

This commit is contained in:
Kwoth
2022-07-07 22:08:22 +02:00
parent 470bb9657f
commit 83c9c372e4
8 changed files with 163 additions and 229 deletions

View File

@@ -0,0 +1,9 @@
using NadekoBot.Modules.Searches.Common;
namespace NadekoBot.Modules.Nsfw;
/// <summary>
/// Abstraction over a source of nhentai gallery data.
/// </summary>
public interface INhentaiService
{
    /// <summary>
    /// Retrieves the gallery with the specified id.
    /// </summary>
    /// <param name="id">Numeric id of the gallery.</param>
    /// <returns>The gallery, or <c>null</c> when none could be retrieved.</returns>
    Task<Gallery?> GetAsync(uint id);

    /// <summary>
    /// Retrieves the ids of the galleries matching a search query.
    /// </summary>
    /// <param name="search">Search query text.</param>
    /// <returns>A read-only list of gallery ids.</returns>
    Task<IReadOnlyList<uint>> GetIdsBySearchAsync(string search);
}

View File

@@ -0,0 +1,115 @@
using AngleSharp.Html.Dom;
using AngleSharp.Html.Parser;
using NadekoBot.Modules.Searches.Common;
namespace NadekoBot.Modules.Nsfw;
/// <summary>
/// <see cref="INhentaiService"/> implementation which downloads nhentai.net HTML pages
/// and extracts gallery data from them with AngleSharp.
/// </summary>
public sealed class NhentaiScraperService : INhentaiService, INService
{
    private const string BaseUrl = "https://nhentai.net";

    private const string UserAgent =
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36";

    // NOTE(review): hard-coded clearance/CSRF cookie values. These look like session-bound tokens
    // which will presumably expire - consider moving them to configuration. Left unchanged here.
    private const string Cookie =
        "cf_clearance=I5pR71P4wJkRBFTLFjBndI.GwfKwT.Gx06uS8XNmRJo-1657214595-0-150; csrftoken=WMWRLtsQtBVQYvYkbqXKJHI9T1JwWCdd3tNhoxHn7aHLUYHAqe60XFUKAoWsJtda";

    // Single parser instance shared by all requests.
    private static readonly HtmlParser _htmlParser = new(new()
    {
        IsScripting = false,
        IsEmbedded = false,
        IsSupportingProcessingInstructions = false,
        IsKeepingSourceReferences = false,
        IsNotSupportingFrames = true
    });

    private readonly IHttpClientFactory _httpFactory;

    public NhentaiScraperService(IHttpClientFactory httpFactory)
    {
        _httpFactory = httpFactory;
    }

    /// <summary>
    /// Creates an http client carrying the User-Agent and Cookie headers used for every request.
    /// </summary>
    private HttpClient GetHttpClient()
    {
        var http = _httpFactory.CreateClient();
        http.DefaultRequestHeaders.Add("User-Agent", UserAgent);
        http.DefaultRequestHeaders.Add("Cookie", Cookie);
        return http;
    }

    /// <summary>
    /// Downloads the gallery page with the specified id and builds a <see cref="Gallery"/> from it.
    /// </summary>
    /// <param name="id">Id of the gallery.</param>
    /// <returns>
    /// The gallery, or <c>null</c> if the request failed or the title, page count,
    /// favorite count or upload date could not be read from the page.
    /// </returns>
    public async Task<Gallery?> GetAsync(uint id)
    {
        using var http = GetHttpClient();
        try
        {
            var url = $"{BaseUrl}/g/{id}/";
            var html = await http.GetStringAsync(url);

            // Everything read from the document is materialized before returning, so it is safe to dispose.
            using var doc = await _htmlParser.ParseDocumentAsync(html);

            var title = doc.QuerySelector("#info .title")?.TextContent;
            var fullTitle = doc.QuerySelector("meta[itemprop=\"name\"]")?.Attributes["content"]?.Value
                            ?? title;
            if (string.IsNullOrWhiteSpace(fullTitle))
            {
                return null;
            }

            // Page count, upload date and tags all live under #tags; without it nothing can be parsed.
            var tagsElem = doc.QuerySelector("#tags");
            if (tagsElem is null)
            {
                return null;
            }

            // Scraped values are machine-readable, so parsing must not depend on the host's culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var pageCount = tagsElem.QuerySelector("a.tag[href^=\"/search/?q=pages\"] span")?.TextContent;
            if (!int.TryParse(pageCount, System.Globalization.NumberStyles.Integer, invariant, out var pc))
            {
                return null;
            }

            var likes = doc.QuerySelector(".buttons .btn-disabled.btn.tooltip span span")
                           ?.TextContent
                           ?.Trim('(', ')');
            if (!int.TryParse(likes, System.Globalization.NumberStyles.Integer, invariant, out var lc))
            {
                return null;
            }

            // Normalize to UTC instead of converting to the server's local time zone.
            var uploadedAt = (tagsElem.QuerySelector(".tag-container .tags time.nobold") as IHtmlTimeElement)?.DateTime;
            if (!DateTime.TryParse(uploadedAt,
                    invariant,
                    System.Globalization.DateTimeStyles.AssumeUniversal
                    | System.Globalization.DateTimeStyles.AdjustToUniversal,
                    out var ua))
            {
                return null;
            }

            var thumb = (doc.QuerySelector("#cover a img") as IHtmlImageElement)?.Dataset["src"];

            // OfType skips any matched element which is not an html anchor instead of throwing.
            var tags = tagsElem.QuerySelectorAll(".tag-container .tags > a.tag[href^=\"/tag\"]")
                               .OfType<IHtmlAnchorElement>()
                               .Select(x => new Tag()
                               {
                                   Name = x.QuerySelector("span:first-child")?.TextContent,
                                   Url = $"{BaseUrl}{x.PathName}"
                               })
                               .ToArray();

            return new Gallery(id,
                url,
                fullTitle,
                title ?? fullTitle, // the short title may be missing even when the meta title exists
                thumb,
                pc,
                lc,
                ua,
                tags);
        }
        catch (HttpRequestException)
        {
            Log.Warning("Nhentai with id {NhentaiId} not found", id);
            return null;
        }
    }

    /// <summary>
    /// Downloads the search results page (sorted by "popular-today") for the specified query
    /// and collects the gallery ids linked from it.
    /// </summary>
    /// <param name="search">Search query; it is url-escaped before being sent.</param>
    /// <returns>The ids found on the page; an empty list if the request failed.</returns>
    public async Task<IReadOnlyList<uint>> GetIdsBySearchAsync(string search)
    {
        using var http = GetHttpClient();
        try
        {
            var url = $"{BaseUrl}/search/?q={Uri.EscapeDataString(search)}&sort=popular-today";
            var html = await http.GetStringAsync(url);
            using var doc = await _htmlParser.ParseDocumentAsync(html);

            var ids = new List<uint>();
            foreach (var anchor in doc.QuerySelectorAll(".container .gallery a").OfType<IHtmlAnchorElement>())
            {
                var path = anchor.PathName;
                if (!path.StartsWith("/g/", StringComparison.Ordinal))
                {
                    continue;
                }

                // Expected shape is "/g/{id}/"; anything which isn't a plain number is skipped
                // rather than failing the whole search with an unhandled exception.
                if (uint.TryParse(path[3..].TrimEnd('/'), out var id))
                {
                    ids.Add(id);
                }
            }

            return ids;
        }
        catch (HttpRequestException)
        {
            Log.Warning("Nhentai search for {NhentaiSearch} failed", search);
            return Array.Empty<uint>();
        }
    }
}

View File

@@ -410,7 +410,11 @@ public partial class NSFW : NadekoModule<ISearchImagesService>
.WithUrl(g.Url)
.AddField(GetText(strs.favorites), g.Likes, true)
.AddField(GetText(strs.pages), g.PageCount, true)
.AddField(GetText(strs.tags), tagString, true)
.AddField(GetText(strs.tags),
string.IsNullOrWhiteSpace(tagString)
? "?"
: tagString,
true)
.WithFooter(g.UploadedAt.ToString("f"))
.WithOkColor();

View File

@@ -1,10 +0,0 @@
#nullable disable
namespace NadekoBot.Modules.Nsfw;
/// <summary>
/// Empty interface; it declares no members.
/// </summary>
public interface INsfwService { }
/// <summary>
/// Empty class; it declares no members and implements no interfaces.
/// </summary>
public class NsfwService { }

View File

@@ -1,21 +1,11 @@
#nullable disable
#nullable disable warnings
using LinqToDB;
using NadekoBot.Modules.Nsfw.Common;
using NadekoBot.Modules.Searches.Common;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
namespace NadekoBot.Modules.Nsfw;
/// <summary>
/// Result of an image url lookup.
/// </summary>
public record UrlReply
{
    /// <summary>Error text of the lookup.</summary>
    public string Error { get; init; }

    /// <summary>Url of the result.</summary>
    public string Url { get; init; }

    /// <summary>Rating of the result.</summary>
    public string Rating { get; init; }

    /// <summary>Provider of the result.</summary>
    public string Provider { get; init; }

    /// <summary>Tags of the result. The list starts empty and is filled by the code which builds the reply.</summary>
    public List<string> Tags { get; } = new List<string>();
}
public class SearchImagesService : ISearchImagesService, INService
{
private ConcurrentDictionary<ulong, HashSet<string>> BlacklistedTags { get; }
@@ -23,18 +13,22 @@ public class SearchImagesService : ISearchImagesService, INService
public ConcurrentDictionary<ulong, Timer> AutoHentaiTimers { get; } = new();
public ConcurrentDictionary<ulong, Timer> AutoBoobTimers { get; } = new();
public ConcurrentDictionary<ulong, Timer> AutoButtTimers { get; } = new();
private readonly Random _rng;
private readonly SearchImageCacher _cache;
private readonly IHttpClientFactory _httpFactory;
private readonly DbService _db;
private readonly INhentaiService _nh;
private readonly object _taglock = new();
public SearchImagesService(
DbService db,
SearchImageCacher cacher,
IHttpClientFactory httpFactory)
IHttpClientFactory httpFactory,
INhentaiService nh)
{
_nh = nh;
_db = db;
_rng = new NadekoRandom();
_cache = cacher;
@@ -284,85 +278,18 @@ public class SearchImagesService : ISearchImagesService, INService
#region Nhentai
private string GetNhentaiExtensionInternal(string s)
=> s switch
{
"j" => "jpg",
"p" => "png",
"g" => "gif",
_ => "jpg"
};
public Task<Gallery?> GetNhentaiByIdAsync(uint id)
=> _nh.GetAsync(id);
private Gallery ModelToGallery(NhentaiApiModel.Gallery model)
public async Task<Gallery?> GetNhentaiBySearchAsync(string search)
{
var thumbnail = $"https://t.nhentai.net/galleries/{model.MediaId}/thumb."
+ GetNhentaiExtensionInternal(model.Images.Thumbnail.T);
var ids = await _nh.GetIdsBySearchAsync(search);
var url = $"https://nhentai.net/g/{model.Id}";
return new(model.Id.ToString(),
url,
model.Title.English,
model.Title.Pretty,
thumbnail,
model.NumPages,
model.NumFavorites,
model.UploadDate.ToUnixTimestamp().UtcDateTime,
model.Tags.Map(x => new Tag
{
Name = x.Name,
Url = "https://nhentai.com/" + x.Url
}));
}
private async Task<NhentaiApiModel.Gallery> GetNhentaiByIdInternalAsync(uint id)
{
using var http = _httpFactory.CreateClient();
try
{
var res = await http.GetStringAsync("https://nhentai.net/api/gallery/" + id);
return JsonConvert.DeserializeObject<NhentaiApiModel.Gallery>(res);
}
catch (HttpRequestException)
{
Log.Warning("Nhentai with id {NhentaiId} not found", id);
return null;
}
}
private async Task<NhentaiApiModel.Gallery[]> SearchNhentaiInternalAsync(string search)
{
using var http = _httpFactory.CreateClient();
try
{
var res = await http.GetStringAsync("https://nhentai.net/api/galleries/search?query=" + search);
return JsonConvert.DeserializeObject<NhentaiApiModel.SearchResult>(res).Result;
}
catch (HttpRequestException)
{
Log.Warning("Nhentai with search {NhentaiSearch} not found", search);
return null;
}
}
public async Task<Gallery> GetNhentaiByIdAsync(uint id)
{
var model = await GetNhentaiByIdInternalAsync(id);
return ModelToGallery(model);
}
private static readonly string[] _bannedTags = { "loli", "lolicon", "shota", "shotacon", "cub" };
public async Task<Gallery> GetNhentaiBySearchAsync(string search)
{
var models = await SearchNhentaiInternalAsync(search);
models = models.Where(x => !x.Tags.Any(t => _bannedTags.Contains(t.Name))).ToArray();
if (models.Length == 0)
if (ids.Count == 0)
return null;
return ModelToGallery(models[_rng.Next(0, models.Length)]);
var id = ids[_rng.Next(0, ids.Count)];
return await _nh.GetAsync(id);
}
#endregion

View File

@@ -0,0 +1,10 @@
namespace NadekoBot.Modules.Nsfw;
/// <summary>
/// Result of an image url lookup.
/// </summary>
public record UrlReply
{
    /// <summary>Error text of the lookup.</summary>
    public string Error { get; init; }

    /// <summary>Url of the result.</summary>
    public string Url { get; init; }

    /// <summary>Rating of the result.</summary>
    public string Rating { get; init; }

    /// <summary>Provider of the result.</summary>
    public string Provider { get; init; }

    /// <summary>Tags of the result. The list starts empty and is filled by the code which builds the reply.</summary>
    public List<string> Tags { get; } = new List<string>();
}

View File

@@ -9,7 +9,7 @@ public sealed class Tag
public sealed class Gallery
{
public string Id { get; }
public uint Id { get; }
public string Url { get; }
public string FullTitle { get; }
public string Title { get; }
@@ -21,7 +21,7 @@ public sealed class Gallery
public Gallery(
string id,
uint id,
string url,
string fullTitle,
string title,

View File

@@ -1,121 +0,0 @@
#nullable disable
using Newtonsoft.Json;
namespace NadekoBot.Modules.Searches.Common;
/// <summary>
/// Container for the classes which nhentai api json responses are deserialized into.
/// Every property is bound to its json field through <see cref="JsonPropertyAttribute"/>.
/// </summary>
public static class NhentaiApiModel
{
    /// <summary>Titles of a gallery.</summary>
    public class Title
    {
        [JsonProperty("english")] public string English { get; set; }

        [JsonProperty("japanese")] public string Japanese { get; set; }

        [JsonProperty("pretty")] public string Pretty { get; set; }
    }

    /// <summary>A single page image ("t", "w" and "h" json fields).</summary>
    public class Page
    {
        [JsonProperty("t")] public string T { get; set; }

        [JsonProperty("w")] public int W { get; set; }

        [JsonProperty("h")] public int H { get; set; }
    }

    /// <summary>The cover image ("t", "w" and "h" json fields).</summary>
    public class Cover
    {
        [JsonProperty("t")] public string T { get; set; }

        [JsonProperty("w")] public int W { get; set; }

        [JsonProperty("h")] public int H { get; set; }
    }

    /// <summary>The thumbnail image ("t", "w" and "h" json fields).</summary>
    public class Thumbnail
    {
        [JsonProperty("t")] public string T { get; set; }

        [JsonProperty("w")] public int W { get; set; }

        [JsonProperty("h")] public int H { get; set; }
    }

    /// <summary>All images of a gallery: its pages, cover and thumbnail.</summary>
    public class Images
    {
        [JsonProperty("pages")] public List<Page> Pages { get; set; }

        [JsonProperty("cover")] public Cover Cover { get; set; }

        [JsonProperty("thumbnail")] public Thumbnail Thumbnail { get; set; }
    }

    /// <summary>A tag attached to a gallery.</summary>
    public class Tag
    {
        [JsonProperty("id")] public int Id { get; set; }

        [JsonProperty("type")] public string Type { get; set; }

        [JsonProperty("name")] public string Name { get; set; }

        [JsonProperty("url")] public string Url { get; set; }

        [JsonProperty("count")] public int Count { get; set; }
    }

    /// <summary>A gallery as returned by the api.</summary>
    public class Gallery
    {
        [JsonProperty("id")] public int Id { get; set; }

        [JsonProperty("media_id")] public string MediaId { get; set; }

        [JsonProperty("title")] public Title Title { get; set; }

        [JsonProperty("images")] public Images Images { get; set; }

        [JsonProperty("scanlator")] public string Scanlator { get; set; }

        // NOTE(review): presumably a unix timestamp - confirm against the api
        [JsonProperty("upload_date")] public double UploadDate { get; set; }

        [JsonProperty("tags")] public Tag[] Tags { get; set; }

        [JsonProperty("num_pages")] public int NumPages { get; set; }

        [JsonProperty("num_favorites")] public int NumFavorites { get; set; }
    }

    /// <summary>Search response; the matching galleries are in the "result" json field.</summary>
    public class SearchResult
    {
        [JsonProperty("result")] public Gallery[] Result { get; set; }
    }
}