mirror of
https://gitlab.com/Kwoth/nadekobot.git
synced 2025-09-10 09:18:27 -04:00
Fixed a certain command by switching to scraping, as the API is closed
This commit is contained in:
9
src/NadekoBot/Modules/Nsfw/Nhentai/INhentaiService.cs
Normal file
9
src/NadekoBot/Modules/Nsfw/Nhentai/INhentaiService.cs
Normal file
@@ -0,0 +1,9 @@
|
||||
using NadekoBot.Modules.Searches.Common;
|
||||
|
||||
namespace NadekoBot.Modules.Nsfw;
|
||||
|
||||
/// <summary>
/// Abstraction over a source of nhentai gallery data.
/// </summary>
public interface INhentaiService
{
    /// <summary>
    /// Retrieves a single gallery by its numeric id.
    /// </summary>
    /// <param name="id">Numeric id of the gallery.</param>
    /// <returns>The gallery, or <c>null</c> when none could be retrieved.</returns>
    Task<Gallery?> GetAsync(uint id);

    /// <summary>
    /// Retrieves the ids of the galleries matching a search query.
    /// </summary>
    /// <param name="search">Search query text.</param>
    /// <returns>A read-only list of gallery ids.</returns>
    Task<IReadOnlyList<uint>> GetIdsBySearchAsync(string search);
}
|
115
src/NadekoBot/Modules/Nsfw/Nhentai/NhentaiScraperService.cs
Normal file
115
src/NadekoBot/Modules/Nsfw/Nhentai/NhentaiScraperService.cs
Normal file
@@ -0,0 +1,115 @@
|
||||
using AngleSharp.Html.Dom;
|
||||
using AngleSharp.Html.Parser;
|
||||
using NadekoBot.Modules.Searches.Common;
|
||||
|
||||
namespace NadekoBot.Modules.Nsfw;
|
||||
|
||||
/// <summary>
/// <see cref="INhentaiService"/> implementation which downloads nhentai.net HTML pages
/// and extracts gallery data from the markup using AngleSharp.
/// </summary>
public sealed class NhentaiScraperService : INhentaiService, INService
{
    private const string UserAgent =
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36";

    // NOTE(review): hard-coded session cookies committed to source. Presumably these
    // expire or get invalidated by the site - consider moving them to configuration.
    private const string Cookie =
        "cf_clearance=I5pR71P4wJkRBFTLFjBndI.GwfKwT.Gx06uS8XNmRJo-1657214595-0-150; csrftoken=WMWRLtsQtBVQYvYkbqXKJHI9T1JwWCdd3tNhoxHn7aHLUYHAqe60XFUKAoWsJtda";

    // Path prefix of gallery links, e.g. "/g/123456/".
    private const string GalleryPathPrefix = "/g/";

    private readonly IHttpClientFactory _httpFactory;

    private static readonly HtmlParser _htmlParser = new(new()
    {
        IsScripting = false,
        IsEmbedded = false,
        IsSupportingProcessingInstructions = false,
        IsKeepingSourceReferences = false,
        IsNotSupportingFrames = true
    });

    public NhentaiScraperService(IHttpClientFactory httpFactory)
    {
        _httpFactory = httpFactory;
    }

    /// <summary>
    /// Creates an <see cref="HttpClient"/> which sends the browser-like User-Agent
    /// and Cookie headers with every request.
    /// </summary>
    private HttpClient GetHttpClient()
    {
        var http = _httpFactory.CreateClient();
        http.DefaultRequestHeaders.Add("User-Agent", UserAgent);
        http.DefaultRequestHeaders.Add("Cookie", Cookie);
        return http;
    }

    /// <summary>
    /// Downloads the page of the gallery with the specified id and parses it.
    /// </summary>
    /// <param name="id">Numeric id of the gallery.</param>
    /// <returns>
    /// The parsed gallery, or <c>null</c> when the request fails with an
    /// <see cref="HttpRequestException"/> or when the title, page count, favorite count
    /// or upload date can't be extracted from the page.
    /// </returns>
    public async Task<Gallery?> GetAsync(uint id)
    {
        using var http = GetHttpClient();
        try
        {
            var url = $"https://nhentai.net/g/{id}/";
            var strRes = await http.GetStringAsync(url);
            var doc = await _htmlParser.ParseDocumentAsync(strRes);

            // page count, upload date and tags are all read from #tags;
            // without it the mandatory fields below can't be parsed
            var tagsElem = doc.QuerySelector("#tags");
            if (tagsElem is null)
                return null;

            var title = doc.QuerySelector("#info .title")?.TextContent;
            var fullTitle = doc.QuerySelector("meta[itemprop=\"name\"]")?.Attributes["content"]?.Value
                            ?? title;
            if (string.IsNullOrWhiteSpace(fullTitle))
                return null;

            // scraped values are machine-formatted, so they are parsed with the invariant
            // culture instead of whatever culture the host happens to run under
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var pageCount = tagsElem.QuerySelector("a.tag[href^=\"/search/?q=pages\"] span")?.TextContent;
            if (!int.TryParse(pageCount, System.Globalization.NumberStyles.Integer, invariant, out var pc))
                return null;

            // the favorite count is rendered wrapped in parentheses
            var likes = doc.QuerySelector(".buttons .btn-disabled.btn.tooltip span span")?.TextContent?.Trim('(', ')');
            if (!int.TryParse(likes, System.Globalization.NumberStyles.Integer, invariant, out var lc))
                return null;

            var uploadedAt = (tagsElem.QuerySelector(".tag-container .tags time.nobold") as IHtmlTimeElement)?.DateTime;
            if (!DateTime.TryParse(uploadedAt, invariant, System.Globalization.DateTimeStyles.None, out var ua))
                return null;

            var thumb = (doc.QuerySelector("#cover a img") as IHtmlImageElement)?.Dataset["src"];

            // OfType instead of Cast: a matched node which isn't an HTML anchor is skipped
            // rather than blowing up the whole lookup with an InvalidCastException
            var tags = tagsElem.QuerySelectorAll(".tag-container .tags > a.tag[href^=\"/tag\"]")
                               .OfType<IHtmlAnchorElement>()
                               .Select(x => new Tag()
                               {
                                   Name = x.QuerySelector("span:first-child")?.TextContent,
                                   Url = $"https://nhentai.net{x.PathName}"
                               })
                               .ToArray();

            return new Gallery(id,
                url,
                fullTitle,
                title,
                thumb,
                pc,
                lc,
                ua,
                tags);
        }
        catch (HttpRequestException ex)
        {
            // keep the exception in the log entry: the failure isn't necessarily a 404
            Log.Warning(ex, "Nhentai with id {NhentaiId} not found", id);
            return null;
        }
    }

    /// <summary>
    /// Downloads the search results page for the specified query (sorted by
    /// "popular-today") and collects the ids of the galleries linked from it.
    /// </summary>
    /// <param name="search">Search query. It is URL-escaped before being sent.</param>
    /// <returns>
    /// Ids of the galleries found on the results page. Empty when the request fails with an
    /// <see cref="HttpRequestException"/>. Links which don't carry a valid id are skipped.
    /// </returns>
    public async Task<IReadOnlyList<uint>> GetIdsBySearchAsync(string search)
    {
        using var http = GetHttpClient();
        try
        {
            var url = $"https://nhentai.net/search/?q={Uri.EscapeDataString(search)}&sort=popular-today";
            var strRes = await http.GetStringAsync(url);
            var doc = await _htmlParser.ParseDocumentAsync(strRes);

            var ids = new List<uint>();
            foreach (var anchor in doc.QuerySelectorAll(".container .gallery a").OfType<IHtmlAnchorElement>())
            {
                if (TryParseGalleryId(anchor.PathName, out var galleryId))
                    ids.Add(galleryId);
            }

            return ids;
        }
        catch (HttpRequestException ex)
        {
            Log.Warning(ex, "Nhentai search for {NhentaiSearch} failed", search);
            return Array.Empty<uint>();
        }
    }

    /// <summary>
    /// Extracts the numeric gallery id from a link path such as "/g/123456/".
    /// The trailing slash is optional.
    /// </summary>
    /// <returns><c>true</c> if the path is a gallery link with a valid numeric id.</returns>
    private static bool TryParseGalleryId(string pathName, out uint id)
    {
        id = 0;
        if (string.IsNullOrEmpty(pathName)
            || !pathName.StartsWith(GalleryPathPrefix, StringComparison.Ordinal))
            return false;

        var idText = pathName[GalleryPathPrefix.Length..].TrimEnd('/');

        // digits only - no signs, whitespace or separators
        return uint.TryParse(idText,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out id);
    }
}
|
@@ -404,15 +404,19 @@ public partial class NSFW : NadekoModule<ISearchImagesService>
|
||||
.Join(" ");
|
||||
|
||||
var embed = _eb.Create()
|
||||
.WithTitle(g.Title)
|
||||
.WithDescription(g.FullTitle)
|
||||
.WithImageUrl(g.Thumbnail)
|
||||
.WithUrl(g.Url)
|
||||
.AddField(GetText(strs.favorites), g.Likes, true)
|
||||
.AddField(GetText(strs.pages), g.PageCount, true)
|
||||
.AddField(GetText(strs.tags), tagString, true)
|
||||
.WithFooter(g.UploadedAt.ToString("f"))
|
||||
.WithOkColor();
|
||||
.WithTitle(g.Title)
|
||||
.WithDescription(g.FullTitle)
|
||||
.WithImageUrl(g.Thumbnail)
|
||||
.WithUrl(g.Url)
|
||||
.AddField(GetText(strs.favorites), g.Likes, true)
|
||||
.AddField(GetText(strs.pages), g.PageCount, true)
|
||||
.AddField(GetText(strs.tags),
|
||||
string.IsNullOrWhiteSpace(tagString)
|
||||
? "?"
|
||||
: tagString,
|
||||
true)
|
||||
.WithFooter(g.UploadedAt.ToString("f"))
|
||||
.WithOkColor();
|
||||
|
||||
await ctx.Channel.EmbedAsync(embed);
|
||||
}
|
||||
|
@@ -1,10 +0,0 @@
|
||||
#nullable disable
|
||||
namespace NadekoBot.Modules.Nsfw;
|
||||
|
||||
/// <summary>
/// Empty interface; it declares no members.
/// </summary>
public interface INsfwService
{
}
|
||||
|
||||
/// <summary>
/// Empty class; it declares no members.
/// </summary>
public class NsfwService
{
}
|
@@ -1,21 +1,11 @@
|
||||
#nullable disable
|
||||
#nullable disable warnings
|
||||
using LinqToDB;
|
||||
using NadekoBot.Modules.Nsfw.Common;
|
||||
using NadekoBot.Modules.Searches.Common;
|
||||
using Newtonsoft.Json;
|
||||
using Newtonsoft.Json.Linq;
|
||||
|
||||
namespace NadekoBot.Modules.Nsfw;
|
||||
|
||||
/// <summary>
/// Data record with an image url, its rating, provider and tags, plus an error text.
/// </summary>
public record UrlReply
{
    /// <summary>Error text.</summary>
    public string Error { get; init; }

    /// <summary>Url of the image.</summary>
    public string Url { get; init; }

    /// <summary>Rating of the image.</summary>
    public string Rating { get; init; }

    /// <summary>Provider of the image.</summary>
    public string Provider { get; init; }

    /// <summary>Tags of the image. The list starts out empty and can be added to.</summary>
    public List<string> Tags { get; } = new List<string>();
}
|
||||
|
||||
public class SearchImagesService : ISearchImagesService, INService
|
||||
{
|
||||
private ConcurrentDictionary<ulong, HashSet<string>> BlacklistedTags { get; }
|
||||
@@ -23,18 +13,22 @@ public class SearchImagesService : ISearchImagesService, INService
|
||||
public ConcurrentDictionary<ulong, Timer> AutoHentaiTimers { get; } = new();
|
||||
public ConcurrentDictionary<ulong, Timer> AutoBoobTimers { get; } = new();
|
||||
public ConcurrentDictionary<ulong, Timer> AutoButtTimers { get; } = new();
|
||||
|
||||
private readonly Random _rng;
|
||||
private readonly SearchImageCacher _cache;
|
||||
private readonly IHttpClientFactory _httpFactory;
|
||||
private readonly DbService _db;
|
||||
private readonly INhentaiService _nh;
|
||||
|
||||
private readonly object _taglock = new();
|
||||
|
||||
public SearchImagesService(
|
||||
DbService db,
|
||||
SearchImageCacher cacher,
|
||||
IHttpClientFactory httpFactory)
|
||||
IHttpClientFactory httpFactory,
|
||||
INhentaiService nh)
|
||||
{
|
||||
_nh = nh;
|
||||
_db = db;
|
||||
_rng = new NadekoRandom();
|
||||
_cache = cacher;
|
||||
@@ -284,85 +278,18 @@ public class SearchImagesService : ISearchImagesService, INService
|
||||
|
||||
#region Nhentai
|
||||
|
||||
// Maps a one-letter code to an image file extension:
// "p" -> "png", "g" -> "gif", and "j" or anything else -> "jpg".
private string GetNhentaiExtensionInternal(string s)
{
    if (s == "p")
        return "png";

    if (s == "g")
        return "gif";

    // covers "j" as well as every unrecognized (or null) code
    return "jpg";
}
|
||||
// Looks up a gallery by id; the call is forwarded unchanged to the injected INhentaiService.
public Task<Gallery?> GetNhentaiByIdAsync(uint id)
{
    return _nh.GetAsync(id);
}
|
||||
|
||||
private Gallery ModelToGallery(NhentaiApiModel.Gallery model)
|
||||
public async Task<Gallery?> GetNhentaiBySearchAsync(string search)
|
||||
{
|
||||
var thumbnail = $"https://t.nhentai.net/galleries/{model.MediaId}/thumb."
|
||||
+ GetNhentaiExtensionInternal(model.Images.Thumbnail.T);
|
||||
var ids = await _nh.GetIdsBySearchAsync(search);
|
||||
|
||||
var url = $"https://nhentai.net/g/{model.Id}";
|
||||
return new(model.Id.ToString(),
|
||||
url,
|
||||
model.Title.English,
|
||||
model.Title.Pretty,
|
||||
thumbnail,
|
||||
model.NumPages,
|
||||
model.NumFavorites,
|
||||
model.UploadDate.ToUnixTimestamp().UtcDateTime,
|
||||
model.Tags.Map(x => new Tag
|
||||
{
|
||||
Name = x.Name,
|
||||
Url = "https://nhentai.com/" + x.Url
|
||||
}));
|
||||
}
|
||||
|
||||
private async Task<NhentaiApiModel.Gallery> GetNhentaiByIdInternalAsync(uint id)
|
||||
{
|
||||
using var http = _httpFactory.CreateClient();
|
||||
try
|
||||
{
|
||||
var res = await http.GetStringAsync("https://nhentai.net/api/gallery/" + id);
|
||||
return JsonConvert.DeserializeObject<NhentaiApiModel.Gallery>(res);
|
||||
}
|
||||
catch (HttpRequestException)
|
||||
{
|
||||
Log.Warning("Nhentai with id {NhentaiId} not found", id);
|
||||
if (ids.Count == 0)
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<NhentaiApiModel.Gallery[]> SearchNhentaiInternalAsync(string search)
|
||||
{
|
||||
using var http = _httpFactory.CreateClient();
|
||||
try
|
||||
{
|
||||
var res = await http.GetStringAsync("https://nhentai.net/api/galleries/search?query=" + search);
|
||||
return JsonConvert.DeserializeObject<NhentaiApiModel.SearchResult>(res).Result;
|
||||
}
|
||||
catch (HttpRequestException)
|
||||
{
|
||||
Log.Warning("Nhentai with search {NhentaiSearch} not found", search);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public async Task<Gallery> GetNhentaiByIdAsync(uint id)
|
||||
{
|
||||
var model = await GetNhentaiByIdInternalAsync(id);
|
||||
|
||||
return ModelToGallery(model);
|
||||
}
|
||||
|
||||
private static readonly string[] _bannedTags = { "loli", "lolicon", "shota", "shotacon", "cub" };
|
||||
|
||||
public async Task<Gallery> GetNhentaiBySearchAsync(string search)
|
||||
{
|
||||
var models = await SearchNhentaiInternalAsync(search);
|
||||
|
||||
models = models.Where(x => !x.Tags.Any(t => _bannedTags.Contains(t.Name))).ToArray();
|
||||
|
||||
if (models.Length == 0)
|
||||
return null;
|
||||
|
||||
return ModelToGallery(models[_rng.Next(0, models.Length)]);
|
||||
|
||||
var id = ids[_rng.Next(0, ids.Count)];
|
||||
return await _nh.GetAsync(id);
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
10
src/NadekoBot/Modules/Nsfw/UrlReply.cs
Normal file
10
src/NadekoBot/Modules/Nsfw/UrlReply.cs
Normal file
@@ -0,0 +1,10 @@
|
||||
namespace NadekoBot.Modules.Nsfw;
|
||||
|
||||
/// <summary>
/// Data record with an image url, its rating, provider and tags, plus an error text.
/// </summary>
public record UrlReply
{
    /// <summary>Error text.</summary>
    public string Error { get; init; }

    /// <summary>Url of the image.</summary>
    public string Url { get; init; }

    /// <summary>Rating of the image.</summary>
    public string Rating { get; init; }

    /// <summary>Provider of the image.</summary>
    public string Provider { get; init; }

    /// <summary>Tags of the image. The list starts out empty and can be added to.</summary>
    public List<string> Tags { get; } = new List<string>();
}
|
@@ -9,7 +9,7 @@ public sealed class Tag
|
||||
|
||||
public sealed class Gallery
|
||||
{
|
||||
public string Id { get; }
|
||||
public uint Id { get; }
|
||||
public string Url { get; }
|
||||
public string FullTitle { get; }
|
||||
public string Title { get; }
|
||||
@@ -21,7 +21,7 @@ public sealed class Gallery
|
||||
|
||||
|
||||
public Gallery(
|
||||
string id,
|
||||
uint id,
|
||||
string url,
|
||||
string fullTitle,
|
||||
string title,
|
||||
|
@@ -1,121 +0,0 @@
|
||||
#nullable disable
|
||||
using Newtonsoft.Json;
|
||||
|
||||
namespace NadekoBot.Modules.Searches.Common;
|
||||
|
||||
/// <summary>
/// Container for the JSON-mapped model classes of nhentai api responses.
/// Each property is bound to the JSON property named in its <c>JsonProperty</c> attribute.
/// </summary>
public static class NhentaiApiModel
{
    /// <summary>Titles of a gallery ("english", "japanese" and "pretty" variants).</summary>
    public class Title
    {
        [JsonProperty("english")] public string English { get; set; }
        [JsonProperty("japanese")] public string Japanese { get; set; }
        [JsonProperty("pretty")] public string Pretty { get; set; }
    }

    /// <summary>Entry of the "pages" list, with "t", "w" and "h" values.</summary>
    public class Page
    {
        [JsonProperty("t")] public string T { get; set; }
        [JsonProperty("w")] public int W { get; set; }
        [JsonProperty("h")] public int H { get; set; }
    }

    /// <summary>The "cover" object, with "t", "w" and "h" values.</summary>
    public class Cover
    {
        [JsonProperty("t")] public string T { get; set; }
        [JsonProperty("w")] public int W { get; set; }
        [JsonProperty("h")] public int H { get; set; }
    }

    /// <summary>The "thumbnail" object, with "t", "w" and "h" values.</summary>
    public class Thumbnail
    {
        [JsonProperty("t")] public string T { get; set; }
        [JsonProperty("w")] public int W { get; set; }
        [JsonProperty("h")] public int H { get; set; }
    }

    /// <summary>The "images" object of a gallery: its pages, cover and thumbnail.</summary>
    public class Images
    {
        [JsonProperty("pages")] public List<Page> Pages { get; set; }
        [JsonProperty("cover")] public Cover Cover { get; set; }
        [JsonProperty("thumbnail")] public Thumbnail Thumbnail { get; set; }
    }

    /// <summary>Entry of a gallery's "tags" array.</summary>
    public class Tag
    {
        [JsonProperty("id")] public int Id { get; set; }
        [JsonProperty("type")] public string Type { get; set; }
        [JsonProperty("name")] public string Name { get; set; }
        [JsonProperty("url")] public string Url { get; set; }
        [JsonProperty("count")] public int Count { get; set; }
    }

    /// <summary>A gallery object.</summary>
    public class Gallery
    {
        [JsonProperty("id")] public int Id { get; set; }
        [JsonProperty("media_id")] public string MediaId { get; set; }
        [JsonProperty("title")] public Title Title { get; set; }
        [JsonProperty("images")] public Images Images { get; set; }
        [JsonProperty("scanlator")] public string Scanlator { get; set; }
        [JsonProperty("upload_date")] public double UploadDate { get; set; }
        [JsonProperty("tags")] public Tag[] Tags { get; set; }
        [JsonProperty("num_pages")] public int NumPages { get; set; }
        [JsonProperty("num_favorites")] public int NumFavorites { get; set; }
    }

    /// <summary>Search response: the galleries in its "result" array.</summary>
    public class SearchResult
    {
        [JsonProperty("result")] public Gallery[] Result { get; set; }
    }
}
|
Reference in New Issue
Block a user