diff --git a/src/NadekoBot/Modules/Nsfw/Nhentai/INhentaiService.cs b/src/NadekoBot/Modules/Nsfw/Nhentai/INhentaiService.cs
new file mode 100644
index 000000000..9e654df12
--- /dev/null
+++ b/src/NadekoBot/Modules/Nsfw/Nhentai/INhentaiService.cs
@@ -0,0 +1,23 @@
+using NadekoBot.Modules.Searches.Common;
+
+namespace NadekoBot.Modules.Nsfw;
+
+/// <summary>
+/// Provides access to nhentai galleries, by id or by search query.
+/// </summary>
+public interface INhentaiService
+{
+    /// <summary>
+    /// Gets the gallery with the specified id.
+    /// </summary>
+    /// <param name="id">Id of the gallery</param>
+    /// <returns>The gallery, or <c>null</c> if it could not be retrieved</returns>
+    Task<Gallery?> GetAsync(uint id);
+
+    /// <summary>
+    /// Gets the ids of the galleries found for the specified search query.
+    /// </summary>
+    /// <param name="search">Search query</param>
+    /// <returns>Ids of the found galleries; empty if nothing was found</returns>
+    Task<IReadOnlyList<uint>> GetIdsBySearchAsync(string search);
+}
\ No newline at end of file
diff --git a/src/NadekoBot/Modules/Nsfw/Nhentai/NhentaiScraperService.cs b/src/NadekoBot/Modules/Nsfw/Nhentai/NhentaiScraperService.cs
new file mode 100644
index 000000000..ddb5f1079
--- /dev/null
+++ b/src/NadekoBot/Modules/Nsfw/Nhentai/NhentaiScraperService.cs
@@ -0,0 +1,149 @@
+using System.Globalization;
+using AngleSharp.Html.Dom;
+using AngleSharp.Html.Parser;
+using NadekoBot.Modules.Searches.Common;
+
+namespace NadekoBot.Modules.Nsfw;
+
+/// <summary>
+/// <see cref="INhentaiService"/> implementation which scrapes nhentai.net html pages.
+/// </summary>
+public sealed class NhentaiScraperService : INhentaiService, INService
+{
+    private readonly IHttpClientFactory _httpFactory;
+
+    private static readonly HtmlParser _htmlParser = new(new()
+    {
+        IsScripting = false,
+        IsEmbedded = false,
+        IsSupportingProcessingInstructions = false,
+        IsKeepingSourceReferences = false,
+        IsNotSupportingFrames = true
+    });
+
+    public NhentaiScraperService(IHttpClientFactory httpFactory)
+    {
+        _httpFactory = httpFactory;
+    }
+
+    /// <summary>
+    /// Creates an http client which sends a browser user agent and cookies with each request.
+    /// </summary>
+    private HttpClient GetHttpClient()
+    {
+        var http = _httpFactory.CreateClient();
+        // NOTE(review): these header values are hard-coded in source; the cookie values look
+        // session-bound and should probably be supplied through configuration - confirm.
+        http.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36");
+        http.DefaultRequestHeaders.Add("Cookie", "cf_clearance=I5pR71P4wJkRBFTLFjBndI.GwfKwT.Gx06uS8XNmRJo-1657214595-0-150; csrftoken=WMWRLtsQtBVQYvYkbqXKJHI9T1JwWCdd3tNhoxHn7aHLUYHAqe60XFUKAoWsJtda");
+        return http;
+    }
+
+    /// <summary>
+    /// Downloads the page of the gallery with the specified id and extracts the gallery data from it.
+    /// </summary>
+    /// <param name="id">Id of the gallery</param>
+    /// <returns>
+    /// The gallery, or <c>null</c> if the request failed
+    /// or if the title, page count, favorite count or upload date could not be read from the page.
+    /// </returns>
+    public async Task<Gallery?> GetAsync(uint id)
+    {
+        using var http = GetHttpClient();
+        try
+        {
+            var url = $"https://nhentai.net/g/{id}/";
+            var strRes = await http.GetStringAsync(url);
+            var doc = await _htmlParser.ParseDocumentAsync(strRes);
+
+            var title = doc.QuerySelector("#info .title")?.TextContent;
+            var fullTitle = doc.QuerySelector("meta[itemprop=\"name\"]")?.Attributes["content"]?.Value
+                            ?? title;
+            var thumb = (doc.QuerySelector("#cover a img") as IHtmlImageElement)?.Dataset["src"];
+
+            var tagsElem = doc.QuerySelector("#tags");
+
+            var pageCount = tagsElem?.QuerySelector("a.tag[href^=\"/search/?q=pages\"] span")?.TextContent;
+            var likes = doc.QuerySelector(".buttons .btn-disabled.btn.tooltip span span")?.TextContent?.Trim('(', ')');
+            var uploadedAt = (tagsElem?.QuerySelector(".tag-container .tags time.nobold") as IHtmlTimeElement)?.DateTime;
+
+            // fall back to an empty array so that the gallery never carries null tags
+            var tags = tagsElem?.QuerySelectorAll(".tag-container .tags > a.tag[href^=\"/tag\"]")
+                               .Cast<IHtmlAnchorElement>()
+                               .Select(x => new Tag()
+                               {
+                                   Name = x.QuerySelector("span:first-child")?.TextContent,
+                                   Url = $"https://nhentai.net{x.PathName}"
+                               })
+                               .ToArray()
+                       ?? Array.Empty<Tag>();
+
+            if (string.IsNullOrWhiteSpace(fullTitle))
+                return null;
+
+            if (!int.TryParse(pageCount, out var pc))
+                return null;
+
+            if (!int.TryParse(likes, out var lc))
+                return null;
+
+            // parse culture-independently and normalize to utc
+            // so that the result doesn't depend on the locale or the time zone of the host
+            if (!DateTime.TryParse(uploadedAt,
+                    CultureInfo.InvariantCulture,
+                    DateTimeStyles.AdjustToUniversal | DateTimeStyles.AssumeUniversal,
+                    out var ua))
+                return null;
+
+            return new Gallery(id,
+                url,
+                fullTitle,
+                title,
+                thumb,
+                pc,
+                lc,
+                ua,
+                tags);
+        }
+        catch (HttpRequestException)
+        {
+            Log.Warning("Nhentai with id {NhentaiId} not found", id);
+            return null;
+        }
+    }
+
+    /// <summary>
+    /// Requests the search page for the specified query and collects the ids of the galleries linked on it.
+    /// </summary>
+    /// <param name="search">Search query</param>
+    /// <returns>Ids of the found galleries, or an empty collection if the request failed</returns>
+    public async Task<IReadOnlyList<uint>> GetIdsBySearchAsync(string search)
+    {
+        using var http = GetHttpClient();
+        try
+        {
+            var url = $"https://nhentai.net/search/?q={Uri.EscapeDataString(search)}&sort=popular-today";
+            var strRes = await http.GetStringAsync(url);
+            var doc = await _htmlParser.ParseDocumentAsync(strRes);
+
+            var ids = new List<uint>();
+            foreach (var anchor in doc.QuerySelectorAll(".container .gallery a").Cast<IHtmlAnchorElement>())
+            {
+                var path = anchor.PathName;
+                if (!path.StartsWith("/g/", StringComparison.Ordinal))
+                    continue;
+
+                // gallery links look like "/g/{id}/"; skip links without a numeric id instead of throwing
+                if (uint.TryParse(path[3..].TrimEnd('/'), NumberStyles.None, CultureInfo.InvariantCulture, out var id))
+                    ids.Add(id);
+            }
+
+            return ids;
+        }
+        catch (HttpRequestException)
+        {
+            Log.Warning("Nhentai search for {NhentaiSearch} failed", search);
+            return Array.Empty<uint>();
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/NadekoBot/Modules/Nsfw/Nsfw.cs b/src/NadekoBot/Modules/Nsfw/Nsfw.cs
index 6ebe77d9a..05d18f075 100644
--- a/src/NadekoBot/Modules/Nsfw/Nsfw.cs
+++ b/src/NadekoBot/Modules/Nsfw/Nsfw.cs
@@ -404,15 +404,19 @@ public partial class NSFW : NadekoModule
                                .Join(" ");
 
         var embed = _eb.Create()
-                        .WithTitle(g.Title)
-                        .WithDescription(g.FullTitle)
-                        .WithImageUrl(g.Thumbnail)
-                        .WithUrl(g.Url)
-                        .AddField(GetText(strs.favorites), g.Likes, true)
-                        .AddField(GetText(strs.pages), g.PageCount, true)
-                        .AddField(GetText(strs.tags), tagString, true)
-                        .WithFooter(g.UploadedAt.ToString("f"))
-                        .WithOkColor();
+                       .WithTitle(g.Title)
+                       .WithDescription(g.FullTitle)
+                       .WithImageUrl(g.Thumbnail)
+                       .WithUrl(g.Url)
+                       .AddField(GetText(strs.favorites), g.Likes, true)
+                       .AddField(GetText(strs.pages), g.PageCount, true)
+                       .AddField(GetText(strs.tags),
+                           string.IsNullOrWhiteSpace(tagString)
+                               ? "?"
+                               : tagString,
+                           true)
+                       .WithFooter(g.UploadedAt.ToString("f"))
+                       .WithOkColor();
 
         await ctx.Channel.EmbedAsync(embed);
     }
diff --git a/src/NadekoBot/Modules/Nsfw/NsfwService.cs b/src/NadekoBot/Modules/Nsfw/NsfwService.cs
deleted file mode 100644
index a58ae21e1..000000000
--- a/src/NadekoBot/Modules/Nsfw/NsfwService.cs
+++ /dev/null
@@ -1,10 +0,0 @@
-#nullable disable
-namespace NadekoBot.Modules.Nsfw;
-
-public interface INsfwService
-{
-}
-
-public class NsfwService
-{
-}
\ No newline at end of file
diff --git a/src/NadekoBot/Modules/Nsfw/SearchImagesService.cs b/src/NadekoBot/Modules/Nsfw/SearchImagesService.cs
index 1b266fbea..8e15dd0d2 100644
--- a/src/NadekoBot/Modules/Nsfw/SearchImagesService.cs
+++ b/src/NadekoBot/Modules/Nsfw/SearchImagesService.cs
@@ -1,21 +1,11 @@
-#nullable disable
+#nullable disable warnings
 using LinqToDB;
 using NadekoBot.Modules.Nsfw.Common;
 using NadekoBot.Modules.Searches.Common;
-using Newtonsoft.Json;
 using Newtonsoft.Json.Linq;
 
 namespace NadekoBot.Modules.Nsfw;
 
-public record UrlReply
-{
-    public string Error { get; init; }
-    public string Url { get; init; }
-    public string Rating { get; init; }
-    public string Provider { get; init; }
-    public List<string> Tags { get; } = new();
-}
-
 public class SearchImagesService : ISearchImagesService, INService
 {
     private ConcurrentDictionary<ulong, HashSet<string>> BlacklistedTags { get; }
@@ -23,18 +13,22 @@ public class SearchImagesService : ISearchImagesService, INService
     public ConcurrentDictionary<ulong, Timer> AutoHentaiTimers { get; } = new();
     public ConcurrentDictionary<ulong, Timer> AutoBoobTimers { get; } = new();
     public ConcurrentDictionary<ulong, Timer> AutoButtTimers { get; } = new();
+
     private readonly Random _rng;
     private readonly SearchImageCacher _cache;
     private readonly IHttpClientFactory _httpFactory;
     private readonly DbService _db;
+    private readonly INhentaiService _nh;
 
     private readonly object _taglock = new();
 
     public SearchImagesService(
         DbService db,
         SearchImageCacher cacher,
-        IHttpClientFactory httpFactory)
+        IHttpClientFactory httpFactory,
+        INhentaiService nh)
     {
+        _nh = nh;
         _db = db;
         _rng = new NadekoRandom();
         _cache = cacher;
@@ -284,85 +278,18 @@ public class SearchImagesService : ISearchImagesService, INService
 
     #region Nhentai
 
-    private string GetNhentaiExtensionInternal(string s)
-        => s switch
-        {
-            "j" => "jpg",
-            "p" => "png",
-            "g" => "gif",
-            _ => "jpg"
-        };
+    public Task<Gallery?> GetNhentaiByIdAsync(uint id)
+        => _nh.GetAsync(id);
 
-    private Gallery ModelToGallery(NhentaiApiModel.Gallery model)
+    public async Task<Gallery?> GetNhentaiBySearchAsync(string search)
     {
-        var thumbnail = $"https://t.nhentai.net/galleries/{model.MediaId}/thumb."
-                        + GetNhentaiExtensionInternal(model.Images.Thumbnail.T);
+        var ids = await _nh.GetIdsBySearchAsync(search);
 
-        var url = $"https://nhentai.net/g/{model.Id}";
-        return new(model.Id.ToString(),
-            url,
-            model.Title.English,
-            model.Title.Pretty,
-            thumbnail,
-            model.NumPages,
-            model.NumFavorites,
-            model.UploadDate.ToUnixTimestamp().UtcDateTime,
-            model.Tags.Map(x => new Tag
-            {
-                Name = x.Name,
-                Url = "https://nhentai.com/" + x.Url
-            }));
-    }
-
-    private async Task<NhentaiApiModel.Gallery> GetNhentaiByIdInternalAsync(uint id)
-    {
-        using var http = _httpFactory.CreateClient();
-        try
-        {
-            var res = await http.GetStringAsync("https://nhentai.net/api/gallery/" + id);
-            return JsonConvert.DeserializeObject<NhentaiApiModel.Gallery>(res);
-        }
-        catch (HttpRequestException)
-        {
-            Log.Warning("Nhentai with id {NhentaiId} not found", id);
+        if (ids.Count == 0)
             return null;
-        }
-    }
-
-    private async Task<NhentaiApiModel.Gallery[]> SearchNhentaiInternalAsync(string search)
-    {
-        using var http = _httpFactory.CreateClient();
-        try
-        {
-            var res = await http.GetStringAsync("https://nhentai.net/api/galleries/search?query=" + search);
-            return JsonConvert.DeserializeObject<NhentaiApiModel.SearchResult>(res).Result;
-        }
-        catch (HttpRequestException)
-        {
-            Log.Warning("Nhentai with search {NhentaiSearch} not found", search);
-            return null;
-        }
-    }
-
-    public async Task<Gallery> GetNhentaiByIdAsync(uint id)
-    {
-        var model = await GetNhentaiByIdInternalAsync(id);
-
-        return ModelToGallery(model);
-    }
-
-    private static readonly string[] _bannedTags = { "loli", "lolicon", "shota", "shotacon", "cub" };
-
-    public async Task<Gallery> GetNhentaiBySearchAsync(string search)
-    {
-        var models = await SearchNhentaiInternalAsync(search);
-
-        models = models.Where(x => !x.Tags.Any(t => _bannedTags.Contains(t.Name))).ToArray();
-
-        if (models.Length == 0)
-            return null;
-
-        return ModelToGallery(models[_rng.Next(0, models.Length)]);
+
+        var id = ids[_rng.Next(0, ids.Count)];
+        return await _nh.GetAsync(id);
     }
 
     #endregion
diff --git a/src/NadekoBot/Modules/Nsfw/UrlReply.cs b/src/NadekoBot/Modules/Nsfw/UrlReply.cs
new file mode 100644
index 000000000..85bde32d7
--- /dev/null
+++ b/src/NadekoBot/Modules/Nsfw/UrlReply.cs
@@ -0,0 +1,10 @@
+namespace NadekoBot.Modules.Nsfw;
+
+public record UrlReply
+{
+    public string Error { get; init; }
+    public string Url { get; init; }
+    public string Rating { get; init; }
+    public string Provider { get; init; }
+    public List<string> Tags { get; } = new();
+}
\ No newline at end of file
diff --git a/src/NadekoBot/Modules/Searches/_Common/Gallery.cs b/src/NadekoBot/Modules/Searches/_Common/Gallery.cs
index 4a0141a37..61a6c65a4 100644
--- a/src/NadekoBot/Modules/Searches/_Common/Gallery.cs
+++ b/src/NadekoBot/Modules/Searches/_Common/Gallery.cs
@@ -9,7 +9,7 @@ public sealed class Tag
 
 public sealed class Gallery
 {
-    public string Id { get; }
+    public uint Id { get; }
     public string Url { get; }
     public string FullTitle { get; }
     public string Title { get; }
@@ -21,7 +21,7 @@ public sealed class Gallery
 
 
     public Gallery(
-        string id,
+        uint id,
         string url,
         string fullTitle,
         string title,
diff --git a/src/NadekoBot/Modules/Searches/_Common/NhentaiApiModel.cs b/src/NadekoBot/Modules/Searches/_Common/NhentaiApiModel.cs
deleted file mode 100644
index a7852a67e..000000000
--- a/src/NadekoBot/Modules/Searches/_Common/NhentaiApiModel.cs
+++ /dev/null
@@ -1,121 +0,0 @@
-#nullable disable
-using Newtonsoft.Json;
-
-namespace NadekoBot.Modules.Searches.Common;
-
-public static class NhentaiApiModel
-{
-    public class Title
-    {
-        [JsonProperty("english")]
-        public string English { get; set; }
-
-        [JsonProperty("japanese")]
-        public string Japanese { get; set; }
-
-        [JsonProperty("pretty")]
-        public string Pretty { get; set; }
-    }
-
-    public class Page
-    {
-        [JsonProperty("t")]
-        public string T { get; set; }
-
-        [JsonProperty("w")]
-        public int W { get; set; }
-
-        [JsonProperty("h")]
-        public int H { get; set; }
-    }
-
-    public class Cover
-    {
-        [JsonProperty("t")]
-        public string T { get; set; }
-
-        [JsonProperty("w")]
-        public int W { get; set; }
-
-        [JsonProperty("h")]
-        public int H { get; set; }
-    }
-
-    public class Thumbnail
-    {
-        [JsonProperty("t")]
-        public string T { get; set; }
-
-        [JsonProperty("w")]
-        public int W { get; set; }
-
-        [JsonProperty("h")]
-        public int H { get; set; }
-    }
-
-    public class Images
-    {
-        [JsonProperty("pages")]
-        public List<Page> Pages { get; set; }
-
-        [JsonProperty("cover")]
-        public Cover Cover { get; set; }
-
-        [JsonProperty("thumbnail")]
-        public Thumbnail Thumbnail { get; set; }
-    }
-
-    public class Tag
-    {
-        [JsonProperty("id")]
-        public int Id { get; set; }
-
-        [JsonProperty("type")]
-        public string Type { get; set; }
-
-        [JsonProperty("name")]
-        public string Name { get; set; }
-
-        [JsonProperty("url")]
-        public string Url { get; set; }
-
-        [JsonProperty("count")]
-        public int Count { get; set; }
-    }
-
-    public class Gallery
-    {
-        [JsonProperty("id")]
-        public int Id { get; set; }
-
-        [JsonProperty("media_id")]
-        public string MediaId { get; set; }
-
-        [JsonProperty("title")]
-        public Title Title { get; set; }
-
-        [JsonProperty("images")]
-        public Images Images { get; set; }
-
-        [JsonProperty("scanlator")]
-        public string Scanlator { get; set; }
-
-        [JsonProperty("upload_date")]
-        public double UploadDate { get; set; }
-
-        [JsonProperty("tags")]
-        public Tag[] Tags { get; set; }
-
-        [JsonProperty("num_pages")]
-        public int NumPages { get; set; }
-
-        [JsonProperty("num_favorites")]
-        public int NumFavorites { get; set; }
-    }
-
-    public class SearchResult
-    {
-        [JsonProperty("result")]
-        public Gallery[] Result { get; set; }
-    }
-}
\ No newline at end of file