LearnGodot/Parsers/RawParser.cs
2026-01-02 23:35:05 +08:00

374 lines
10 KiB
C#

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Learn.Config;
using Learn.Models;
using Learn.Utils;
namespace Learn.Parsers;
public class RawParser(Configs configs) : ItemParser
{
/// <summary>
/// Settings for this parser, looked up from the injected <c>configs</c> on every access.
/// </summary>
private RawParserConfig config
{
    get { return configs.Get<RawParserConfig>(); }
}
/// <summary>
/// Returns a copy of <paramref name="parts"/> without the entries that match any of the
/// configured token filter regexes. The list passed in is not modified.
/// </summary>
/// <param name="parts">Name fragments to filter.</param>
/// <returns>A new list containing the fragments that no filter regex matched.</returns>
private List<string> FilterParts(List<string> parts)
{
    var result = parts.ToList();

    // Absent filter rules mean "filter nothing", in line with how the season and
    // group rules are treated as optional elsewhere in this class.
    var regexes = config.TokenFilterRules?.Regexes;
    if (regexes == null)
    {
        return result;
    }

    foreach (var regex in regexes)
    {
        // Each fragment is trimmed before being tested against the pattern.
        result.RemoveAll(part => Regex.IsMatch(part.Trim(), regex));
    }
    return result;
}
/// <summary>
/// Splits the item's name into fragments: every match of the configured split regex is
/// trimmed, empty results are dropped, and the remainder is passed through FilterParts.
/// </summary>
/// <param name="item">Item whose name is split.</param>
/// <returns>The filtered fragments, in the order they appear in the name.</returns>
private List<string> GetParts(Item item)
{
    var fragments = new List<string>();
    foreach (Match hit in Regex.Matches(item.Name(), config.SplitRegex))
    {
        var text = hit.Value.Trim();
        if (text.Length > 0)
        {
            fragments.Add(text);
        }
    }
    return FilterParts(fragments);
}
/// <summary>
/// Converts a captured season token into a decimal string.
/// </summary>
/// <param name="seasonPart">
/// Either text accepted by <c>int.TryParse</c> (for example "03") or one of the single
/// Chinese numerals 零 through 九.
/// </param>
/// <param name="season">
/// The normalized number as a string (for example "3"), or <c>null</c> when the token is not recognized.
/// </param>
/// <returns><c>true</c> when the token was recognized.</returns>
private bool TryNormalizeSeason(string seasonPart, out string season)
{
    // Numeric text wins; re-formatting the parsed value drops leading zeros.
    if (int.TryParse(seasonPart, out var number))
    {
        season = number.ToString();
        return true;
    }

    // Otherwise only the single-character Chinese numerals 0-9 are understood.
    season = seasonPart switch
    {
        "零" => "0",
        "一" => "1",
        "二" => "2",
        "三" => "3",
        "四" => "4",
        "五" => "5",
        "六" => "6",
        "七" => "7",
        "八" => "8",
        "九" => "9",
        _ => null,
    };
    return season != null;
}
/// <summary>
/// Tests a single token against the configured season regexes, in order, and returns the
/// first one whose capture group 1 can be normalized by TryNormalizeSeason.
/// </summary>
/// <param name="token">Token to test; it is trimmed before matching.</param>
/// <param name="seasonPart">The normalized season on success; <c>null</c> otherwise.</param>
/// <param name="matchPart">The full text matched by the winning regex on success; <c>null</c> otherwise.</param>
/// <returns><c>true</c> when some rule matched and its capture was normalized.</returns>
private bool TryMatchSeason(string token, out string seasonPart, out string matchPart)
{
    seasonPart = null;
    matchPart = null;

    // Season rules are optional; without them nothing can match.
    var regexes = config.SeasonMatchRules?.Regexes;
    if (regexes == null)
    {
        return false;
    }

    token = token.Trim();
    foreach (var regex in regexes)
    {
        var match = Regex.Match(token, regex);
        if (!match.Success)
        {
            continue;
        }

        // A rule that matches but captures something that is not a season must not end
        // the search: fall through so the remaining rules still get a chance.
        if (TryNormalizeSeason(match.Groups[1].Value, out seasonPart))
        {
            // Only report the matched text together with a successful result.
            matchPart = match.Value;
            return true;
        }
    }
    return false;
}
/// <summary>
/// Looks for a season marker in the node's name. Each name fragment is split on "-" and
/// the resulting tokens are tested in order; the first token accepted by TryMatchSeason wins.
/// </summary>
/// <param name="node">Node whose <c>Info</c> name is inspected.</param>
/// <param name="season">The normalized season on success; <c>null</c> otherwise.</param>
/// <param name="matchInfo">The matched text and the index of the fragment it came from on success; <c>null</c> otherwise.</param>
/// <returns><c>true</c> when a season was found.</returns>
private bool TryParseSeason(TreeNode node, out string season, out MatchInfo matchInfo)
{
    season = null;
    matchInfo = null;

    var segments = GetParts(node.Info);
    for (var index = 0; index < segments.Count; index++)
    {
        foreach (var candidate in segments[index].Split('-'))
        {
            if (TryMatchSeason(candidate, out season, out var matched))
            {
                matchInfo = new MatchInfo { content = matched, partIndex = index };
                return true;
            }
        }
    }
    return false;
}
/// <summary>
/// Tells whether <paramref name="item"/> equals any entry of <paramref name="sequence"/>,
/// ignoring case (ordinal comparison).
/// </summary>
/// <param name="item">Text to look up.</param>
/// <param name="sequence">Candidate values; <c>null</c> is treated as "no candidates".</param>
/// <returns><c>true</c> when at least one entry equals <paramref name="item"/>.</returns>
private bool IsFullMatch(string item, List<string> sequence) =>
    sequence != null
    && sequence.Exists(candidate => item.Equals(candidate, StringComparison.OrdinalIgnoreCase));
/// <summary>
/// Tells whether <paramref name="item"/> contains any entry of <paramref name="sequence"/>
/// as a substring, ignoring case (ordinal comparison).
/// </summary>
/// <param name="item">Text to search in.</param>
/// <param name="sequence">Substrings to look for; <c>null</c> is treated as "no candidates".</param>
/// <returns><c>true</c> when at least one entry occurs inside <paramref name="item"/>.</returns>
private bool IsPartialMatch(string item, List<string> sequence) =>
    sequence != null
    && sequence.Exists(candidate => item.Contains(candidate, StringComparison.OrdinalIgnoreCase));
/// <summary>
/// Looks for a group name among the node's name fragments. A fragment that equals an
/// entry of <c>GroupsMatchRules.Full</c> is preferred; only when no fragment does so are
/// fragments containing an entry of <c>GroupsMatchRules.Partial</c> considered.
/// </summary>
/// <param name="node">Node whose <c>Info</c> name is inspected.</param>
/// <param name="group">The whole matching fragment on success; <c>null</c> otherwise.</param>
/// <param name="matchInfo">The matching fragment and its index on success; <c>null</c> otherwise.</param>
/// <returns><c>true</c> when a fragment matched either rule set.</returns>
private bool TryParseGroup(TreeNode node, out string group, out MatchInfo matchInfo)
{
    var segments = GetParts(node.Info);

    // First pass: exact matches across all fragments.
    var hit = segments.FindIndex(segment => IsFullMatch(segment, config.GroupsMatchRules?.Full));
    if (hit < 0)
    {
        // Second pass: substring matches, again scanning from the first fragment.
        hit = segments.FindIndex(segment => IsPartialMatch(segment, config.GroupsMatchRules?.Partial));
    }

    if (hit < 0)
    {
        group = null;
        matchInfo = null;
        return false;
    }

    group = segments[hit];
    matchInfo = new MatchInfo { content = segments[hit], partIndex = hit };
    return true;
}
/// <summary>
/// Classifies a node as extra, subtitle, episode or unknown using the configured type rules.
/// </summary>
/// <param name="node">Node whose <c>Info</c> name (and folder flag) is inspected.</param>
/// <returns>
/// <c>Extra</c> when the name equals a configured extra name, when a non-folder has no
/// extension, or when its extension is a configured extra extension; <c>Subtitle</c> or
/// <c>Episode</c> when the extension is configured for that type; <c>Unknown</c> for any
/// other folder or file.
/// </returns>
private ItemFields.ItemType ParseItemType(TreeNode node)
{
    // Case-insensitive (ordinal) membership test shared by every rule below.
    static bool ContainsIgnoreCase(IEnumerable<string> candidates, string value)
    {
        foreach (var candidate in candidates)
        {
            if (value.Equals(candidate, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }
        return false;
    }

    // 1. Check whether the item is an extra. The name rule runs before the folder check,
    //    so it applies to folders and files alike.
    if (ContainsIgnoreCase(config.TypeMatchRules.Extra.IfDirNameIs, node.Info.Name()))
    {
        return ItemFields.ItemType.Extra;
    }

    // Folders that are not extras cannot be classified any further.
    if (node.Info.IsFolder())
    {
        return ItemFields.ItemType.Unknown;
    }

    // A file without an extension is treated as an extra.
    var extension = Path.GetExtension(node.Info.Name());
    if (string.IsNullOrEmpty(extension))
    {
        return ItemFields.ItemType.Extra;
    }

    if (ContainsIgnoreCase(config.TypeMatchRules.Extra.IfFileExtensionIs, extension))
    {
        return ItemFields.ItemType.Extra;
    }

    // 2. Check whether the item is a subtitle.
    if (ContainsIgnoreCase(config.TypeMatchRules.Subtitle.IfFileExtensionIs, extension))
    {
        return ItemFields.ItemType.Subtitle;
    }

    // 3. Check whether the item is an episode.
    if (ContainsIgnoreCase(config.TypeMatchRules.Episode.IfFileExtensionIs, extension))
    {
        return ItemFields.ItemType.Episode;
    }

    // 4. None of the above: unknown.
    return ItemFields.ItemType.Unknown;
}
/// <summary>
/// Field-parser adapter around ParseItemType: reports the detected type by its enum name.
/// </summary>
/// <param name="node">Node to classify.</param>
/// <param name="type">The enum member name of the detected type, or <c>null</c> when it is <c>Unknown</c>.</param>
/// <param name="matchInfo">Always <c>null</c>; this parser does not record a match position.</param>
/// <returns><c>true</c> unless the detected type is <c>Unknown</c>.</returns>
private bool TryParseType(TreeNode node, out string type, out MatchInfo matchInfo)
{
    matchInfo = null;

    var detected = ParseItemType(node);
    type = detected == ItemFields.ItemType.Unknown ? null : detected.ToString();
    return type != null;
}
/// <summary>
/// Reads the language tag of a subtitle from its file name: the second-to-last
/// dot-separated segment (for example "zh" in "show.zh.srt").
/// </summary>
/// <param name="node">Node to inspect; only nodes whose type is <c>Subtitle</c> are considered.</param>
/// <param name="language">The second-to-last name segment on success; <c>null</c> otherwise.</param>
/// <param name="matchInfo">Always <c>null</c>; this parser does not record a match position.</param>
/// <returns><c>true</c> when the node is a subtitle whose name has at least three dot-separated segments.</returns>
private bool TryParseSubtitleLanguage(TreeNode node, out string language, out MatchInfo matchInfo)
{
    matchInfo = null;
    language = null;

    if (node.Info.Type() == ItemFields.ItemType.Subtitle)
    {
        var segments = node.Info.Name().Split('.');
        if (segments.Length >= 3)
        {
            language = segments[segments.Length - 2];
            return true;
        }
    }
    return false;
}
/// <summary>
/// Looks for an episode number in the name of an episode node. Each name fragment is
/// split on "-" and the first token that consists solely of one or two digits is taken.
/// </summary>
/// <param name="node">Node to inspect; only nodes whose type is <c>Episode</c> are considered.</param>
/// <param name="episode">The matched digits exactly as written (not re-formatted) on success; <c>null</c> otherwise.</param>
/// <param name="matchInfo">The matched text and the index of the fragment it came from on success; <c>null</c> otherwise.</param>
/// <returns><c>true</c> when an episode token was found.</returns>
private bool TryParseEpisode(TreeNode node, out string episode, out MatchInfo matchInfo)
{
    episode = null;
    matchInfo = null;
    if (node.Info.Type() != ItemFields.ItemType.Episode)
    {
        return false;
    }

    var parts = GetParts(node.Info);
    for (int i = 0; i < parts.Count; i++)
    {
        foreach (var token in parts[i].Split("-"))
        {
            var match = Regex.Match(token.Trim(), @"^\d{1,2}$");
            if (!match.Success)
            {
                // Keep scanning: only a matching token may end the search.
                continue;
            }

            episode = match.Value;
            matchInfo = new MatchInfo
            {
                content = match.Value,
                partIndex = i
            };
            return true;
        }
    }
    return false;
}
//
// private bool TryParseRawTitle(TreeNode node, out string rawTitle, out MatchInfo matchInfo)
// {
//
// }
/// <summary>
/// Describes where a field parser found its value among the name fragments of an item.
/// </summary>
class MatchInfo
{
// The text the parser matched (the regex match or the whole fragment, depending on the parser).
public string content;
// Index of the fragment, within the list returned by GetParts, in which the match was found.
public int partIndex;
}
/// <summary>
/// Common shape of the TryParse* field parsers so they can be passed to TryParseField and DoParse:
/// returns <c>true</c> and sets <c>result</c> when the field could be read from <c>node</c>;
/// <c>matchInfo</c> may be <c>null</c> for parsers that do not record a match position.
/// </summary>
private delegate bool FieldParser(TreeNode node, out string result, out MatchInfo matchInfo);
/// <summary>
/// Decides whether a field value parsed from <paramref name="node"/> itself should be accepted.
/// The node must have <c>Info</c> and parse successfully. Its value is then accepted when
/// none of its direct children parse, or when more than half of the children that do parse
/// share one value among themselves.
/// </summary>
/// <param name="node">Node to evaluate.</param>
/// <param name="fieldParser">Parser applied to the node and to each of its direct children.</param>
/// <param name="result">The node's own parsed value on success; <c>null</c> otherwise.</param>
/// <returns><c>true</c> when the node's value is accepted.</returns>
private bool TryParseField(TreeNode node, FieldParser fieldParser, out string result)
{
    result = null;
    if (node.Info == null || !fieldParser(node, out var ownValue, out _))
    {
        return false;
    }

    // Collect the value of every direct child the parser can handle.
    var childValues = new List<string>();
    foreach (var child in node.Children)
    {
        if (fieldParser(child, out var childValue, out _))
        {
            childValues.Add(childValue);
        }
    }

    // No child has an opinion: the node's own value stands.
    if (childValues.Count == 0)
    {
        result = ownValue;
        return true;
    }

    // Size of the largest set of children that agree on one value.
    var largestAgreement = childValues.GroupBy(value => value)
        .Max(bucket => bucket.Count());
    if (largestAgreement <= childValues.Count / 2)
    {
        return false;
    }

    // Note that the node's own value is reported, not the children's majority value.
    result = ownValue;
    return true;
}
/// <summary>
/// Walks the tree breadth-first from <paramref name="node"/> and stores the field under
/// <paramref name="fieldName"/> on the first node along each branch where TryParseField succeeds.
/// </summary>
/// <param name="node">Root of the subtree to process.</param>
/// <param name="fieldParser">Parser used to read the field from a node.</param>
/// <param name="fieldName">Key under which the value is stored in the node's info dictionary.</param>
private void DoParse(TreeNode node, FieldParser fieldParser, string fieldName)
{
    var queue = new Queue<TreeNode>();
    queue.Enqueue(node);
    while (queue.Count > 0)
    {
        var current = queue.Dequeue();

        // A node that already carries the field is left alone, together with its subtree.
        // A node without Info cannot carry it; it must not crash here, because
        // TryParseField rejects it below and the walk then continues into its children.
        if (current.Info != null && current.Info.Info.ContainsKey(fieldName))
        {
            continue;
        }

        if (TryParseField(current, fieldParser, out var fieldValue))
        {
            // TryParseField only succeeds for nodes that have Info.
            current.Info.Info.TryAdd(fieldName, fieldValue);
        }
        else
        {
            // Not decided at this level: let each child try on its own.
            foreach (var child in current.Children)
            {
                queue.Enqueue(child);
            }
        }
    }
}
/// <summary>
/// Runs the field passes over the tree rooted at <paramref name="node"/>, in a fixed order:
/// season, group, type, then subtitle language. All work happens synchronously inside the call.
/// </summary>
/// <param name="node">Root of the tree to annotate.</param>
/// <returns>A task that is already finished when the method returns.</returns>
public async Task Parse(TreeNode node)
{
    // The subtitle-language parser reads the node's type, so the type pass is listed before it.
    var passes = new (FieldParser Parser, string Key)[]
    {
        (TryParseSeason, ItemFields.Key_Season),
        (TryParseGroup, ItemFields.Key_Group),
        (TryParseType, ItemFields.Key_Type),
        (TryParseSubtitleLanguage, ItemFields.Key_SubtitleLanguage),
    };

    foreach (var (parser, key) in passes)
    {
        DoParse(node, parser, key);
    }
}
}