LearnGodot/Parsers/RawParser.cs
2026-01-04 21:42:22 +08:00

428 lines
12 KiB
C#
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Learn.Config;
using Learn.Models;
using Learn.Utils;
namespace Learn.Parsers;
public class RawParser(Configs configs) : ItemParser
{
// Parser settings. Evaluated on every access by calling configs.Get<RawParserConfig>()
// on the Configs instance passed to the primary constructor.
private RawParserConfig config => configs.Get<RawParserConfig>();
/// <summary>
/// Removes noise from each part using the configured token filter regexes and
/// drops every part that ends up empty.
/// </summary>
/// <param name="parts">Parts to clean. The list itself is not modified.</param>
/// <returns>A new list containing the trimmed, non-empty parts.</returns>
private List<string> FilterParts(List<string> parts)
{
    // Trim up front so the result is trimmed even when no filter rule is configured.
    var result = parts.Select(part => part.Trim()).ToList();

    // The rule section is treated as optional, like the season and group rules.
    var regexes = config.TokenFilterRules?.Regexes;
    if (regexes != null)
    {
        foreach (var regex in regexes)
        {
            for (int i = 0; i < result.Count; i++)
            {
                var match = Regex.Match(result[i], regex);

                // A zero-length match has nothing to remove, and string.Replace
                // throws ArgumentException when asked to replace an empty string.
                if (!match.Success || match.Length == 0) continue;

                // Removes every literal occurrence of the matched text from the part.
                result[i] = result[i].Replace(match.Value, "").Trim();
            }
        }
    }

    return result.Where(part => !string.IsNullOrEmpty(part)).ToList();
}
/// <summary>
/// Splits the item's name into parts: every match of the configured split regex,
/// trimmed, with empty matches discarded, then passed through <c>FilterParts</c>.
/// </summary>
/// <param name="item">Item whose name is split.</param>
/// <returns>The filtered list of name parts.</returns>
private List<string> GetParts(Item item)
{
    var pieces = new List<string>();
    foreach (Match hit in Regex.Matches(item.Name(), config.SplitRegex))
    {
        var text = hit.Value.Trim();
        if (!string.IsNullOrEmpty(text))
        {
            pieces.Add(text);
        }
    }
    return FilterParts(pieces);
}
/// <summary>
/// Converts a captured season token into its canonical decimal string.
/// Accepts anything <c>int.TryParse</c> accepts (so "02" becomes "2") and the
/// single Chinese numerals 零 through 九.
/// </summary>
/// <param name="seasonPart">Raw season text taken from a regex capture.</param>
/// <param name="season">Normalized season number, or null when not recognized.</param>
/// <returns>True when <paramref name="seasonPart"/> was recognized.</returns>
private bool TryNormalizeSeason(string seasonPart, out string season)
{
    if (int.TryParse(seasonPart, out var numeric))
    {
        season = numeric.ToString();
        return true;
    }

    // Single-character Chinese numerals; anything else is not recognized.
    season = seasonPart switch
    {
        "零" => "0",
        "一" => "1",
        "二" => "2",
        "三" => "3",
        "四" => "4",
        "五" => "5",
        "六" => "6",
        "七" => "7",
        "八" => "8",
        "九" => "9",
        _ => null
    };
    return season != null;
}
/// <summary>
/// Tries each configured season regex against the trimmed token and returns the
/// first match whose capture group 1 can be normalized to a season number.
/// </summary>
/// <param name="token">Text to inspect; it is trimmed before matching.</param>
/// <param name="seasonPart">Normalized season number on success, otherwise null.</param>
/// <param name="matchPart">Full text of the regex match on success, otherwise null.</param>
/// <returns>True when a season was found.</returns>
private bool TryMatchSeason(string token, out string seasonPart, out string matchPart)
{
    seasonPart = null;
    matchPart = null;
    var regexes = config.SeasonMatchRules?.Regexes;
    if (regexes == null) return false;
    token = token.Trim();
    foreach (var regex in regexes)
    {
        var match = Regex.Match(token, regex);
        if (!match.Success) continue;

        // A rule that matches but captures something that is not a season number
        // must not stop the search: later rules may still apply to this token.
        if (!TryNormalizeSeason(match.Groups[1].Value, out var normalized)) continue;

        // Outputs are only assigned on success so a false return leaves both null.
        seasonPart = normalized;
        matchPart = match.Value;
        return true;
    }
    return false;
}
/// <summary>
/// Scans the parts in order, splitting each on "-", and returns the first token
/// that <c>TryMatchSeason</c> recognizes as a season marker.
/// </summary>
/// <param name="parts">Name parts to scan.</param>
/// <param name="item">Not used by this parser; present to satisfy the FieldParser signature.</param>
/// <param name="season">Normalized season number on success.</param>
/// <param name="matchInfo">Matched text and the index of the part it came from, or null.</param>
/// <returns>True when a season marker was found.</returns>
private bool TryParseSeason(List<string> parts, Item item, out string season, out MatchInfo matchInfo)
{
    matchInfo = null;
    season = null;
    var index = 0;
    foreach (var part in parts)
    {
        foreach (var piece in part.Split("-"))
        {
            if (TryMatchSeason(piece, out season, out var matched))
            {
                matchInfo = new MatchInfo
                {
                    content = matched,
                    partIndex = index
                };
                return true;
            }
        }
        index++;
    }
    return false;
}
/// <summary>
/// Tells whether <paramref name="item"/> equals, ignoring case, any entry of
/// <paramref name="sequence"/>.
/// </summary>
/// <param name="item">Text to look up.</param>
/// <param name="sequence">Candidate values; null is treated as "no candidates".</param>
/// <returns>True when at least one entry equals the item.</returns>
private bool IsFullMatch(string item, List<string> sequence)
{
    return sequence != null
        && sequence.Any(candidate => item.Equals(candidate, StringComparison.OrdinalIgnoreCase));
}
/// <summary>
/// Tells whether <paramref name="item"/> contains, ignoring case, any non-empty
/// entry of <paramref name="sequence"/>.
/// </summary>
/// <param name="item">Text to search in; null or empty never matches.</param>
/// <param name="sequence">Substrings to look for; null is treated as "no candidates".</param>
/// <returns>True when at least one non-empty entry occurs inside the item.</returns>
private bool IsPartialMatch(string item, List<string> sequence)
{
    if (string.IsNullOrEmpty(item) || sequence == null) return false;
    foreach (var candidate in sequence)
    {
        // Skip blank entries: string.Contains("") is always true, which would make
        // every part match, and Contains(null) throws ArgumentNullException.
        if (string.IsNullOrEmpty(candidate)) continue;

        if (item.Contains(candidate, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Finds the part that names the release group. A part that fully equals one of
/// the configured "Full" entries wins; only when no part does is the first part
/// containing one of the "Partial" entries used.
/// </summary>
/// <param name="parts">Name parts to scan.</param>
/// <param name="item">Not used by this parser; present to satisfy the FieldParser signature.</param>
/// <param name="group">The matching part on success, otherwise null.</param>
/// <param name="matchInfo">The matching part and its index on success, otherwise null.</param>
/// <returns>True when a group part was found.</returns>
private bool TryParseGroup(List<string> parts, Item item, out string group, out MatchInfo matchInfo)
{
    group = null;
    matchInfo = null;

    // Exact matches take priority over partial ones across the whole list.
    var index = parts.FindIndex(part => IsFullMatch(part, config.GroupsMatchRules?.Full));
    if (index < 0)
    {
        index = parts.FindIndex(part => IsPartialMatch(part, config.GroupsMatchRules?.Partial));
    }
    if (index < 0)
    {
        return false;
    }

    group = parts[index];
    matchInfo = new MatchInfo
    {
        content = parts[index],
        partIndex = index
    };
    return true;
}
/// <summary>
/// Classifies an item as Extra, Subtitle, Episode or Unknown from its name,
/// whether it is a folder, and its file extension, using the configured
/// type match rules.
/// </summary>
/// <param name="item">Item to classify.</param>
/// <returns>The detected item type; Unknown when no rule applies.</returns>
private ItemFields.ItemType ParseItemType(Item item)
{
    // Case-insensitive membership test shared by all rule lists below.
    static bool ContainsIgnoreCase(IEnumerable<string> candidates, string value)
    {
        foreach (var candidate in candidates)
        {
            if (value.Equals(candidate, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }
        return false;
    }

    var rules = config.TypeMatchRules;

    // 1. Is it an extra? The name check runs before the folder check, so it
    //    applies to folders and files alike.
    if (ContainsIgnoreCase(rules.Extra.IfDirNameIs, item.Name()))
    {
        return ItemFields.ItemType.Extra;
    }
    if (item.IsFolder())
    {
        return ItemFields.ItemType.Unknown;
    }

    var extension = Path.GetExtension(item.Name());
    // A file without an extension is treated as an extra.
    if (string.IsNullOrEmpty(extension))
    {
        return ItemFields.ItemType.Extra;
    }
    if (ContainsIgnoreCase(rules.Extra.IfFileExtensionIs, extension))
    {
        return ItemFields.ItemType.Extra;
    }

    // 2. Is it a subtitle?
    if (ContainsIgnoreCase(rules.Subtitle.IfFileExtensionIs, extension))
    {
        return ItemFields.ItemType.Subtitle;
    }

    // 3. Is it an episode?
    if (ContainsIgnoreCase(rules.Episode.IfFileExtensionIs, extension))
    {
        return ItemFields.ItemType.Episode;
    }

    // 4. None of the above: unknown.
    return ItemFields.ItemType.Unknown;
}
/// <summary>
/// Field-parser adapter around <c>ParseItemType</c>: yields the type's name
/// unless the item could not be classified.
/// </summary>
/// <param name="parts">Not used by this parser.</param>
/// <param name="item">Item to classify.</param>
/// <param name="type">Name of the detected type, or null when it is Unknown.</param>
/// <param name="matchInfo">Always null; this parser does not match against parts.</param>
/// <returns>True when the detected type is anything other than Unknown.</returns>
private bool TryParseType(List<string> parts, Item item, out string type, out MatchInfo matchInfo)
{
    matchInfo = null;
    var detected = ParseItemType(item);
    var known = detected != ItemFields.ItemType.Unknown;
    type = known ? detected.ToString() : null;
    return known;
}
/// <summary>
/// For subtitle items, takes the second-to-last dot-separated segment of the
/// file name as the language (e.g. "zh" in "name.zh.srt"). Names with fewer
/// than three segments yield no language.
/// </summary>
/// <param name="parts">Not used by this parser.</param>
/// <param name="item">Item to inspect; must be of type Subtitle to succeed.</param>
/// <param name="language">The extracted segment on success, otherwise null.</param>
/// <param name="matchInfo">Always null.</param>
/// <returns>True when a language segment was extracted.</returns>
private bool TryParseSubtitleLanguage(List<string> parts, Item item, out string language, out MatchInfo matchInfo)
{
    matchInfo = null;
    language = null;
    if (item.Type() == ItemFields.ItemType.Subtitle)
    {
        var segments = item.Name().Split(".");
        if (segments.Length >= 3)
        {
            language = segments[segments.Length - 2];
            return true;
        }
    }
    return false;
}
/// <summary>
/// Scans the parts in order, splitting each on "-", and returns the first token
/// for which a configured episode regex matches and capture group 1 is a valid
/// integer. Extra and Unknown items never yield an episode.
/// </summary>
/// <param name="parts">Name parts to scan.</param>
/// <param name="item">Item being parsed; only its type is consulted.</param>
/// <param name="episode">Episode number without leading zeros on success, otherwise null.</param>
/// <param name="matchInfo">Matched text and the index of the part it came from, or null.</param>
/// <returns>True when an episode number was found.</returns>
private bool TryParseEpisode(List<string> parts, Item item, out string episode, out MatchInfo matchInfo)
{
    episode = null;
    matchInfo = null;
    var itemType = item.Type();
    if (itemType == ItemFields.ItemType.Extra ||
        itemType == ItemFields.ItemType.Unknown)
    {
        return false;
    }

    // The rule section is treated as optional, like the season and group rules.
    var regexes = config.EpisodeRules?.Regexes;
    if (regexes == null) return false;

    for (int i = 0; i < parts.Count; i++)
    {
        foreach (var token in parts[i].Split("-"))
        {
            var candidate = token.Trim();
            foreach (var regex in regexes)
            {
                var match = Regex.Match(candidate, regex);
                if (!match.Success) continue;

                // A rule without a numeric capture group 1 (or one that captures
                // too many digits) must not throw; move on to the next rule.
                if (!int.TryParse(match.Groups[1].Value, out var number)) continue;

                episode = number.ToString();
                matchInfo = new MatchInfo
                {
                    content = match.Value,
                    partIndex = i
                };
                return true;
            }
        }
    }
    return false;
}
/// <summary>
/// Derives the raw title from the name parts: group parts are removed, the rest
/// is split on "-", season markers are stripped, the tokens are filtered, and
/// the first surviving token becomes the title. As a side effect, the first
/// "(yyyy)" found in any token is stored on the item as the year and removed
/// from the title.
/// </summary>
/// <param name="parts">Name parts; group parts are removed from this list in place.</param>
/// <param name="item">Item being parsed; Extra items yield no title.</param>
/// <param name="rawTitle">The extracted title on success, otherwise null.</param>
/// <param name="matchInfo">Always null.</param>
/// <returns>True when a title was extracted.</returns>
private bool TryParseRawTitle(List<string> parts, Item item, out string rawTitle, out MatchInfo matchInfo)
{
    matchInfo = null;
    rawTitle = null;
    if (item.Type() == ItemFields.ItemType.Extra)
    {
        return false;
    }

    // Drop every part recognized as a release group.
    while (true)
    {
        if (!TryParseGroup(parts, item, out _, out var groupHit)) break;
        parts.RemoveAt(groupHit.partIndex);
    }

    var tokens = parts.SelectMany(part => part.Split("-")).ToList();

    // Strip season markers until none is left.
    while (TryParseSeason(tokens, item, out _, out var seasonHit))
    {
        var at = seasonHit.partIndex;
        tokens[at] = tokens[at].Replace(seasonHit.content, "");
    }

    tokens = FilterParts(tokens);
    if (tokens.Count == 0) return false;
    rawTitle = tokens[0].Trim();

    // Try to match a year; only the first token carrying one is used.
    var yearHit = tokens
        .Select(token => Regex.Match(token, @"[(](\d{4})[)]"))
        .FirstOrDefault(match => match.Success);
    if (yearHit != null)
    {
        rawTitle = rawTitle.Replace(yearHit.Value, "").Trim();
        item.Info.TryAdd(ItemFields.Key_Year, yearHit.Groups[1].Value);
    }
    return true;
}
// Describes where a field parser found its match.
class MatchInfo
{
// The matched text: the parsers in this file store either a regex match value
// or a whole part here.
public string content;
// Index, within the list handed to the parser, of the element the match came from.
public int partIndex;
}
// Common signature of the TryParse* field parsers: given the name parts and the
// item, produce the field value plus match information; returns true on success.
private delegate bool FieldParser(List<string> parts, Item item, out string result, out MatchInfo matchInfo); // Ideally `item` would be replaced by `parts`.
/// <summary>
/// Decides whether a field parsed from <paramref name="node"/> should be
/// accepted, using the node's direct children as a vote.
/// </summary>
/// <remarks>
/// The node's own value is accepted when no child yields a value, or when a
/// strict majority of the children that did yield a value agree with each
/// other. The agreed value is not compared against the node's own value.
/// </remarks>
/// <param name="node">Node to parse; a node without info never yields a value.</param>
/// <param name="fieldParser">Parser applied to the node and to each child.</param>
/// <param name="result">The node's parsed value when accepted, otherwise null.</param>
/// <returns>True when the node's value was accepted.</returns>
private bool TryParseField(TreeNode node, FieldParser fieldParser, out string result)
{
    result = null;
    var item = node.Info;
    if (item == null) return false;
    var parts = GetParts(item);
    if (!fieldParser(parts, item, out var fieldValue, out _)) return false;

    var parsed = new List<string>();
    foreach (var child in node.Children)
    {
        // Apply the same guard as for the node itself: a child without info
        // cannot be parsed and simply does not take part in the vote.
        var childItem = child?.Info;
        if (childItem == null) continue;

        var childParts = GetParts(childItem);
        if (fieldParser(childParts, childItem, out var childFieldValue, out _))
        {
            parsed.Add(childFieldValue);
        }
    }

    if (parsed.Count == 0)
    {
        result = fieldValue;
        return true;
    }

    // Size of the largest group of children sharing the same value.
    var maxGroupCount = parsed.GroupBy(value => value).Max(group => group.Count());

    // With integer division this is a strict-majority test (e.g. 2 of 3, 3 of 4).
    if (maxGroupCount > parsed.Count / 2)
    {
        result = fieldValue;
        return true;
    }
    return false;
}
/// <summary>
/// Walks the tree breadth-first from <paramref name="node"/>. Where the field
/// can be parsed it is stored on that node's info and the node's children are
/// not visited; otherwise the search continues into the children.
/// </summary>
/// <param name="node">Root of the subtree to process.</param>
/// <param name="fieldParser">Parser used to extract the field.</param>
/// <param name="fieldName">Key under which the parsed value is stored.</param>
private void DoParse(TreeNode node, FieldParser fieldParser, string fieldName)
{
    var pending = new Queue<TreeNode>();
    pending.Enqueue(node);
    while (pending.TryDequeue(out var current))
    {
        if (!TryParseField(current, fieldParser, out var value))
        {
            // Not resolved at this level: descend.
            foreach (var child in current.Children)
            {
                pending.Enqueue(child);
            }
            continue;
        }

        // TryAdd keeps an already present value for this key.
        current.Info.Info.TryAdd(fieldName, value);
    }
}
/// <summary>
/// Runs every field parser over the tree rooted at <paramref name="node"/>,
/// one full pass per field, in a fixed order.
/// </summary>
/// <param name="node">Root of the tree to annotate.</param>
/// <returns>A task that is already complete when the method returns normally.</returns>
public async Task Parse(TreeNode node)
{
    // NOTE(review): declared async but contains no await, so all work runs
    // synchronously on the caller's thread.
    // The order is significant: the subtitle-language, episode and raw-title
    // parsers call item.Type(), which presumably reads the value stored by the
    // type pass - confirm against Item.
    (FieldParser Parser, string Field)[] passes =
    {
        (TryParseSeason, ItemFields.Key_Season),
        (TryParseGroup, ItemFields.Key_Group),
        (TryParseType, ItemFields.Key_Type),
        (TryParseSubtitleLanguage, ItemFields.Key_SubtitleLanguage),
        (TryParseEpisode, ItemFields.Key_Episode),
        (TryParseRawTitle, ItemFields.Key_RawTitle),
    };

    foreach (var (parser, field) in passes)
    {
        DoParse(node, parser, field);
    }
}
}