LearnGodot/Parsers/RawParser.cs
2025-12-29 22:54:30 +08:00

185 lines
5.1 KiB
C#

using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Learn.Models;
namespace Learn.Parsers;
/// <summary>
/// Parser that derives a raw title, a season and a group from an item's name.
/// The name is split into segments on the characters <c>[</c>, <c>]</c>, <c>_</c>,
/// <c>【</c> and <c>】</c>. When only one segment remains it is treated as the title;
/// otherwise the first segment is the group and the second one is the title.
/// </summary>
public class RawParser : ItemParser
{
    // Lazy quantifier: stop at the first closing marker so that a later occurrence of the
    // same character in the title is not swallowed into the season capture.
    private static readonly Regex ChineseSeasonRegex = new Regex(@"第(.+?)季");

    private static readonly Regex EnglishSeasonRegex = new Regex(@"[Ss]eason *(\d+)");

    // Segments ending in digits followed by P/p (e.g. "1080p") are dropped as noise.
    private static readonly Regex ResolutionRegex = new Regex(@"\d+[Pp]$");

    private static readonly Regex PartRegex = new Regex(@"[^\[\]_【】]+");

    /// <summary>
    /// Converts a season token to its decimal string form.
    /// </summary>
    /// <param name="season">Either an integer literal or a single Chinese numeral (zero to nine).</param>
    /// <returns>
    /// The normalised number (leading zeros removed) when the token is an integer, the digit
    /// for a known single Chinese numeral, or the token unchanged when it is not recognised.
    /// </returns>
    private static string ParseSeasonFromCN(string season)
    {
        // Normalise numeric tokens the same way the "Season N" branch does ("02" -> "2").
        if (int.TryParse(season, out var number)) return number.ToString();

        return season switch
        {
            "零" => "0",
            "一" => "1",
            "二" => "2",
            "三" => "3",
            "四" => "4",
            "五" => "5",
            "六" => "6",
            "七" => "7",
            "八" => "8",
            "九" => "9",
            _ => season,
        };
    }

    /// <summary>
    /// Separates a season marker from a title segment.
    /// </summary>
    /// <param name="rawTitle">Title segment that may contain a season marker.</param>
    /// <returns>
    /// The trimmed title with every occurrence of the matched marker removed, and the season
    /// string; the season is <c>null</c> when no marker is found.
    /// </returns>
    private static (string Title, string Season) SplitTitleAndSeason(string rawTitle)
    {
        var chinese = ChineseSeasonRegex.Match(rawTitle);
        if (chinese.Success)
        {
            var season = ParseSeasonFromCN(chinese.Groups[1].Value.Trim());
            return (rawTitle.Replace(chinese.Value, "").Trim(), season);
        }

        var english = EnglishSeasonRegex.Match(rawTitle);
        if (english.Success)
        {
            // Read from the English match itself; TryParse keeps an over-long digit run
            // from throwing and falls back to the digits as written.
            var digits = english.Groups[1].Value;
            var season = int.TryParse(digits, out var number) ? number.ToString() : digits;
            return (rawTitle.Replace(english.Value, "").Trim(), season);
        }

        // Trim here as well so the title is whitespace-free whether or not a season was found.
        return (rawTitle.Trim(), null);
    }

    /// <summary>
    /// Removes, in place, segments that are blank or that end with a resolution tag
    /// (digits followed by <c>P</c> or <c>p</c>).
    /// </summary>
    /// <param name="matches">Segment list to filter; modified in place.</param>
    private static void SimplifyMatches(List<string> matches)
    {
        matches.RemoveAll(part =>
            string.IsNullOrWhiteSpace(part) || ResolutionRegex.IsMatch(part.Trim()));
    }

    /// <summary>
    /// Splits a name into its trimmed, meaningful segments.
    /// </summary>
    /// <param name="name">Item name; may be null or empty.</param>
    /// <returns>The remaining segments in their original order; empty when nothing is usable.</returns>
    private static List<string> GetParts(string name)
    {
        if (string.IsNullOrEmpty(name)) return new List<string>();

        var parts = PartRegex.Matches(name)
            .Select(match => match.Value.Trim())
            .ToList();
        SimplifyMatches(parts);
        return parts;
    }

    /// <summary>
    /// Picks the segment that carries the title: the only segment when there is exactly one,
    /// otherwise the second segment.
    /// </summary>
    /// <param name="item">Item whose name is inspected.</param>
    /// <param name="part">The selected segment, or <c>null</c> when the name has no segments.</param>
    /// <returns><c>true</c> when a segment was selected.</returns>
    private static bool TryGetTitlePart(Item item, out string part)
    {
        part = null;
        var parts = GetParts(item.Name());
        if (parts.Count == 0) return false;

        part = parts.Count == 1 ? parts[0] : parts[1];
        return true;
    }

    /// <summary>
    /// Extracts the title (with any season marker removed) from an item's name.
    /// </summary>
    /// <param name="item">Item whose name is inspected.</param>
    /// <param name="rawTitle">The extracted title, or <c>null</c> on failure.</param>
    /// <returns><c>true</c> when the name contains at least one segment.</returns>
    private static bool TryParseRawTitle(Item item, out string rawTitle)
    {
        rawTitle = null;
        if (!TryGetTitlePart(item, out var part)) return false;

        (rawTitle, _) = SplitTitleAndSeason(part);
        return true;
    }

    /// <summary>
    /// Extracts the season from the title segment of an item's name.
    /// </summary>
    /// <param name="item">Item whose name is inspected.</param>
    /// <param name="season">The extracted season; null or empty on failure.</param>
    /// <returns><c>true</c> when a non-empty season was found.</returns>
    private static bool TryParseSeason(Item item, out string season)
    {
        season = null;
        if (!TryGetTitlePart(item, out var part)) return false;

        (_, season) = SplitTitleAndSeason(part);
        return !string.IsNullOrEmpty(season);
    }

    /// <summary>
    /// Extracts the group, i.e. the first segment of a name that has at least two segments.
    /// </summary>
    /// <param name="item">Item whose name is inspected.</param>
    /// <param name="group">The extracted group, or <c>null</c> on failure.</param>
    /// <returns><c>true</c> when the name has more than one segment.</returns>
    private static bool TryParseGroup(Item item, out string group)
    {
        group = null;
        var parts = GetParts(item.Name());
        if (parts.Count <= 1) return false;

        group = parts[0];
        return true;
    }

    /// <summary>Signature shared by the per-field extraction methods above.</summary>
    private delegate bool FieldParser(Item item, out string result);

    /// <summary>
    /// Decides whether a node's own parsed value should be recorded for it.
    /// The node must parse successfully, and either none of its children parse or a strict
    /// majority of the children that do parse share the same value.
    /// </summary>
    /// <param name="node">Node to evaluate.</param>
    /// <param name="fieldParser">Extraction method for the field being evaluated.</param>
    /// <param name="result">The node's own parsed value on success, otherwise <c>null</c>.</param>
    /// <returns><c>true</c> when the value is accepted for this node.</returns>
    private static bool TryParseField(TreeNode node, FieldParser fieldParser, out string result)
    {
        result = null;
        if (node.Info == null) return false;
        if (!fieldParser(node.Info, out var fieldValue)) return false;

        var childValues = new List<string>();
        foreach (var child in node.Children)
        {
            // A node's Info may be null (see the check above); skip such children instead of
            // handing a null item to the parser.
            if (child.Info == null) continue;

            if (fieldParser(child.Info, out var childValue))
            {
                childValues.Add(childValue);
            }
        }

        // Short-circuit keeps Max() from running on an empty sequence.
        var accepted = childValues.Count == 0
            || childValues.GroupBy(value => value).Max(group => group.Count()) > childValues.Count / 2;
        if (!accepted) return false;

        result = fieldValue;
        return true;
    }

    /// <summary>
    /// Walks the tree breadth-first from <paramref name="node"/>. When a node's value is
    /// accepted it is added to that node's info under <paramref name="fieldName"/> and the
    /// node's children are not visited; otherwise the children are queued.
    /// </summary>
    /// <param name="node">Root of the subtree to process.</param>
    /// <param name="fieldParser">Extraction method for the field.</param>
    /// <param name="fieldName">Key under which the value is added.</param>
    private static void DoParse(TreeNode node, FieldParser fieldParser, string fieldName)
    {
        var pending = new Queue<TreeNode>();
        pending.Enqueue(node);

        while (pending.Count > 0)
        {
            var current = pending.Dequeue();
            if (TryParseField(current, fieldParser, out var fieldValue))
            {
                current.Info.Info.TryAdd(fieldName, fieldValue);
                continue;
            }

            foreach (var child in current.Children)
            {
                pending.Enqueue(child);
            }
        }
    }

    /// <summary>
    /// Runs the raw-title, season and group passes over the tree rooted at
    /// <paramref name="node"/>, in that order. The method contains no awaits, so all work
    /// is done before the returned task is handed back.
    /// </summary>
    /// <param name="node">Root of the tree to annotate.</param>
    public async Task Parse(TreeNode node)
    {
        DoParse(node, TryParseRawTitle, ItemFields.Key_RawTitle);
        DoParse(node, TryParseSeason, ItemFields.Key_Season);
        DoParse(node, TryParseGroup, ItemFields.Key_Group);
    }
}