185 lines
5.1 KiB
C#
185 lines
5.1 KiB
C#
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Text.RegularExpressions;
|
|
using System.Threading.Tasks;
|
|
using Learn.Models;
|
|
|
|
namespace Learn.Parsers;
|
|
|
|
/// <summary>
/// Derives the raw title, season and group fields from item names and stores
/// them on the items of a <see cref="TreeNode"/> tree.
/// </summary>
/// <remarks>
/// An item name is split into parts on the characters <c>[ ] _ 【 】</c>.
/// Blank parts and parts ending in a resolution tag such as <c>1080p</c> are
/// discarded. When more than one part remains, the first part is treated as
/// the group and the second as the title; when only one part remains it is the
/// title and there is no group.
/// </remarks>
public class RawParser : ItemParser
{
    // "第N季" season marker. The capture is greedy, so it extends to the last
    // '季' in the title.
    private static readonly Regex ChineseSeasonPattern = new(@"第(.+)季");

    // "Season N" / "seasonN" season marker.
    private static readonly Regex EnglishSeasonPattern = new(@"[Ss]eason *(\d+)");

    // A part that ends in digits followed by 'p' or 'P', e.g. "1080p".
    private static readonly Regex ResolutionPattern = new(@"\d+[Pp]$");

    // A maximal run of characters that are not one of the part delimiters.
    private static readonly Regex PartPattern = new(@"[^\[\]_【】]+");

    /// <summary>
    /// Converts a single Chinese numeral (零 through 九) to its digit string.
    /// </summary>
    /// <param name="season">The text captured from a "第…季" marker.</param>
    /// <returns>
    /// <paramref name="season"/> unchanged when it already parses as an integer
    /// or is not one of the ten recognised numerals; otherwise the matching
    /// digit as a string.
    /// </returns>
    private static string ParseSeasonFromCN(string season)
    {
        if (int.TryParse(season, out _))
        {
            return season;
        }

        return season switch
        {
            "零" => "0",
            "一" => "1",
            "二" => "2",
            "三" => "3",
            "四" => "4",
            "五" => "5",
            "六" => "6",
            "七" => "7",
            "八" => "8",
            "九" => "9",
            _ => season,
        };
    }

    /// <summary>
    /// Splits a title into the title text and the season it mentions.
    /// </summary>
    /// <param name="rawTitle">Title text that may contain a season marker.</param>
    /// <returns>
    /// The title with the season marker removed and the season string, or the
    /// unmodified title and <c>null</c> when no season marker is present.
    /// </returns>
    private static (string Title, string Season) SplitTitleAndSeason(string rawTitle)
    {
        var chineseMatch = ChineseSeasonPattern.Match(rawTitle);
        if (chineseMatch.Success)
        {
            var season = ParseSeasonFromCN(chineseMatch.Groups[1].Value.Trim());
            return (RemoveSeasonMarker(rawTitle, chineseMatch), season);
        }

        var englishMatch = EnglishSeasonPattern.Match(rawTitle);
        if (englishMatch.Success)
        {
            // Read the groups of the English match. The Chinese match has
            // failed at this point and its groups are empty strings.
            var digits = englishMatch.Groups[1].Value;

            // Round-tripping through int drops leading zeros ("02" -> "2").
            // \d also matches non-ASCII digits and the run may overflow int,
            // so fall back to the raw digits instead of throwing.
            var season = int.TryParse(digits, out var number) ? number.ToString() : digits;
            return (RemoveSeasonMarker(rawTitle, englishMatch), season);
        }

        return (rawTitle, null);
    }

    /// <summary>
    /// Removes the text of a season marker from a title and trims the result.
    /// </summary>
    /// <param name="rawTitle">The title containing the marker.</param>
    /// <param name="marker">A successful season-marker match within the title.</param>
    /// <returns>The title without the marker text.</returns>
    private static string RemoveSeasonMarker(string rawTitle, Match marker)
    {
        // string.Replace removes every occurrence of the marker text, not only
        // the matched one.
        return rawTitle.Replace(marker.Value.Trim(), "").Trim();
    }

    /// <summary>
    /// Removes parts that are blank or that end in a resolution tag.
    /// </summary>
    /// <param name="matches">The list of name parts; modified in place.</param>
    private static void SimplifyMatches(List<string> matches)
    {
        matches.RemoveAll(part =>
            string.IsNullOrWhiteSpace(part) || ResolutionPattern.IsMatch(part.Trim()));
    }

    /// <summary>
    /// Splits an item name into its meaningful parts.
    /// </summary>
    /// <param name="name">The item name.</param>
    /// <returns>
    /// The delimiter-separated parts of <paramref name="name"/>, in order,
    /// without blank parts and resolution tags. Parts are not trimmed.
    /// </returns>
    private static List<string> GetParts(string name)
    {
        var parts = PartPattern.Matches(name).Select(match => match.Value).ToList();
        SimplifyMatches(parts);
        return parts;
    }

    /// <summary>
    /// Selects the part of an item's name that holds the title.
    /// </summary>
    /// <param name="item">The item whose name is inspected.</param>
    /// <param name="titlePart">
    /// The only part when the name has a single part, otherwise the second
    /// part; <c>null</c> when the name has no parts.
    /// </param>
    /// <returns><c>true</c> when the name has at least one part.</returns>
    private static bool TryGetTitlePart(Item item, out string titlePart)
    {
        var parts = GetParts(item.Name());
        if (parts.Count == 0)
        {
            titlePart = null;
            return false;
        }

        titlePart = parts.Count == 1 ? parts[0] : parts[1];
        return true;
    }

    /// <summary>
    /// Extracts the title, without its season marker, from an item's name.
    /// </summary>
    /// <param name="item">The item whose name is parsed.</param>
    /// <param name="rawTitle">The extracted title, or <c>null</c> on failure.</param>
    /// <returns><c>true</c> when the name contains a title part.</returns>
    private static bool TryParseRawTitle(Item item, out string rawTitle)
    {
        rawTitle = null;
        if (!TryGetTitlePart(item, out var titlePart))
        {
            return false;
        }

        (rawTitle, _) = SplitTitleAndSeason(titlePart);
        return true;
    }

    /// <summary>
    /// Extracts the season mentioned in the title part of an item's name.
    /// </summary>
    /// <param name="item">The item whose name is parsed.</param>
    /// <param name="season">The season string; null or empty on failure.</param>
    /// <returns><c>true</c> when a non-empty season was found.</returns>
    private static bool TryParseSeason(Item item, out string season)
    {
        season = null;
        if (!TryGetTitlePart(item, out var titlePart))
        {
            return false;
        }

        (_, season) = SplitTitleAndSeason(titlePart);
        return !string.IsNullOrEmpty(season);
    }

    /// <summary>
    /// Extracts the group, i.e. the first part of a multi-part item name.
    /// </summary>
    /// <param name="item">The item whose name is parsed.</param>
    /// <param name="group">The first name part, or <c>null</c> on failure.</param>
    /// <returns><c>true</c> when the name has more than one part.</returns>
    private static bool TryParseGroup(Item item, out string group)
    {
        group = null;
        var parts = GetParts(item.Name());
        if (parts.Count <= 1)
        {
            return false;
        }

        group = parts[0];
        return true;
    }

    /// <summary>
    /// Signature shared by the per-field parsers so the tree walk can be
    /// reused for every field.
    /// </summary>
    private delegate bool FieldParser(Item item, out string result);

    /// <summary>
    /// Decides whether a field value parsed from a node can be assigned to it.
    /// </summary>
    /// <param name="node">The node to evaluate.</param>
    /// <param name="fieldParser">The parser for the field.</param>
    /// <param name="result">The node's own parsed value when accepted; otherwise <c>null</c>.</param>
    /// <returns>
    /// <c>true</c> when the node's own name parses and either no child yields
    /// a value or the most frequent child value accounts for more than half of
    /// the child values.
    /// </returns>
    private bool TryParseField(TreeNode node, FieldParser fieldParser, out string result)
    {
        result = null;
        if (node.Info is null)
        {
            return false;
        }

        if (!fieldParser(node.Info, out var fieldValue))
        {
            return false;
        }

        var childValues = new List<string>();
        foreach (var child in node.Children)
        {
            // A child without Info has no name to parse; skip it instead of
            // letting the field parser dereference null.
            if (child.Info is not null && fieldParser(child.Info, out var childValue))
            {
                childValues.Add(childValue);
            }
        }

        // Only children that produced a value take part in the vote.
        var totalCount = childValues.Count;
        if (totalCount == 0)
        {
            result = fieldValue;
            return true;
        }

        var maxGroupCount = childValues.GroupBy(value => value)
            .Select(group => group.Count())
            .Max();

        if (maxGroupCount > totalCount / 2)
        {
            // NOTE(review): the node's own value is stored here, not the value
            // the children agreed on - confirm this is intended.
            result = fieldValue;
            return true;
        }

        return false;
    }

    /// <summary>
    /// Walks the tree breadth-first and stores the field on every node where
    /// it can be determined.
    /// </summary>
    /// <param name="node">The root of the tree to process.</param>
    /// <param name="fieldParser">The parser for the field.</param>
    /// <param name="fieldName">The key under which the value is stored.</param>
    private void DoParse(TreeNode node, FieldParser fieldParser, string fieldName)
    {
        var pending = new Queue<TreeNode>();
        pending.Enqueue(node);

        while (pending.Count > 0)
        {
            var current = pending.Dequeue();
            if (TryParseField(current, fieldParser, out var fieldValue))
            {
                // TryAdd leaves an already present value untouched. The
                // children of a node that received a value are not visited.
                current.Info.Info.TryAdd(fieldName, fieldValue);
            }
            else
            {
                foreach (var child in current.Children)
                {
                    pending.Enqueue(child);
                }
            }
        }
    }

    /// <summary>
    /// Parses the raw title, season and group fields for the given tree.
    /// </summary>
    /// <param name="node">The root of the tree to process.</param>
    /// <returns>A task that is complete once all three passes have run.</returns>
    /// <remarks>
    /// The work runs synchronously. The method stays <c>async</c> so that an
    /// exception surfaces through the returned task, as before.
    /// </remarks>
    public async Task Parse(TreeNode node)
    {
        DoParse(node, TryParseRawTitle, ItemFields.Key_RawTitle);
        DoParse(node, TryParseSeason, ItemFields.Key_Season);
        DoParse(node, TryParseGroup, ItemFields.Key_Group);
    }
}