428 lines
12 KiB
C#
428 lines
12 KiB
C#
using System;
|
||
using System.Collections.Generic;
|
||
using System.IO;
|
||
using System.Linq;
|
||
using System.Text.RegularExpressions;
|
||
using System.Threading.Tasks;
|
||
using Learn.Config;
|
||
using Learn.Models;
|
||
using Learn.Utils;
|
||
|
||
namespace Learn.Parsers;
|
||
|
||
public class RawParser(Configs configs) : ItemParser
|
||
{
|
||
/// <summary>
/// Settings for this parser. The value is fetched from <c>configs</c> on every access
/// rather than cached in a field.
/// </summary>
private RawParserConfig config
{
    get { return configs.Get<RawParserConfig>(); }
}
|
||
|
||
/// <summary>
/// Cleans a list of name parts: every part is trimmed, the text matched by each configured
/// token-filter regex is removed from it, and parts that end up empty are dropped.
/// </summary>
/// <param name="parts">Parts to clean. The list itself is not modified.</param>
/// <returns>A new list containing the cleaned, non-empty parts in their original order.</returns>
/// <remarks>
/// For each regex only the first match in a part is looked up, but the matched text is removed
/// with <see cref="string.Replace(string, string)"/>, so every occurrence of that text in the
/// part disappears.
/// </remarks>
private List<string> FilterParts(List<string> parts)
{
    // Trim up front so the result is normalised even when no filter rule is configured.
    var result = parts.Select(part => part.Trim()).ToList();

    // Missing rule set means "nothing to filter", mirroring how the season/group rules are read.
    var regexes = config.TokenFilterRules?.Regexes;
    if (regexes != null)
    {
        foreach (var regex in regexes)
        {
            result = result.Select(part =>
            {
                var match = Regex.Match(part, regex);

                // A zero-length match has nothing to strip, and string.Replace throws
                // ArgumentException when asked to replace an empty string.
                if (!match.Success || match.Length == 0)
                {
                    return part;
                }

                return part.Replace(match.Value, "").Trim();
            }).ToList();
        }
    }

    return result.Where(part => !string.IsNullOrEmpty(part)).ToList();
}
|
||
|
||
/// <summary>
/// Breaks an item's name into parts. Every match of <c>config.SplitRegex</c> in the name is
/// trimmed, blank matches are discarded, and the remainder is passed through
/// <see cref="FilterParts"/>.
/// </summary>
/// <param name="item">Item whose name is split.</param>
/// <returns>The filtered parts, in the order they occur in the name.</returns>
private List<string> GetParts(Item item)
{
    var pieces = new List<string>();
    foreach (Match hit in Regex.Matches(item.Name(), config.SplitRegex))
    {
        var text = hit.Value.Trim();
        if (text.Length > 0)
        {
            pieces.Add(text);
        }
    }

    return FilterParts(pieces);
}
|
||
|
||
/// <summary>
/// Converts a captured season token into its canonical decimal form.
/// </summary>
/// <param name="seasonPart">
/// Either text accepted by <see cref="int.TryParse(string, out int)"/> (so "03" becomes "3"),
/// or one of the single Chinese numerals 零 through 九.
/// </param>
/// <param name="season">The normalised season number, or <c>null</c> when the input is not recognised.</param>
/// <returns><c>true</c> when <paramref name="seasonPart"/> was recognised.</returns>
private bool TryNormalizeSeason(string seasonPart, out string season)
{
    if (int.TryParse(seasonPart, out var number))
    {
        season = number.ToString();
        return true;
    }

    // Only single-character Chinese numerals are handled; anything else is rejected.
    season = seasonPart switch
    {
        "零" => "0",
        "一" => "1",
        "二" => "2",
        "三" => "3",
        "四" => "4",
        "五" => "5",
        "六" => "6",
        "七" => "7",
        "八" => "8",
        "九" => "9",
        _ => null,
    };

    return season != null;
}
|
||
|
||
/// <summary>
/// Tests a single token against the configured season regexes.
/// </summary>
/// <param name="token">Token to inspect; it is trimmed before matching.</param>
/// <param name="seasonPart">Normalised season number taken from capture group 1 of the matching regex.</param>
/// <param name="matchPart">Full text of the regex match.</param>
/// <returns>
/// <c>true</c> when a regex matched and its first capture group could be normalised by
/// <see cref="TryNormalizeSeason"/>. <c>false</c> when no season rules are configured or
/// nothing matched.
/// </returns>
/// <remarks>
/// The first regex that matches decides the outcome: if its capture cannot be normalised the
/// method returns <c>false</c> without trying later regexes, and <paramref name="matchPart"/>
/// still holds the matched text.
/// </remarks>
private bool TryMatchSeason(string token, out string seasonPart, out string matchPart)
{
    seasonPart = null;
    matchPart = null;

    var patterns = config.SeasonMatchRules?.Regexes;
    if (patterns == null)
    {
        return false;
    }

    var candidate = token.Trim();
    foreach (var pattern in patterns)
    {
        var hit = Regex.Match(candidate, pattern);
        if (hit.Success)
        {
            matchPart = hit.Value;
            return TryNormalizeSeason(hit.Groups[1].Value, out seasonPart);
        }
    }

    return false;
}
|
||
|
||
/// <summary>
/// Looks for a season marker in the given parts. Each part is split on '-' and the resulting
/// tokens are tested in order with <see cref="TryMatchSeason"/>; the first hit wins.
/// </summary>
/// <param name="parts">Name parts to scan.</param>
/// <param name="item">Not used by this parser; present to satisfy the <c>FieldParser</c> signature.</param>
/// <param name="season">Normalised season number of the first hit, otherwise <c>null</c>.</param>
/// <param name="matchInfo">Matched text and the index of the part it was found in, otherwise <c>null</c>.</param>
/// <returns><c>true</c> when a season was found.</returns>
private bool TryParseSeason(List<string> parts, Item item, out string season, out MatchInfo matchInfo)
{
    season = null;
    matchInfo = null;

    var index = 0;
    foreach (var part in parts)
    {
        foreach (var token in part.Split("-"))
        {
            if (TryMatchSeason(token, out season, out var matched))
            {
                matchInfo = new MatchInfo { content = matched, partIndex = index };
                return true;
            }
        }

        index++;
    }

    return false;
}
|
||
|
||
/// <summary>
/// Reports whether <paramref name="item"/> equals any entry of <paramref name="sequence"/>,
/// ignoring case (ordinal comparison).
/// </summary>
/// <param name="item">Text to look up.</param>
/// <param name="sequence">Candidate values; <c>null</c> is treated as "no candidates".</param>
/// <returns><c>true</c> when at least one entry equals <paramref name="item"/>.</returns>
private bool IsFullMatch(string item, List<string> sequence)
{
    return sequence != null
        && sequence.Exists(candidate => item.Equals(candidate, StringComparison.OrdinalIgnoreCase));
}
|
||
|
||
/// <summary>
/// Reports whether <paramref name="item"/> contains any entry of <paramref name="sequence"/>
/// as a substring, ignoring case (ordinal comparison).
/// </summary>
/// <param name="item">Text to search in.</param>
/// <param name="sequence">Fragments to look for; <c>null</c> is treated as "no fragments".</param>
/// <returns><c>true</c> when at least one fragment occurs in <paramref name="item"/>.</returns>
private bool IsPartialMatch(string item, List<string> sequence)
{
    if (sequence == null)
    {
        return false;
    }

    foreach (var fragment in sequence)
    {
        if (item.Contains(fragment, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
|
||
|
||
/// <summary>
/// Finds the part that names a group, according to <c>config.GroupsMatchRules</c>.
/// All parts are first checked for an exact match against the <c>Full</c> list; only when none
/// matches are they checked for a substring match against the <c>Partial</c> list.
/// </summary>
/// <param name="parts">Name parts to scan.</param>
/// <param name="item">Not used by this parser; present to satisfy the <c>FieldParser</c> signature.</param>
/// <param name="group">The whole matching part, otherwise <c>null</c>.</param>
/// <param name="matchInfo">The matching part and its index, otherwise <c>null</c>.</param>
/// <returns><c>true</c> when a group part was found.</returns>
private bool TryParseGroup(List<string> parts, Item item, out string group, out MatchInfo matchInfo)
{
    // Exact matches anywhere in the list take priority over substring matches.
    var index = parts.FindIndex(part => IsFullMatch(part, config.GroupsMatchRules?.Full));
    if (index < 0)
    {
        index = parts.FindIndex(part => IsPartialMatch(part, config.GroupsMatchRules?.Partial));
    }

    if (index < 0)
    {
        group = null;
        matchInfo = null;
        return false;
    }

    group = parts[index];
    matchInfo = new MatchInfo { content = parts[index], partIndex = index };
    return true;
}
|
||
|
||
/// <summary>
/// Classifies an item using <c>config.TypeMatchRules</c>. Checks run in this order:
/// extra by name, folder, extra by missing or listed extension, subtitle by extension,
/// episode by extension.
/// </summary>
/// <param name="item">Item to classify.</param>
/// <returns>
/// <c>Extra</c>, <c>Subtitle</c> or <c>Episode</c> when a rule applies; <c>Unknown</c> for
/// folders that are not extras and for files whose extension matches no rule.
/// </returns>
private ItemFields.ItemType ParseItemType(Item item)
{
    // Case-insensitive membership test shared by all rule lists below.
    static bool ContainsIgnoreCase(IEnumerable<string> candidates, string value)
    {
        foreach (var candidate in candidates)
        {
            if (value.Equals(candidate, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }

        return false;
    }

    // 1. Check whether the item belongs to the extras (matched on the full name).
    if (ContainsIgnoreCase(config.TypeMatchRules.Extra.IfDirNameIs, item.Name()))
    {
        return ItemFields.ItemType.Extra;
    }

    if (item.IsFolder())
    {
        return ItemFields.ItemType.Unknown;
    }

    var extension = Path.GetExtension(item.Name());

    // A file without an extension is classified as an extra.
    if (string.IsNullOrEmpty(extension))
    {
        return ItemFields.ItemType.Extra;
    }

    if (ContainsIgnoreCase(config.TypeMatchRules.Extra.IfFileExtensionIs, extension))
    {
        return ItemFields.ItemType.Extra;
    }

    // 2. Check whether it is a subtitle.
    if (ContainsIgnoreCase(config.TypeMatchRules.Subtitle.IfFileExtensionIs, extension))
    {
        return ItemFields.ItemType.Subtitle;
    }

    // 3. Check whether it is an episode.
    if (ContainsIgnoreCase(config.TypeMatchRules.Episode.IfFileExtensionIs, extension))
    {
        return ItemFields.ItemType.Episode;
    }

    // 4. None of the above: unknown.
    return ItemFields.ItemType.Unknown;
}
|
||
|
||
/// <summary>
/// <c>FieldParser</c> adapter around <see cref="ParseItemType"/>.
/// </summary>
/// <param name="parts">Not used; the type is derived from the item itself.</param>
/// <param name="item">Item to classify.</param>
/// <param name="type">Name of the detected type, or <c>null</c> when the type is <c>Unknown</c>.</param>
/// <param name="matchInfo">Always <c>null</c>; no text match is involved.</param>
/// <returns><c>true</c> unless the detected type is <c>Unknown</c>.</returns>
private bool TryParseType(List<string> parts, Item item, out string type, out MatchInfo matchInfo)
{
    matchInfo = null;

    var detected = ParseItemType(item);
    var recognised = detected != ItemFields.ItemType.Unknown;

    type = recognised ? detected.ToString() : null;
    return recognised;
}
|
||
|
||
/// <summary>
/// Reads the language tag of a subtitle file from its name. The name is split on '.' and the
/// second-to-last segment is taken (for example the "en" in "show.en.srt").
/// </summary>
/// <param name="parts">Not used; the full item name is inspected instead.</param>
/// <param name="item">Item to inspect; only items whose type is <c>Subtitle</c> are considered.</param>
/// <param name="language">The extracted segment, otherwise <c>null</c>.</param>
/// <param name="matchInfo">Always <c>null</c>.</param>
/// <returns>
/// <c>true</c> when the item is a subtitle and its name has at least three dot-separated segments.
/// </returns>
private bool TryParseSubtitleLanguage(List<string> parts, Item item, out string language, out MatchInfo matchInfo)
{
    matchInfo = null;
    language = null;

    if (item.Type() == ItemFields.ItemType.Subtitle)
    {
        var segments = item.Name().Split(".");
        if (segments.Length >= 3)
        {
            language = segments[segments.Length - 2];
            return true;
        }
    }

    return false;
}
|
||
|
||
/// <summary>
/// Looks for an episode number in the given parts. Each part is split on '-', every token is
/// trimmed and tested against the configured episode regexes, and the first match whose
/// capture group 1 is an integer wins.
/// </summary>
/// <param name="parts">Name parts to scan.</param>
/// <param name="item">Item being parsed; items typed <c>Extra</c> or <c>Unknown</c> are skipped.</param>
/// <param name="episode">Episode number without leading zeros, otherwise <c>null</c>.</param>
/// <param name="matchInfo">Matched text and the index of the part it was found in, otherwise <c>null</c>.</param>
/// <returns><c>true</c> when an episode number was found.</returns>
private bool TryParseEpisode(List<string> parts, Item item, out string episode, out MatchInfo matchInfo)
{
    episode = null;
    matchInfo = null;

    var itemType = item.Type();
    if (itemType == ItemFields.ItemType.Extra || itemType == ItemFields.ItemType.Unknown)
    {
        return false;
    }

    // Missing rule set means "no episode patterns", mirroring how the season rules are read.
    var regexes = config.EpisodeRules?.Regexes;
    if (regexes == null)
    {
        return false;
    }

    for (int i = 0; i < parts.Count; i++)
    {
        foreach (var token in parts[i].Split("-"))
        {
            var candidate = token.Trim();
            foreach (var regex in regexes)
            {
                var match = Regex.Match(candidate, regex);
                if (!match.Success)
                {
                    continue;
                }

                // A rule without a capture group, or one capturing non-numeric or oversized
                // text, must not abort parsing: skip it and try the next rule.
                if (!int.TryParse(match.Groups[1].Value, out var number))
                {
                    continue;
                }

                episode = number.ToString();
                matchInfo = new MatchInfo
                {
                    content = match.Value,
                    partIndex = i
                };
                return true;
            }
        }
    }

    return false;
}
|
||
|
||
|
||
/// <summary>
/// Derives the raw title from the name parts once group and season markers are removed.
/// </summary>
/// <param name="parts">Name parts. This list is modified: parts recognised as groups are removed from it.</param>
/// <param name="item">
/// Item being parsed; <c>Extra</c> items are skipped. When a parenthesised four-digit year is
/// found it is added to <c>item.Info</c> under <c>ItemFields.Key_Year</c>.
/// </param>
/// <param name="rawTitle">The first remaining token with any year marker stripped, otherwise <c>null</c>.</param>
/// <param name="matchInfo">Always <c>null</c>.</param>
/// <returns><c>true</c> when at least one token remains after filtering.</returns>
private bool TryParseRawTitle(List<string> parts, Item item, out string rawTitle, out MatchInfo matchInfo)
{
    rawTitle = null;
    matchInfo = null;

    if (item.Type() == ItemFields.ItemType.Extra)
    {
        return false;
    }

    // Drop every part that names a group.
    while (TryParseGroup(parts, item, out _, out var groupHit))
    {
        parts.RemoveAt(groupHit.partIndex);
    }

    var tokens = parts.SelectMany(part => part.Split("-")).ToList();

    // Erase season markers from the tokens until none is left.
    while (TryParseSeason(tokens, item, out _, out var seasonHit))
    {
        var position = seasonHit.partIndex;
        tokens[position] = tokens[position].Replace(seasonHit.content, "");
    }

    tokens = FilterParts(tokens);
    if (tokens.Count == 0)
    {
        return false;
    }

    rawTitle = tokens[0].Trim();

    // Try to match a year: the first token carrying a parenthesised four-digit number wins.
    var yearHit = tokens
        .Select(token => Regex.Match(token, @"[((](\d{4})[))]"))
        .FirstOrDefault(candidate => candidate.Success);
    if (yearHit != null)
    {
        rawTitle = rawTitle.Replace(yearHit.Value, "").Trim();
        item.Info.TryAdd(ItemFields.Key_Year, yearHit.Groups[1].Value);
    }

    return true;
}
|
||
|
||
|
||
/// <summary>
/// Describes where a field parser found its value.
/// </summary>
class MatchInfo
{
    /// <summary>The text that was matched.</summary>
    public string content;

    /// <summary>Index, within the scanned list, of the part that contained the match.</summary>
    public int partIndex;
}
|
||
|
||
// Signature shared by the TryParse* field parsers that Parse hands to DoParse.
private delegate bool FieldParser(List<string> parts, Item item, out string result, out MatchInfo matchInfo); // Ideally `item` would be replaced by `parts`
|
||
|
||
/// <summary>
/// Decides whether a field parsed from a node's own name should be accepted for that node.
/// </summary>
/// <param name="node">Node to evaluate.</param>
/// <param name="fieldParser">Parser that extracts the field from an item's name parts.</param>
/// <param name="result">The value parsed from the node itself when accepted, otherwise <c>null</c>.</param>
/// <returns>
/// <c>false</c> when the node has no item or the parser finds nothing in it. Otherwise the
/// parser is also run on every direct child: the node's value is accepted when no child yields
/// a value, or when more than half of the yielded child values are identical to each other.
/// </returns>
/// <remarks>
/// The child values are only compared with one another, not with the node's own value.
/// </remarks>
private bool TryParseField(TreeNode node, FieldParser fieldParser, out string result)
{
    result = null;

    var item = node.Info;
    if (item == null)
    {
        return false;
    }

    if (!fieldParser(GetParts(item), item, out var fieldValue, out _))
    {
        return false;
    }

    var childValues = new List<string>();
    foreach (var child in node.Children)
    {
        var childItem = child.Info;

        // Nodes may carry no item (see the check above); such children cannot be parsed
        // and would otherwise crash GetParts.
        if (childItem == null)
        {
            continue;
        }

        if (fieldParser(GetParts(childItem), childItem, out var childValue, out _))
        {
            childValues.Add(childValue);
        }
    }

    if (childValues.Count == 0)
    {
        result = fieldValue;
        return true;
    }

    // Size of the largest set of children that agree on one value.
    var largestAgreement = childValues.GroupBy(value => value).Max(group => group.Count());

    if (largestAgreement > childValues.Count / 2)
    {
        result = fieldValue;
        return true;
    }

    return false;
}
|
||
|
||
/// <summary>
/// Walks the tree breadth-first from <paramref name="node"/> and stores one field on the items.
/// When <see cref="TryParseField"/> accepts a value for a node, the value is added to that
/// node's item under <paramref name="fieldName"/> and the node's children are not visited;
/// otherwise the children are queued for the same treatment.
/// </summary>
/// <param name="node">Root of the subtree to process.</param>
/// <param name="fieldParser">Parser used to extract the field.</param>
/// <param name="fieldName">Key under which the value is added; <c>TryAdd</c> is used, so an existing entry is left as is.</param>
private void DoParse(TreeNode node, FieldParser fieldParser, string fieldName)
{
    var pending = new Queue<TreeNode>();
    pending.Enqueue(node);

    while (pending.Count != 0)
    {
        var visiting = pending.Dequeue();

        if (!TryParseField(visiting, fieldParser, out var value))
        {
            // Nothing accepted at this level: descend one level.
            foreach (var descendant in visiting.Children)
            {
                pending.Enqueue(descendant);
            }

            continue;
        }

        visiting.Info.Info.TryAdd(fieldName, value);
    }
}
|
||
|
||
/// <summary>
/// Runs every field parser over the tree rooted at <paramref name="node"/>, one full pass per
/// field, in this order: season, group, type, subtitle language, episode, raw title.
/// </summary>
/// <param name="node">Root of the tree to parse.</param>
/// <returns>
/// A task that is already finished when it is returned: completed on success, faulted when a
/// pass threw. All work is done synchronously.
/// </returns>
/// <remarks>
/// NOTE(review): the later passes call <c>item.Type()</c>, which presumably reads the value
/// stored by the type pass, so the order is likely significant. Confirm before reordering.
/// </remarks>
public Task Parse(TreeNode node)
{
    // The method has nothing to await, so it is not marked async (that would only raise
    // CS1998). Failures are still reported through the returned task, as they were before.
    try
    {
        DoParse(node, TryParseSeason, ItemFields.Key_Season);
        DoParse(node, TryParseGroup, ItemFields.Key_Group);
        DoParse(node, TryParseType, ItemFields.Key_Type);
        DoParse(node, TryParseSubtitleLanguage, ItemFields.Key_SubtitleLanguage);
        DoParse(node, TryParseEpisode, ItemFields.Key_Episode);
        DoParse(node, TryParseRawTitle, ItemFields.Key_RawTitle);
        return Task.CompletedTask;
    }
    catch (Exception ex)
    {
        return Task.FromException(ex);
    }
}
|
||
} |