374 lines
10 KiB
C#
374 lines
10 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Text.RegularExpressions;
|
|
using System.Threading.Tasks;
|
|
using Learn.Config;
|
|
using Learn.Models;
|
|
using Learn.Utils;
|
|
|
|
namespace Learn.Parsers;
|
|
|
|
public class RawParser(Configs configs) : ItemParser
|
|
{
|
|
/// <summary>
/// Parser settings. Looked up through <c>configs</c> on every access rather than cached.
/// </summary>
private RawParserConfig config
{
    get { return configs.Get<RawParserConfig>(); }
}
|
|
|
|
/// <summary>
/// Returns a copy of <paramref name="parts"/> without the entries that match any configured
/// token filter regex.
/// </summary>
/// <param name="parts">Tokens to filter. The list passed in is not modified.</param>
/// <returns>
/// A new list with the surviving tokens. When no filter rules are configured the copy is
/// returned unfiltered.
/// </returns>
private List<string> FilterParts(List<string> parts)
{
    var result = parts.ToList();

    // Treat the filter rules as optional, the same way the season and group rules are handled.
    var regexes = config.TokenFilterRules?.Regexes;
    if (regexes == null) return result;

    foreach (var regex in regexes)
    {
        // The pattern is tested against the trimmed token; only a yes/no answer is needed.
        result.RemoveAll(part => Regex.IsMatch(part.Trim(), regex));
    }
    return result;
}
|
|
|
|
/// <summary>
/// Splits an item's name into tokens: every match of the configured <c>SplitRegex</c> is
/// trimmed, empty results are dropped, and the rest is passed through <c>FilterParts</c>.
/// </summary>
/// <param name="item">Item whose name is tokenized.</param>
/// <returns>The filtered tokens, in the order they occur in the name.</returns>
private List<string> GetParts(Item item)
{
    var tokens = new List<string>();
    foreach (Match match in Regex.Matches(item.Name(), config.SplitRegex))
    {
        var trimmed = match.Value.Trim();
        if (trimmed.Length > 0)
        {
            tokens.Add(trimmed);
        }
    }
    return FilterParts(tokens);
}
|
|
|
|
/// <summary>
/// Converts the season text captured from a name into its canonical decimal form.
/// </summary>
/// <remarks>
/// Accepted inputs are integers (leading zeros are dropped, e.g. "02" becomes "2") and
/// Chinese numerals from 零 to 九十九 (e.g. "三" becomes "3", "十二" becomes "12",
/// "二十" becomes "20").
/// </remarks>
/// <param name="seasonPart">Raw season text; may be null or empty.</param>
/// <param name="season">The normalized season number, or null when the method returns false.</param>
/// <returns>True when <paramref name="seasonPart"/> could be normalized.</returns>
private bool TryNormalizeSeason(string seasonPart, out string season)
{
    season = null;
    if (string.IsNullOrEmpty(seasonPart)) return false;

    // Season numbers are machine data, so parse and format independently of the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    if (int.TryParse(seasonPart, System.Globalization.NumberStyles.Integer, invariant, out var seasonInt))
    {
        season = seasonInt.ToString(invariant);
        return true;
    }

    // The index of a character in this string is its numeric value.
    const string digits = "零一二三四五六七八九";
    const char ten = '十';

    int value;
    var tenIndex = seasonPart.IndexOf(ten);
    if (tenIndex < 0)
    {
        // Without 十 only a single numeral character is valid.
        if (seasonPart.Length != 1) return false;
        value = digits.IndexOf(seasonPart[0]);
        if (value < 0) return false;
    }
    else
    {
        // Shape is [tens digit]十[units digit], with both digits optional: 十, 十二, 二十, 二十一.
        var tensText = seasonPart.Substring(0, tenIndex);
        var unitsText = seasonPart.Substring(tenIndex + 1);
        if (tensText.Length > 1 || unitsText.Length > 1) return false;

        var tens = tensText.Length == 0 ? 1 : digits.IndexOf(tensText[0]);
        var units = unitsText.Length == 0 ? 0 : digits.IndexOf(unitsText[0]);

        // A written digit must be 一..九; this rejects unknown characters, "零十" and "十零".
        if (tens < 1 || (unitsText.Length == 1 && units < 1)) return false;
        value = tens * 10 + units;
    }

    season = value.ToString(invariant);
    return true;
}
|
|
|
|
/// <summary>
/// Tests a single token against the configured season regexes, in order, and returns the
/// first match whose first capture group can be normalized into a season number.
/// </summary>
/// <param name="token">Token to test; it is trimmed before matching.</param>
/// <param name="seasonPart">Normalized season number, or null when the method returns false.</param>
/// <param name="matchPart">Full text matched by the winning regex, or null when the method returns false.</param>
/// <returns>True when some rule matched and its captured season text was normalized.</returns>
private bool TryMatchSeason(string token, out string seasonPart, out string matchPart)
{
    seasonPart = null;
    matchPart = null;

    var regexes = config.SeasonMatchRules?.Regexes;
    if (regexes == null) return false;

    var candidate = token.Trim();

    foreach (var regex in regexes)
    {
        var match = Regex.Match(candidate, regex);
        if (!match.Success) continue;

        // Group 1 carries the season text. A rule whose capture cannot be normalized must not
        // stop later rules from being tried, and must not leak a partial result to the caller.
        if (!TryNormalizeSeason(match.Groups[1].Value, out var normalized)) continue;

        seasonPart = normalized;
        matchPart = match.Value;
        return true;
    }
    return false;
}
|
|
|
|
/// <summary>
/// Looks for a season marker in the node's name. Each name part is split on "-" and every
/// resulting token is offered to <c>TryMatchSeason</c>; the first hit wins.
/// </summary>
/// <param name="node">Node whose <c>Info</c> name is inspected.</param>
/// <param name="season">Normalized season number, or null when nothing matched.</param>
/// <param name="matchInfo">Matched text and the index of the part it came from, or null when nothing matched.</param>
/// <returns>True when a season was found.</returns>
private bool TryParseSeason(TreeNode node, out string season, out MatchInfo matchInfo)
{
    season = null;
    matchInfo = null;

    var index = 0;
    foreach (var part in GetParts(node.Info))
    {
        foreach (var token in part.Split("-"))
        {
            if (TryMatchSeason(token, out season, out var matched))
            {
                matchInfo = new MatchInfo { content = matched, partIndex = index };
                return true;
            }
        }
        index++;
    }

    return false;
}
|
|
|
|
/// <summary>
/// Tells whether <paramref name="item"/> equals any entry of <paramref name="sequence"/>,
/// ignoring case (ordinal comparison).
/// </summary>
/// <param name="item">Text to look up.</param>
/// <param name="sequence">Candidate values; null is treated as "no candidates".</param>
/// <returns>True when at least one candidate equals <paramref name="item"/>.</returns>
private bool IsFullMatch(string item, List<string> sequence)
{
    return sequence != null
        && sequence.Exists(candidate => item.Equals(candidate, StringComparison.OrdinalIgnoreCase));
}
|
|
|
|
/// <summary>
/// Tells whether <paramref name="item"/> contains any entry of <paramref name="sequence"/>
/// as a substring, ignoring case (ordinal comparison).
/// </summary>
/// <param name="item">Text to search in.</param>
/// <param name="sequence">Candidate substrings; null is treated as "no candidates".</param>
/// <returns>True when at least one candidate occurs inside <paramref name="item"/>.</returns>
private bool IsPartialMatch(string item, List<string> sequence)
{
    return sequence != null
        && sequence.Exists(candidate => item.Contains(candidate, StringComparison.OrdinalIgnoreCase));
}
|
|
|
|
/// <summary>
/// Finds the release group among the node's name parts. Exact (full) matches against the
/// configured group list take priority over all partial matches: the partial pass only runs
/// when no part matched exactly.
/// </summary>
/// <param name="node">Node whose <c>Info</c> name is inspected.</param>
/// <param name="group">The matching name part, or null when nothing matched.</param>
/// <param name="matchInfo">Matched part and its index, or null when nothing matched.</param>
/// <returns>True when a group was found.</returns>
private bool TryParseGroup(TreeNode node, out string group, out MatchInfo matchInfo)
{
    group = null;
    matchInfo = null;

    var parts = GetParts(node.Info);

    // Pass 1: exact names. Pass 2 (only if needed): substring rules.
    var hit = parts.FindIndex(part => IsFullMatch(part, config.GroupsMatchRules?.Full));
    if (hit < 0)
    {
        hit = parts.FindIndex(part => IsPartialMatch(part, config.GroupsMatchRules?.Partial));
    }
    if (hit < 0) return false;

    group = parts[hit];
    matchInfo = new MatchInfo { content = parts[hit], partIndex = hit };
    return true;
}
|
|
|
|
/// <summary>
/// Classifies a node as Extra, Subtitle, Episode or Unknown from its name and file extension,
/// using the configured type match rules.
/// </summary>
/// <remarks>
/// Checks run in this order and the first hit wins: name listed as an extra; folder (Unknown);
/// file without an extension (Extra); extension listed for extras; for subtitles; for episodes.
/// The extension is taken from <see cref="Path.GetExtension(string)"/>, so it includes the leading period.
/// </remarks>
/// <param name="node">Node to classify.</param>
/// <returns>The detected item type, or <c>Unknown</c> when no rule applies.</returns>
private ItemFields.ItemType ParseItemType(TreeNode node)
{
    // Case-insensitive membership test shared by all rule lists below.
    static bool ContainsIgnoreCase(IEnumerable<string> candidates, string value)
    {
        foreach (var candidate in candidates)
        {
            if (value.Equals(candidate, StringComparison.OrdinalIgnoreCase)) return true;
        }
        return false;
    }

    // 1. Does it belong to the extras? The name rule is checked before the folder test,
    //    so it applies to folders and files alike.
    if (ContainsIgnoreCase(config.TypeMatchRules.Extra.IfDirNameIs, node.Info.Name()))
    {
        return ItemFields.ItemType.Extra;
    }

    if (node.Info.IsFolder()) return ItemFields.ItemType.Unknown;

    var infoExt = Path.GetExtension(node.Info.Name());
    if (string.IsNullOrEmpty(infoExt)) return ItemFields.ItemType.Extra;

    if (ContainsIgnoreCase(config.TypeMatchRules.Extra.IfFileExtensionIs, infoExt))
    {
        return ItemFields.ItemType.Extra;
    }

    // 2. Is it a subtitle?
    if (ContainsIgnoreCase(config.TypeMatchRules.Subtitle.IfFileExtensionIs, infoExt))
    {
        return ItemFields.ItemType.Subtitle;
    }

    // 3. Is it an episode?
    if (ContainsIgnoreCase(config.TypeMatchRules.Episode.IfFileExtensionIs, infoExt))
    {
        return ItemFields.ItemType.Episode;
    }

    // 4. None of the above: unknown.
    return ItemFields.ItemType.Unknown;
}
|
|
|
|
/// <summary>
/// <c>FieldParser</c>-shaped wrapper around <c>ParseItemType</c>.
/// </summary>
/// <param name="node">Node to classify.</param>
/// <param name="type">Name of the detected item type, or null when the type is <c>Unknown</c>.</param>
/// <param name="matchInfo">Always null; the type is not tied to a particular name part.</param>
/// <returns>True when the detected type is anything other than <c>Unknown</c>.</returns>
private bool TryParseType(TreeNode node, out string type, out MatchInfo matchInfo)
{
    matchInfo = null;

    var typeEnum = ParseItemType(node);
    var known = typeEnum != ItemFields.ItemType.Unknown;
    type = known ? typeEnum.ToString() : null;
    return known;
}
|
|
|
|
/// <summary>
/// Reads the language tag of a subtitle file from its name: the second-to-last
/// dot-separated segment, e.g. "zh" in "name.zh.ass".
/// </summary>
/// <param name="node">Node to inspect; only nodes whose type is <c>Subtitle</c> are considered.</param>
/// <param name="language">The extracted segment, or null when the method returns false.</param>
/// <param name="matchInfo">Always null.</param>
/// <returns>
/// True when the node is a subtitle and its name has at least three dot-separated segments.
/// The segment is returned as is; it is not validated against any language list.
/// </returns>
private bool TryParseSubtitleLanguage(TreeNode node, out string language, out MatchInfo matchInfo)
{
    matchInfo = null;
    language = null;

    if (node.Info.Type() == ItemFields.ItemType.Subtitle)
    {
        var segments = node.Info.Name().Split(".");
        if (segments.Length >= 3)
        {
            language = segments[segments.Length - 2];
        }
    }

    // Split never yields null segments, so a non-null language means one was picked.
    return language != null;
}
|
|
|
|
/// <summary>
/// Looks for an episode number in the name of an episode node. Each name part is split on
/// "-" and the first token consisting of exactly one or two digits is taken as the episode.
/// </summary>
/// <param name="node">Node to inspect; only nodes whose type is <c>Episode</c> are considered.</param>
/// <param name="episode">
/// The episode number without leading zeros ("01" becomes "1", the same normalization the
/// season parser applies), or null when the method returns false.
/// </param>
/// <param name="matchInfo">Matched digits as written and the index of their part, or null when nothing matched.</param>
/// <returns>True when an episode number was found.</returns>
private bool TryParseEpisode(TreeNode node, out string episode, out MatchInfo matchInfo)
{
    episode = null;
    matchInfo = null;
    if (node.Info.Type() != ItemFields.ItemType.Episode)
    {
        return false;
    }

    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var parts = GetParts(node.Info);
    for (int i = 0; i < parts.Count; i++)
    {
        foreach (var token in parts[i].Split("-"))
        {
            var match = Regex.Match(token.Trim(), @"^\d{1,2}$");
            if (!match.Success) continue;

            // \d also accepts non-ASCII digits, which int.TryParse rejects; skip such tokens.
            if (!int.TryParse(match.Value, System.Globalization.NumberStyles.None, invariant, out var number))
            {
                continue;
            }

            episode = number.ToString(invariant);
            matchInfo = new MatchInfo
            {
                content = match.Value,
                partIndex = i
            };
            // Only report success for a token that actually matched.
            return true;
        }
    }

    return false;
}
|
|
|
|
// TODO: implement TryParseRawTitle. Planned signature (it matches the FieldParser delegate):
// private bool TryParseRawTitle(TreeNode node, out string rawTitle, out MatchInfo matchInfo)
|
|
|
|
|
|
/// <summary>
/// Describes where a parsed field value was found among the parts of an item's name.
/// </summary>
class MatchInfo
{
    /// <summary>Index, within the list of name parts, of the part the match came from.</summary>
    public int partIndex;

    /// <summary>The text that was matched.</summary>
    public string content;
}
|
|
|
|
/// <summary>
/// Common shape of the per-field parsers (<c>TryParseSeason</c>, <c>TryParseGroup</c>, ...).
/// </summary>
/// <param name="node">Node to parse.</param>
/// <param name="result">The parsed field value when the parser returns true.</param>
/// <param name="matchInfo">Where the value was found; some parsers always leave this null.</param>
/// <returns>True when the field could be parsed from the node.</returns>
private delegate bool FieldParser(
    TreeNode node,
    out string result,
    out MatchInfo matchInfo);
|
|
|
|
/// <summary>
/// Parses a field from <paramref name="node"/> and cross-checks it against the node's
/// direct children before accepting it.
/// </summary>
/// <remarks>
/// The node's own value is accepted when no child yields a value, or when a strict majority
/// of the child values are identical to each other. Otherwise the value is rejected.
/// NOTE(review): in the majority case the node's own value is returned, not the value the
/// children agree on — confirm this is intended.
/// </remarks>
/// <param name="node">Node to parse; a node without <c>Info</c> never parses.</param>
/// <param name="fieldParser">Parser for the field of interest.</param>
/// <param name="result">The accepted field value, or null when the method returns false.</param>
/// <returns>True when the node's value was parsed and accepted.</returns>
private bool TryParseField(TreeNode node, FieldParser fieldParser, out string result)
{
    result = null;
    if (node.Info == null) return false;

    if (!fieldParser(node, out var fieldValue, out _)) return false;

    var childValues = new List<string>();
    foreach (var child in node.Children)
    {
        // The parsers dereference Info, so apply the same guard used for the node itself.
        if (child.Info == null) continue;

        if (fieldParser(child, out var childValue, out _))
        {
            childValues.Add(childValue);
        }
    }

    var totalCount = childValues.Count;
    if (totalCount > 0)
    {
        var largestGroup = childValues.GroupBy(value => value).Max(group => group.Count());

        // Integer division is exact enough here: count > total / 2 holds only for a strict majority.
        if (largestGroup <= totalCount / 2) return false;
    }

    result = fieldValue;
    return true;
}
|
|
|
|
/// <summary>
/// Walks the tree breadth-first from <paramref name="node"/> and stores the parsed field on
/// the first node of each branch where <c>TryParseField</c> accepts a value.
/// </summary>
/// <remarks>
/// A node that already carries <paramref name="fieldName"/> is skipped together with its
/// subtree. A node where parsing succeeds gets the value and its subtree is not visited.
/// Only when parsing fails (including nodes without <c>Info</c>) are the children queued.
/// </remarks>
/// <param name="node">Root of the subtree to process.</param>
/// <param name="fieldParser">Parser for the field.</param>
/// <param name="fieldName">Key under which the value is stored in the node's info map.</param>
private void DoParse(TreeNode node, FieldParser fieldParser, string fieldName)
{
    var queue = new Queue<TreeNode>();
    queue.Enqueue(node);
    while (queue.Count > 0)
    {
        var current = queue.Dequeue();

        // Nodes without Info cannot hold the field; let TryParseField reject them below so
        // that their children are still visited instead of failing on a null dereference here.
        if (current.Info != null && current.Info.Info.ContainsKey(fieldName)) continue;

        if (TryParseField(current, fieldParser, out var fieldValue))
        {
            // TryParseField only succeeds for nodes with a non-null Info.
            current.Info.Info.TryAdd(fieldName, fieldValue);
            continue;
        }

        foreach (var child in current.Children)
        {
            queue.Enqueue(child);
        }
    }
}
|
|
|
|
/// <summary>
/// Runs the field parsers over the tree rooted at <paramref name="node"/>, in a fixed order:
/// season, group, type, subtitle language.
/// </summary>
/// <remarks>
/// The method contains no awaits, so all work is done before the returned task is handed back.
/// The subtitle-language parser checks the node's type, so it runs after the type pass.
/// </remarks>
/// <param name="node">Root of the tree to annotate.</param>
/// <returns>A task representing the (already finished) parse.</returns>
public async Task Parse(TreeNode node)
{
    var steps = new (FieldParser Parser, string Key)[]
    {
        (TryParseSeason, ItemFields.Key_Season),
        (TryParseGroup, ItemFields.Key_Group),
        (TryParseType, ItemFields.Key_Type),
        (TryParseSubtitleLanguage, ItemFields.Key_SubtitleLanguage),
    };

    foreach (var step in steps)
    {
        DoParse(node, step.Parser, step.Key);
    }
}
|
|
} |