using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Learn.Config;
using Learn.Models;
using Learn.Utils;

namespace Learn.Parsers;

/// <summary>
/// Extracts fields (season, group, type, subtitle language, episode, raw title and year)
/// from item names using the regular expressions and match lists in <see cref="RawParserConfig"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the reviewed text had no generic type arguments (<c>List</c>, <c>Queue</c>,
/// <c>configs.Get()</c>). They are restored here as <c>List&lt;string&gt;</c>,
/// <c>Queue&lt;TreeNode&gt;</c> and <c>Get&lt;RawParserConfig&gt;()</c>; confirm against the
/// project's declarations of <c>Configs</c> and <c>TreeNode</c>.
/// </remarks>
public class RawParser(Configs configs) : ItemParser
{
    // Looked up on every access rather than cached.
    private RawParserConfig config => configs.Get<RawParserConfig>();

    /// <summary>
    /// Trims every part, removes the text matched by each token-filter regex, and drops
    /// parts that end up empty.
    /// </summary>
    /// <param name="parts">Name fragments to clean; the list itself is not modified.</param>
    /// <returns>A new list containing the non-empty cleaned parts.</returns>
    private List<string> FilterParts(List<string> parts)
    {
        // Trim up front so whitespace-only parts are dropped even when no filter rule exists.
        var result = parts.Select(part => part.Trim()).ToList();

        var regexes = config.TokenFilterRules?.Regexes;
        if (regexes is not null)
        {
            foreach (var regex in regexes)
            {
                result = result.Select(part =>
                {
                    var match = Regex.Match(part, regex);

                    // string.Replace throws when the text to replace is empty, so a
                    // zero-length match is treated as "nothing to remove".
                    return match.Success && match.Length > 0
                        ? part.Replace(match.Value, "").Trim()
                        : part;
                }).ToList();
            }
        }

        return result.Where(part => !string.IsNullOrEmpty(part)).ToList();
    }

    /// <summary>
    /// Splits the item's name with <c>config.SplitRegex</c> (each regex match is one part)
    /// and passes the trimmed, non-empty parts through <see cref="FilterParts"/>.
    /// </summary>
    private List<string> GetParts(Item item)
    {
        var matches = Regex.Matches(item.Name(), config.SplitRegex)
            .Select(match => match.Value.Trim())
            .Where(value => !string.IsNullOrEmpty(value))
            .ToList();

        return FilterParts(matches);
    }

    /// <summary>
    /// Converts a captured season text to its decimal string form. Accepts anything
    /// <c>int.TryParse</c> accepts, or a single Chinese numeral from zero to nine.
    /// </summary>
    /// <param name="seasonPart">The captured season text.</param>
    /// <param name="season">The normalised season, or <c>null</c> on failure.</param>
    /// <returns><c>true</c> when the text could be normalised.</returns>
    private bool TryNormalizeSeason(string seasonPart, out string season)
    {
        // Invariant culture keeps the result independent of the machine's regional settings.
        if (int.TryParse(seasonPart, NumberStyles.Integer, CultureInfo.InvariantCulture, out var seasonInt))
        {
            season = seasonInt.ToString(CultureInfo.InvariantCulture);
            return true;
        }

        season = seasonPart switch
        {
            "零" => "0",
            "一" => "1",
            "二" => "2",
            "三" => "3",
            "四" => "4",
            "五" => "5",
            "六" => "6",
            "七" => "7",
            "八" => "8",
            "九" => "9",
            _ => null,
        };

        return season is not null;
    }

    /// <summary>
    /// Tries each season regex against the trimmed token and normalises capture group 1.
    /// </summary>
    /// <param name="token">The token to inspect.</param>
    /// <param name="seasonPart">The normalised season, or <c>null</c> on failure.</param>
    /// <param name="matchPart">The full text of the successful match, or <c>null</c> on failure.</param>
    /// <returns><c>true</c> when a rule matched and its capture could be normalised.</returns>
    private bool TryMatchSeason(string token, out string seasonPart, out string matchPart)
    {
        seasonPart = null;
        matchPart = null;

        var regexes = config.SeasonMatchRules?.Regexes;
        if (regexes is null)
        {
            return false;
        }

        var candidate = token.Trim();
        foreach (var regex in regexes)
        {
            var match = Regex.Match(candidate, regex);
            if (!match.Success)
            {
                continue;
            }

            // A rule whose capture cannot be normalised must not shadow later rules,
            // so keep looking instead of giving up on the first textual match.
            if (TryNormalizeSeason(match.Groups[1].Value, out seasonPart))
            {
                matchPart = match.Value;
                return true;
            }
        }

        seasonPart = null;
        return false;
    }

    /// <summary>
    /// Finds the first "-"-separated token, scanning parts in order, that yields a season.
    /// </summary>
    /// <param name="parts">Name parts to scan.</param>
    /// <param name="item">Not used by this parser; present to satisfy <see cref="FieldParser"/>.</param>
    /// <param name="season">The normalised season, or <c>null</c>.</param>
    /// <param name="matchInfo">The matched text and the index of the part containing it, or <c>null</c>.</param>
    private bool TryParseSeason(List<string> parts, Item item, out string season, out MatchInfo matchInfo)
    {
        season = null;
        matchInfo = null;

        for (var i = 0; i < parts.Count; i++)
        {
            foreach (var token in parts[i].Split("-"))
            {
                if (!TryMatchSeason(token, out season, out var content))
                {
                    continue;
                }

                matchInfo = new MatchInfo { content = content, partIndex = i };
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="item"/> equals any entry of
    /// <paramref name="sequence"/>, ignoring case (ordinal). A <c>null</c> sequence matches nothing.
    /// </summary>
    private bool IsFullMatch(string item, IEnumerable<string> sequence)
    {
        return sequence is not null
            && sequence.Any(candidate => item.Equals(candidate, StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="item"/> contains any entry of
    /// <paramref name="sequence"/>, ignoring case (ordinal). A <c>null</c> sequence matches nothing.
    /// </summary>
    private bool IsPartialMatch(string item, IEnumerable<string> sequence)
    {
        return sequence is not null
            && sequence.Any(candidate => item.Contains(candidate, StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Picks the group part: the first part that fully matches a configured group name,
    /// otherwise the first part that partially matches one.
    /// </summary>
    /// <param name="parts">Name parts to scan.</param>
    /// <param name="item">Not used by this parser; present to satisfy <see cref="FieldParser"/>.</param>
    /// <param name="group">The matching part, or <c>null</c>.</param>
    /// <param name="matchInfo">The matching part and its index, or <c>null</c>.</param>
    private bool TryParseGroup(List<string> parts, Item item, out string group, out MatchInfo matchInfo)
    {
        group = null;
        matchInfo = null;

        var rules = config.GroupsMatchRules;

        // Full matches anywhere in the list take priority over partial matches.
        var index = parts.FindIndex(part => IsFullMatch(part, rules?.Full));
        if (index < 0)
        {
            index = parts.FindIndex(part => IsPartialMatch(part, rules?.Partial));
        }

        if (index < 0)
        {
            return false;
        }

        group = parts[index];
        matchInfo = new MatchInfo { content = parts[index], partIndex = index };
        return true;
    }

    /// <summary>
    /// Classifies an item from its name and extension using <c>config.TypeMatchRules</c>.
    /// </summary>
    private ItemFields.ItemType ParseItemType(Item item)
    {
        var rules = config.TypeMatchRules;
        var name = item.Name();

        // 1. Check whether the item is an extra.
        if (IsFullMatch(name, rules.Extra.IfDirNameIs))
        {
            return ItemFields.ItemType.Extra;
        }

        if (item.IsFolder())
        {
            return ItemFields.ItemType.Unknown;
        }

        var extension = Path.GetExtension(name);
        if (string.IsNullOrEmpty(extension))
        {
            // Non-folder items without an extension are classified as extras.
            return ItemFields.ItemType.Extra;
        }

        if (IsFullMatch(extension, rules.Extra.IfFileExtensionIs))
        {
            return ItemFields.ItemType.Extra;
        }

        // 2. Check whether the item is a subtitle.
        if (IsFullMatch(extension, rules.Subtitle.IfFileExtensionIs))
        {
            return ItemFields.ItemType.Subtitle;
        }

        // 3. Check whether the item is an episode.
        if (IsFullMatch(extension, rules.Episode.IfFileExtensionIs))
        {
            return ItemFields.ItemType.Episode;
        }

        // 4. None of the above: unknown.
        return ItemFields.ItemType.Unknown;
    }

    /// <summary>
    /// Reports the item's type name; fails when the type is <c>Unknown</c>.
    /// </summary>
    /// <param name="parts">Not used by this parser; present to satisfy <see cref="FieldParser"/>.</param>
    /// <param name="item">The item to classify.</param>
    /// <param name="type">The enum member name, or <c>null</c>.</param>
    /// <param name="matchInfo">Always <c>null</c>.</param>
    private bool TryParseType(List<string> parts, Item item, out string type, out MatchInfo matchInfo)
    {
        matchInfo = null;
        type = null;

        var typeEnum = ParseItemType(item);
        if (typeEnum == ItemFields.ItemType.Unknown)
        {
            return false;
        }

        type = typeEnum.ToString();
        return true;
    }

    /// <summary>
    /// For subtitle items, takes the second-to-last "."-separated segment of the name
    /// as the language. Requires at least three segments.
    /// </summary>
    /// <param name="parts">Not used by this parser; present to satisfy <see cref="FieldParser"/>.</param>
    /// <param name="item">The item whose name is inspected.</param>
    /// <param name="language">The language segment, or <c>null</c>.</param>
    /// <param name="matchInfo">Always <c>null</c>.</param>
    private bool TryParseSubtitleLanguage(List<string> parts, Item item, out string language, out MatchInfo matchInfo)
    {
        language = null;
        matchInfo = null;

        if (item.Type() != ItemFields.ItemType.Subtitle)
        {
            return false;
        }

        var segments = item.Name().Split(".");
        if (segments.Length < 3)
        {
            return false;
        }

        language = segments[^2];
        return true;
    }

    /// <summary>
    /// Finds the first "-"-separated token matching an episode regex whose capture group 1
    /// is an integer. Items typed <c>Extra</c> or <c>Unknown</c> are skipped.
    /// </summary>
    /// <param name="parts">Name parts to scan.</param>
    /// <param name="item">The item whose type gates the search.</param>
    /// <param name="episode">The episode number as a decimal string, or <c>null</c>.</param>
    /// <param name="matchInfo">The matched text and the index of the part containing it, or <c>null</c>.</param>
    private bool TryParseEpisode(List<string> parts, Item item, out string episode, out MatchInfo matchInfo)
    {
        episode = null;
        matchInfo = null;

        var itemType = item.Type();
        if (itemType == ItemFields.ItemType.Extra || itemType == ItemFields.ItemType.Unknown)
        {
            return false;
        }

        var regexes = config.EpisodeRules?.Regexes;
        if (regexes is null)
        {
            return false;
        }

        for (var i = 0; i < parts.Count; i++)
        {
            foreach (var token in parts[i].Split("-"))
            {
                var candidate = token.Trim();
                foreach (var regex in regexes)
                {
                    var match = Regex.Match(candidate, regex);
                    if (!match.Success)
                    {
                        continue;
                    }

                    // The patterns come from configuration: a missing, non-numeric or
                    // overflowing capture is skipped instead of throwing.
                    if (!int.TryParse(match.Groups[1].Value, NumberStyles.Integer, CultureInfo.InvariantCulture, out var number))
                    {
                        continue;
                    }

                    episode = number.ToString(CultureInfo.InvariantCulture);
                    matchInfo = new MatchInfo { content = match.Value, partIndex = i };
                    return true;
                }
            }
        }

        return false;
    }

    /// <summary>
    /// Derives the raw title: removes group parts, splits the rest on "-", strips season
    /// markers, filters the tokens and takes the first one. If any token contains a
    /// parenthesised four-digit year, it is stored under <c>ItemFields.Key_Year</c> and
    /// removed from the title.
    /// </summary>
    /// <param name="parts">Name parts; the list is not modified.</param>
    /// <param name="item">The item being parsed; its <c>Info</c> may receive the year.</param>
    /// <param name="rawTitle">The derived title, or <c>null</c>.</param>
    /// <param name="matchInfo">Always <c>null</c>.</param>
    private bool TryParseRawTitle(List<string> parts, Item item, out string rawTitle, out MatchInfo matchInfo)
    {
        rawTitle = null;
        matchInfo = null;

        if (item.Type() == ItemFields.ItemType.Extra)
        {
            return false;
        }

        // Work on a copy so the caller's list is left untouched.
        var remaining = parts.ToList();
        while (TryParseGroup(remaining, item, out _, out var groupMatch))
        {
            remaining.RemoveAt(groupMatch.partIndex);
        }

        var tokens = remaining.SelectMany(part => part.Split("-")).ToList();

        while (TryParseSeason(tokens, item, out _, out var seasonMatch))
        {
            // An empty match cannot be removed (string.Replace would throw) and would
            // otherwise be found again on every iteration.
            if (string.IsNullOrEmpty(seasonMatch.content))
            {
                break;
            }

            tokens[seasonMatch.partIndex] = tokens[seasonMatch.partIndex].Replace(seasonMatch.content, "");
        }

        tokens = FilterParts(tokens);
        if (tokens.Count == 0)
        {
            return false;
        }

        rawTitle = tokens[0].Trim();

        // Try to match a year.
        // NOTE(review): each character class lists the same parenthesis twice; possibly one
        // of each pair was meant to be the full-width form. Confirm against the original file.
        foreach (var token in tokens)
        {
            var matchYear = Regex.Match(token, @"[((](\d{4})[))]");
            if (!matchYear.Success)
            {
                continue;
            }

            rawTitle = rawTitle.Replace(matchYear.Value, "").Trim();
            item.Info.TryAdd(ItemFields.Key_Year, matchYear.Groups[1].Value);
            break;
        }

        return true;
    }

    /// <summary>Matched text together with the index of the part it was found in.</summary>
    class MatchInfo
    {
        public string content;
        public int partIndex;
    }

    /// <summary>Common shape of the per-field parsers driven by <see cref="DoParse"/>.</summary>
    private delegate bool FieldParser(List<string> parts, Item item, out string result, out MatchInfo matchInfo);

    // Ideally `item` would be replaced by `parts`.
    /// <summary>
    /// Parses a field for <paramref name="node"/> and decides whether to accept it.
    /// The node's own value is accepted when no child yields a value, or when more than
    /// half of the children that yielded a value share one value. The shared value is
    /// not compared with the node's own value.
    /// </summary>
    /// <param name="node">The node to parse.</param>
    /// <param name="fieldParser">The parser applied to the node and to each child.</param>
    /// <param name="result">The node's own parsed value when accepted; otherwise <c>null</c>.</param>
    private bool TryParseField(TreeNode node, FieldParser fieldParser, out string result)
    {
        result = null;

        var item = node.Info;
        if (item is null)
        {
            return false;
        }

        if (!fieldParser(GetParts(item), item, out var fieldValue, out _))
        {
            return false;
        }

        var childValues = new List<string>();
        foreach (var child in node.Children)
        {
            var childItem = child.Info;
            if (childItem is null)
            {
                // Same guard as for the node itself: a child without an item has no name to parse.
                continue;
            }

            if (fieldParser(GetParts(childItem), childItem, out var childFieldValue, out _))
            {
                childValues.Add(childFieldValue);
            }
        }

        if (childValues.Count == 0)
        {
            result = fieldValue;
            return true;
        }

        var largestGroup = childValues.GroupBy(value => value).Max(group => group.Count());
        if (largestGroup > childValues.Count / 2)
        {
            result = fieldValue;
            return true;
        }

        return false;
    }

    /// <summary>
    /// Walks the tree breadth-first. When a node's field is accepted it is stored under
    /// <paramref name="fieldName"/> (existing entries are kept) and the node's children are
    /// not visited; otherwise the children are queued.
    /// </summary>
    private void DoParse(TreeNode node, FieldParser fieldParser, string fieldName)
    {
        var queue = new Queue<TreeNode>();
        queue.Enqueue(node);

        while (queue.Count > 0)
        {
            var current = queue.Dequeue();
            if (TryParseField(current, fieldParser, out var fieldValue))
            {
                current.Info.Info.TryAdd(fieldName, fieldValue);
                continue;
            }

            foreach (var child in current.Children)
            {
                queue.Enqueue(child);
            }
        }
    }

    /// <summary>
    /// Runs every field parser over the tree rooted at <paramref name="node"/>, in the order
    /// season, group, type, subtitle language, episode, raw title.
    /// </summary>
    /// <remarks>Contains no awaits; all work is done before the returned task is handed back.</remarks>
    public async Task Parse(TreeNode node)
    {
        DoParse(node, TryParseSeason, ItemFields.Key_Season);
        DoParse(node, TryParseGroup, ItemFields.Key_Group);
        DoParse(node, TryParseType, ItemFields.Key_Type);
        DoParse(node, TryParseSubtitleLanguage, ItemFields.Key_SubtitleLanguage);
        DoParse(node, TryParseEpisode, ItemFields.Key_Episode);
        DoParse(node, TryParseRawTitle, ItemFields.Key_RawTitle);
    }
}