146 lines
3.8 KiB
C#
146 lines
3.8 KiB
C#
using System.Collections.Concurrent;
|
|
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
using Newtonsoft.Json;
|
|
|
|
namespace ConsoleApp1;
|
|
|
|
/// <summary>
/// Plain data holder describing one parsed media file. Apart from <see cref="path"/>,
/// the fields are populated by JSON deserialization, so their names must keep matching
/// the keys the parser expects in the reply.
/// </summary>
public class EpisodeInfo
{
    /// <summary>The original input string; the parser copies the question text here.</summary>
    public string path;

    /// <summary>Name reported for the file (presumably the show title; confirm against the prompt).</summary>
    public string name;

    /// <summary>Session identifier (presumably the season number; confirm against the prompt).</summary>
    public string session;

    /// <summary>Episode identifier; the parser clears it when <see cref="type"/> is "others".</summary>
    public string episode;

    /// <summary>Group reported for the file (presumably the release group; confirm against the prompt).</summary>
    public string group;

    /// <summary>Kind of file: "others", "episode" or "subtitle".</summary>
    public string type;

    /// <summary>Language reported for the file.</summary>
    public string language;
}
|
|
|
|
/// <summary>
/// Outcome of parsing a single question: the input text, whether parsing worked,
/// and the parsed data when available.
/// </summary>
public class EpisodeParseResult
{
    /// <summary>True when the reply was deserialized and post-processed without error.</summary>
    public bool success;

    /// <summary>The question text exactly as it was queued.</summary>
    public string originalQuestion;

    /// <summary>The parsed information; may be null when <see cref="success"/> is false.</summary>
    public EpisodeInfo parseResult;
}
|
|
|
|
/// <summary>
/// Queues question strings, sends each one (appended to the prompt read from
/// <c>Prompt.txt</c>) to an <c>OllamaHelper</c>, and converts the JSON replies into
/// <see cref="EpisodeParseResult"/> objects that callers collect with
/// <see cref="TryGetResult"/>.
/// </summary>
public class EpisodeParser
{
    // TODO: add year parsing

    private const string PromptPath = "Prompt.txt";

    // Matches every run of characters that is neither a digit nor a dot.
    // Built once instead of on every call.
    private static readonly Regex NonNumericRun = new Regex(@"[^\d\.]+", RegexOptions.Compiled);

    private readonly string _prompt;
    private readonly OllamaHelper _ollama;
    private readonly ConcurrentQueue<string> _questions;
    private readonly ConcurrentQueue<EpisodeParseResult> _results;

    private bool _running = false;

    /// <summary>True from a successful <see cref="Start"/> until the pending queue has been drained.</summary>
    public bool Running => _running;

    /// <summary>Questions still pending plus results not yet collected.</summary>
    public int TotalQuestions => _questions.Count + _results.Count;

    /// <summary>Results produced and not yet taken with <see cref="TryGetResult"/>.</summary>
    public int CompletedQuestions => _results.Count;

    /// <summary>Questions still waiting to be processed.</summary>
    public int RestQuestions => _questions.Count;

    /// <summary>
    /// Creates a parser, loading the prompt text from <c>Prompt.txt</c>
    /// (a relative path, so it is resolved against the current directory).
    /// </summary>
    public EpisodeParser()
    {
        _prompt = File.ReadAllText(PromptPath);
        _ollama = new OllamaHelper();
        _questions = new ConcurrentQueue<string>();
        _results = new ConcurrentQueue<EpisodeParseResult>();
    }

    /// <summary>Removes the oldest finished result, if any.</summary>
    /// <param name="result">The dequeued result when the method returns true.</param>
    /// <returns>True when a result was available.</returns>
    public bool TryGetResult(out EpisodeParseResult result)
    {
        return _results.TryDequeue(out result);
    }

    /// <summary>Adds a question to the pending queue; it is not processed until <see cref="Start"/> runs.</summary>
    /// <param name="question">Text to parse; it is also stored as the result's path.</param>
    public void Append(string question)
    {
        _questions.Enqueue(question);
    }

    /// <summary>
    /// Begins draining the pending queue unless a drain is already in progress.
    /// Returns as soon as the first request is awaiting; poll <see cref="Running"/>
    /// and <see cref="TryGetResult"/> for progress.
    /// </summary>
    public void Start()
    {
        if (_running)
        {
            return;
        }

        _running = true;

        // Fire-and-forget is safe here: the worker handles failures per question
        // and always resets _running, so the discarded task never faults silently
        // with state left behind.
        _ = DoParseAsync();
    }

    /// <summary>
    /// Strips Markdown code-fence markers from a model reply so that only the JSON remains.
    /// </summary>
    private static string Preprocess(string responds)
    {
        // Remove the language-tagged fence first; otherwise stripping "```" alone
        // would leave a stray "json" token in front of the payload. Dropping the
        // tag without requiring "\n" also covers "\r\n" and same-line fences.
        return responds.Replace("```json", "").Replace("```", "").Trim();
    }

    /// <summary>
    /// Removes every character that is not a digit or a dot (dots are kept, so a value
    /// such as "12.5" survives). A null input is treated as empty.
    /// </summary>
    private static string RemoveNonDigits(string s)
    {
        return NonNumericRun.Replace(s ?? "", "");
    }

    /// <summary>Removes all leading '0' characters; a string of only zeros becomes empty.</summary>
    private static string RemoveFrontZeros(string s)
    {
        return s.TrimStart('0');
    }

    /// <summary>
    /// Normalises a session value: keeps digits and dots, drops leading zeros, and falls
    /// back to "1" when nothing usable is left or more than two characters remain.
    /// </summary>
    private static string ProcessSession(string session)
    {
        var cleaned = RemoveFrontZeros(RemoveNonDigits(session));

        // NOTE(review): an input of "0"/"00" ends up empty here and therefore becomes "1".
        if (cleaned.Length == 0 || cleaned.Length > 2)
        {
            return "1";
        }

        return cleaned;
    }

    /// <summary>
    /// Normalises an episode value: keeps digits and dots, drops leading zeros, and falls
    /// back to "1" when nothing is left.
    /// </summary>
    private static string ProcessEpisode(string episode)
    {
        var cleaned = RemoveFrontZeros(RemoveNonDigits(episode));

        // NOTE(review): an input of "0"/"00" ends up empty here and therefore becomes "1".
        return cleaned.Length == 0 ? "1" : cleaned;
    }

    /// <summary>
    /// Normalises the session and episode of a freshly deserialized <see cref="EpisodeInfo"/>.
    /// Entries of type "others" get an empty episode.
    /// </summary>
    private static void FinalProcess(EpisodeInfo info)
    {
        info.episode = info.type == "others" ? "" : ProcessEpisode(info.episode);
        info.session = ProcessSession(info.session);
    }

    /// <summary>
    /// Processes pending questions one at a time until the queue is empty, then clears
    /// the running flag, even if something unexpected is thrown.
    /// </summary>
    private async System.Threading.Tasks.Task DoParseAsync()
    {
        try
        {
            while (_questions.TryDequeue(out var question))
            {
                _results.Enqueue(await ParseOneAsync(question));
            }
        }
        finally
        {
            // Without this, a failure would leave Running stuck at true and
            // every later Start() call would be a no-op.
            _running = false;
        }
    }

    /// <summary>
    /// Sends one question to the model and converts the reply into a result.
    /// Any failure (request, JSON parsing, post-processing) yields success = false
    /// rather than an exception.
    /// </summary>
    private async System.Threading.Tasks.Task<EpisodeParseResult> ParseOneAsync(string question)
    {
        var result = new EpisodeParseResult { originalQuestion = question };

        try
        {
            var reply = await _ollama.Ask(_prompt + $"\"{question}\"");
            var info = JsonConvert.DeserializeObject<EpisodeInfo>(Preprocess(reply));
            result.parseResult = info;

            // Deserialization can produce null; check before touching the object.
            if (info != null)
            {
                info.path = question;
                FinalProcess(info);
                result.success = true;
            }
        }
        catch (Exception)
        {
            // Best effort by design: a bad reply or failed request only marks this
            // one question as failed and lets the remaining queue continue.
            result.success = false;
        }

        return result;
    }
}