using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using Console = HyperCube.Utils.AdvConsole;

namespace HyperCube
{
    /// <summary>
    /// Regex-based helpers that pull basic article metadata (title, authors,
    /// keywords) out of plain text.
    /// </summary>
    public class DocParse
    {
        // The patterns never change, so they are built once instead of on every call.

        // First sentence of the text: everything from the very start up to the first
        // '.', '!' or '?' that is followed by whitespace or the end of the input.
        // No Multiline/Singleline option is set, so the sentence cannot span a line break.
        private static readonly Regex NameRegex = new Regex(@"^.*?[\.!\?](?:\s|$)");

        // "I. O. Surname" written in Cyrillic. 'Ё'/'ё' are listed explicitly because they
        // sit outside the contiguous А-я code point range and would otherwise cut a
        // surname short (or prevent a match when used as an initial).
        private static readonly Regex AuthorRegex = new Regex(@"[А-ЯЁ]\.\s?[А-ЯЁ]\.\s[А-ЯЁ][а-яё]{1,20}");

        // "ключевые слова: <list>." section, matched case-insensitively.
        private static readonly Regex KeywordsRegex = new Regex(@"(ключевые)\s*(слова:)\s[\w+\-+\w\,\s]*\.", RegexOptions.IgnoreCase);

        // Any run of whitespace (including line breaks).
        private static readonly Regex WhitespaceRunRegex = new Regex(@"\s+");

        /// <summary>
        /// Extracts the article name, authors and keywords from <paramref name="text"/>.
        /// </summary>
        /// <param name="text">Plain text of the article.</param>
        /// <returns>
        /// A new <c>Models.ArticleModel</c> whose <c>Name</c>, <c>Authors</c> and
        /// <c>Keywords</c> are assigned for every part that was found. Parts that were
        /// not found are left untouched and a message is written to the console.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="text"/> is <c>null</c> (raised by the regex engine).
        /// </exception>
        public static Models.ArticleModel GetBaseProperties(string text)
        {
            Models.ArticleModel articleModel = new();

            // Article name: the first sentence. The pattern is anchored to the start of
            // the input, so there is at most one match.
            Match nameMatch = NameRegex.Match(text);
            if (nameMatch.Success)
            {
                articleModel.Name = nameMatch.Value.Trim();
            }
            else
            {
                Console.WriteLine("GetBaseProperties: cant get name");
            }

            // Publish date: not implemented yet.

            // Authors: every "I. O. Surname" occurrence, in order of appearance.
            MatchCollection authorMatches = AuthorRegex.Matches(text);
            if (authorMatches.Count > 0)
            {
                articleModel.Authors = string.Join(", ", authorMatches.Cast<Match>().Select(m => m.Value));
            }
            else
            {
                Console.WriteLine("GetBaseProperties: authors not found");
            }

            // Keywords: each matched section is cleaned up on its own, so the
            // "ключевые слова:" prefix is removed from every section, not only the first.
            MatchCollection keywordMatches = KeywordsRegex.Matches(text);
            if (keywordMatches.Count > 0)
            {
                articleModel.Keywords = string.Join(", ", keywordMatches.Cast<Match>().Select(m => ExtractKeywordList(m.Value)));
            }
            else
            {
                Console.WriteLine("GetBaseProperties: keywords not found");
            }

            // Annotation: not implemented yet.

            return articleModel;
        }

        /// <summary>
        /// Placeholder for DOCX reading: no document is read, and an empty string is returned.
        /// </summary>
        /// <returns>The contents of an empty <see cref="StringBuilder"/>, i.e. <c>""</c>.</returns>
        public static string ReadDocx()
        {
            StringBuilder pageText = new();
            return pageText.ToString();
        }

        // Turns one matched "ключевые слова: a, b." section into "a, b.".
        private static string ExtractKeywordList(string matchedSection)
        {
            // Collapse line breaks and repeated blanks into single spaces.
            string normalized = WhitespaceRunRegex.Replace(matchedSection, " ");

            // Drop hyphens (same as the original behaviour).
            normalized = normalized.Replace("-", string.Empty);

            // The pattern guarantees a ':' followed by whitespace (now a single space)
            // and at least the closing '.', so skipping ": " never runs past the end.
            return normalized.Substring(normalized.IndexOf(':') + 2);
        }
    }
}