1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- using System;
- using System.IO;
- using System.Linq;
- using System.Text;
- using System.Text.RegularExpressions;
- using Console = HyperCube.Utils.AdvConsole;
namespace HyperCube
{
    /// <summary>
    /// Regex-based helpers that pull basic article metadata (title, authors,
    /// keywords) out of the plain text of a document.
    /// </summary>
    public class DocParse
    {
        // The patterns never change, so the Regex objects are built once and
        // shared instead of being re-parsed on every call.

        // Shortest run from the very start of the text up to the first '.', '!'
        // or '?' that is followed by whitespace or the end of the text.
        // '^' is used without RegexOptions.Multiline, so it can match only once.
        private static readonly Regex s_nameRegex = new(@"^.*?[\.!\?](?:\s|$)");

        // Two capitalised Cyrillic initials followed by a surname, e.g.
        // "И. И. Иванов" or "И.И. Иванов". 'Ё'/'ё' are listed explicitly because
        // they lie outside the А-Я / а-я code point ranges; without them a
        // surname such as "Королёв" would be cut off at the 'ё'.
        private static readonly Regex s_authorRegex =
            new(@"[А-ЯЁ]\.\s?[А-ЯЁ]\.\s[А-ЯЁ][а-яё]{1,20}");

        // "Ключевые слова:" (any letter case) followed by a run of word
        // characters, '+', '-', ',' and whitespace, terminated by a period.
        private static readonly Regex s_keywordsRegex =
            new(@"(ключевые)\s*(слова:)\s[\w+\-+\w\,\s]*\.", RegexOptions.IgnoreCase);

        /// <summary>
        /// Extracts the article name, authors and keywords from <paramref name="text"/>.
        /// </summary>
        /// <param name="text">Plain text of the document to analyse.</param>
        /// <returns>
        /// A new <c>Models.ArticleModel</c> whose <c>Name</c>, <c>Authors</c> and
        /// <c>Keywords</c> properties are assigned for every part that was found.
        /// A property whose pattern did not match is not assigned, and a message
        /// is written to the console instead.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
        public static Models.ArticleModel GetBaseProperties(string text)
        {
            if (text is null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            Models.ArticleModel articleModel = new();

            // Name: the first sentence of the text.
            Match nameMatch = s_nameRegex.Match(text);
            if (nameMatch.Success)
            {
                articleModel.Name = nameMatch.Value.Trim();
            }
            else
            {
                Console.WriteLine("GetBaseProperties: cant get name");
            }

            // Authors: every "initials + surname" occurrence anywhere in the text,
            // in order of appearance, joined with ", ".
            MatchCollection authorMatches = s_authorRegex.Matches(text);
            if (authorMatches.Count > 0)
            {
                articleModel.Authors = string.Join(
                    ", ",
                    authorMatches.Cast<Match>().Select(m => m.Value));
            }
            else
            {
                Console.WriteLine("GetBaseProperties: authors not found");
            }

            // Keywords: each "Ключевые слова: ..." block is cleaned on its own so
            // that the label is stripped from every block, not only the first.
            MatchCollection keywordMatches = s_keywordsRegex.Matches(text);
            if (keywordMatches.Count > 0)
            {
                articleModel.Keywords = string.Join(
                    ", ",
                    keywordMatches.Cast<Match>().Select(m => ExtractKeywordList(m.Value)));
            }
            else
            {
                Console.WriteLine("GetBaseProperties: keywords not found");
            }

            return articleModel;
        }

        /// <summary>
        /// Returns an empty string. The method only creates an empty
        /// <see cref="StringBuilder"/> and converts it to a string; no document
        /// is opened or read here.
        /// </summary>
        /// <returns>An empty string.</returns>
        public static string ReadDocx()
        {
            StringBuilder pageText = new();
            return pageText.ToString();
        }

        /// <summary>
        /// Turns one raw "Ключевые слова: ..." match into the bare keyword list.
        /// The terminating period of the match is kept.
        /// </summary>
        private static string ExtractKeywordList(string rawMatch)
        {
            // Collapse line breaks and repeated whitespace into single spaces.
            string cleaned = Regex.Replace(rawMatch, @"\s+", " ");

            // NOTE(review): this drops every '-', including hyphens inside
            // compound words, not only line-break hyphenation. Kept as-is
            // because callers may rely on it; confirm the intent.
            cleaned = cleaned.Replace("-", "");

            // The pattern guarantees a whitespace character right after the
            // colon (now a single space), so +2 skips the colon and that space.
            return cleaned.Substring(cleaned.IndexOf(':') + 2);
        }
    }
}
|