1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- using System;
- using System.IO;
- using System.Linq;
- using System.Text;
- using System.Text.RegularExpressions;
- using Console = HyperCube.Utils.AdvConsole;
- namespace HyperCube
- {
/// <summary>
/// Regex-based extraction of basic article metadata (name, authors, keywords)
/// from the plain text of an article.
/// </summary>
public class DocParse
{
    // The patterns never change, so they are constructed once and shared by all calls.

    // Shortest run from the very start of the text up to the first '.', '!' or '?'
    // that is followed by whitespace or the end of the text. '.' does not match a
    // line break here, so the terminator has to be on the first line.
    private static readonly Regex NamePattern = new(@"^.*?[\.!\?](?:\s|$)");

    // Two capitalised initials followed by a capitalised surname, e.g. "И. И. Иванов".
    // 'Ё'/'ё' are listed explicitly: they lie outside the А-Я / а-я code point ranges,
    // so without them a surname such as "Семёнов" would be cut off at the 'ё'.
    private static readonly Regex AuthorPattern = new(@"[А-ЯЁ]\.\s?[А-ЯЁ]\.\s[А-ЯЁ][а-яё]{1,20}");

    // "ключевые слова:" (any casing) followed by a list of words, commas, '+', '-'
    // and whitespace, terminated by a full stop.
    private static readonly Regex KeywordsPattern =
        new(@"(ключевые)\s*(слова:)\s[\w+\-+\w\,\s]*\.", RegexOptions.IgnoreCase);

    private static readonly Regex WhitespaceRun = new(@"\s+");

    /// <summary>
    /// Builds an article model from raw article text, filling in the name, the
    /// authors and the keywords when the corresponding pattern is found.
    /// </summary>
    /// <param name="text">Plain text of the article.</param>
    /// <returns>
    /// A new model. A property whose pattern did not match is left untouched and a
    /// diagnostic line is written to the console instead.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public static Models.ArticleModel GetBaseProperties(string text)
    {
        if (text is null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        Models.ArticleModel articleModel = new();

        // Article name: the pattern is anchored at the start of the text, so it
        // can match at most once.
        Match nameMatch = NamePattern.Match(text);
        if (nameMatch.Success)
        {
            articleModel.Name = nameMatch.Value.Trim();
        }
        else
        {
            Console.WriteLine("GetBaseProperties: cant get name");
        }

        // TODO: publish date extraction is not implemented.

        // Authors: every "I. I. Surname" occurrence, in order of appearance.
        MatchCollection authorMatches = AuthorPattern.Matches(text);
        if (authorMatches.Count > 0)
        {
            articleModel.Authors = string.Join(", ", authorMatches.Cast<Match>().Select(m => m.Value));
        }
        else
        {
            Console.WriteLine("GetBaseProperties: authors not found");
        }

        // Keywords: each match is cleaned on its own so that the "ключевые слова:"
        // label is stripped from every match, not only from the first one.
        MatchCollection keywordMatches = KeywordsPattern.Matches(text);
        if (keywordMatches.Count > 0)
        {
            articleModel.Keywords = string.Join(", ", keywordMatches.Cast<Match>().Select(m => CleanKeywords(m.Value)));
        }
        else
        {
            Console.WriteLine("GetBaseProperties: keywords not found");
        }

        // TODO: annotation extraction is not implemented.

        return articleModel;
    }

    /// <summary>
    /// Placeholder: no document is read; the builder stays empty, so the result
    /// is always an empty string.
    /// </summary>
    /// <returns>An empty string.</returns>
    public static string ReadDocx()
    {
        StringBuilder pageText = new();
        return pageText.ToString();
    }

    // Turns one raw keywords match into the bare list: whitespace runs (including
    // line breaks) collapse to a single space, hyphens are removed, and the label
    // up to and including ": " is cut off. The closing full stop is kept.
    private static string CleanKeywords(string rawMatch)
    {
        string cleaned = WhitespaceRun.Replace(rawMatch, " ");
        cleaned = cleaned.Replace("-", "");

        // The pattern guarantees "<label>:" + whitespace + ... + ".", so after
        // normalisation the list starts two characters past the first colon.
        return cleaned.Substring(cleaned.IndexOf(':') + 2);
    }
}
- }
|