// DocParse.cs
  1. using System;
  2. using System.IO;
  3. using System.Linq;
  4. using System.Text;
  5. using System.Text.RegularExpressions;
  6. using Console = HyperCube.Utils.AdvConsole;
  7. namespace HyperCube
  8. {
  /// <summary>
  /// Regex-based helpers for pulling basic article properties (name, authors, keywords)
  /// out of article text.
  /// </summary>
  public class DocParse
  {
    // Shortest run from the very start of the text up to the first '.', '!' or '?'
    // that is followed by whitespace or the end of the text. There is no Multiline
    // option, so '^' only matches at position 0 and at most one match exists.
    private static readonly Regex NameRegex =
      new(@"^.*?[\.!\?](?:\s|$)", RegexOptions.Compiled);

    // Two dotted capital initials followed by a capitalized surname, e.g. "А. Б. Иванов".
    // 'Ё'/'ё' are listed explicitly: they sit outside the А-Я / а-я code point ranges,
    // so without them a surname like "Семёнов" would be cut off at the 'ё'.
    private static readonly Regex AuthorRegex =
      new(@"[А-ЯЁ]\.\s?[А-ЯЁ]\.\s[А-ЯЁ][а-яё]{1,20}", RegexOptions.Compiled);

    // "ключевые слова:" label (case-insensitive) followed by a list of word characters,
    // '+', '-', ',' and whitespace, terminated by a period.
    private static readonly Regex KeywordsRegex =
      new(@"(ключевые)\s*(слова:)\s[\w+\-+\w\,\s]*\.", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Scans <paramref name="text"/> and fills an article model with whatever of the
    /// name, authors and keywords could be found. A property that could not be found
    /// is left unset and a diagnostic line is written via <c>Console.WriteLine</c>.
    /// </summary>
    /// <param name="text">Text of the article to scan.</param>
    /// <returns>A new model with <c>Name</c>, <c>Authors</c> and <c>Keywords</c> set where found.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
    public static Models.ArticleModel GetBaseProperties(string text)
    {
      if (text is null)
      {
        throw new ArgumentNullException(nameof(text));
      }

      Models.ArticleModel articleModel = new();

      // Article name: the first sentence of the text, trimmed.
      Match nameMatch = NameRegex.Match(text);
      if (nameMatch.Success)
      {
        articleModel.Name = nameMatch.Value.Trim();
      }
      else
      {
        Console.WriteLine("GetBaseProperties: cant get name");
      }

      // Publish date: not extracted yet.

      // Authors: every "I. I. Surname" occurrence, joined with ", ".
      MatchCollection authorMatches = AuthorRegex.Matches(text);
      if (authorMatches.Count > 0)
      {
        articleModel.Authors = string.Join(", ", authorMatches.Cast<Match>().Select(m => m.Value));
      }
      else
      {
        Console.WriteLine("GetBaseProperties: authors not found");
      }

      // Keywords: each matched section is cleaned on its own before joining, so the
      // "ключевые слова:" label is stripped from every section, not only the first one.
      MatchCollection keywordMatches = KeywordsRegex.Matches(text);
      if (keywordMatches.Count > 0)
      {
        articleModel.Keywords = string.Join(", ", keywordMatches.Cast<Match>().Select(m => CleanKeywords(m.Value)));
      }
      else
      {
        Console.WriteLine("GetBaseProperties: keywords not found");
      }

      // Annotation: not extracted yet.

      return articleModel;
    }

    /// <summary>
    /// Placeholder: no document is read yet, so the result is always an empty string.
    /// </summary>
    /// <returns>The text accumulated in the builder, which is currently empty.</returns>
    public static string ReadDocx()
    {
      StringBuilder pageText = new();
      return pageText.ToString();
    }

    // Normalizes one keywords match: collapses whitespace runs to a single space,
    // removes hyphens and drops the leading "...: " label. The closing period is kept.
    private static string CleanKeywords(string rawMatch)
    {
      string keywords = Regex.Replace(rawMatch, @"\s+", " ");

      // NOTE(review): this strips every '-', including those inside compound
      // keywords; presumably meant to undo line-break hyphenation - confirm intent.
      keywords = keywords.Replace("-", "");

      // The pattern guarantees ':' is followed by whitespace (now a single space)
      // and at least the closing '.', so skipping two characters stays in range.
      return keywords.Substring(keywords.IndexOf(':') + 2);
    }
  }
  64. }