ganahrhr 3 роки тому
батько
коміт
e2f67f55b1
5 змінених файлів з 142 додано та 51 видалено
  1. 7 17
      DocParse.cs
  2. 0 1
      HyperCube.csproj
  3. 130 31
      Pages/Desktop.razor.cs
  4. 2 1
      Pages/DocEdit.razor.cs
  5. 3 1
      Pages/_Host.cshtml

+ 7 - 17
DocParse.cs

@@ -3,28 +3,20 @@ using System.IO;
 using System.Linq;
 using System.Text;
 using System.Text.RegularExpressions;
-using BitMiracle.Docotic.Pdf;
 using Console = HyperCube.Utils.AdvConsole;
 
 namespace HyperCube
 {
     public class DocParse
     {
-        public static Models.ArticleModel ReadPDF(MemoryStream ms)
+        public static Models.ArticleModel GetBaseProperties(string text)
         {
-            //Console.WriteLine("ReadPDF start");
+            //Console.WriteLine("GetBaseProperties: start");
 
             Models.ArticleModel articleModel = new();
 
-            string text = "";
             Regex regex;
-            MatchCollection matches;
-
-            ms.Position = 0;
-            //Console.WriteLine($"memorystream length: {ms.Length}, canread: {ms.CanRead}, canseek:{ms.CanSeek}");
-
-            PdfDocument pdf = new(ms);
-            text = pdf.GetText(); //GetTextWithFormatting()            
+            MatchCollection matches;       
 
             ///getting article name
             regex = new Regex(@"^.*?[\.!\?](?:\s|$)");
@@ -35,7 +27,7 @@ namespace HyperCube
                 articleModel.Name = name.Trim();
             }
             else
-                Console.WriteLine("cant get name");
+                Console.WriteLine("GetBaseProperties: cant get name");
 
             ///getting publish date
             ///?
@@ -51,7 +43,7 @@ namespace HyperCube
                 articleModel.Authors = authors;
             }
             else
-                Console.WriteLine("authors not found");
+                Console.WriteLine("GetBaseProperties: authors not found");
 
             ///getting keywords
             regex = new Regex(@"(ключевые)\s*(слова:)\s[\w+\-+\w\,\s]*\.", RegexOptions.IgnoreCase);
@@ -66,14 +58,12 @@ namespace HyperCube
                 articleModel.Keywords = keywords;
             }
             else
-                Console.WriteLine("keywords not found");
+                Console.WriteLine("GetBaseProperties: keywords not found");
 
             ///getting annotation
             ///?
 
-            articleModel.Text = text;
-
-            //Console.WriteLine("ReadPDF end");
+            //Console.WriteLine("GetBaseProperties: end");
             return articleModel;
         }
 

+ 0 - 1
HyperCube.csproj

@@ -7,7 +7,6 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Include="BitMiracle.Docotic.Pdf" Version="8.2.12561" />
     <PackageReference Include="MailKit" Version="2.15.0" />
     <PackageReference Include="Microsoft.AspNetCore.Diagnostics.EntityFrameworkCore" Version="5.0.11" />
     <PackageReference Include="Microsoft.AspNetCore.Identity.EntityFrameworkCore" Version="5.0.11" />

+ 130 - 31
Pages/Desktop.razor.cs

@@ -23,7 +23,7 @@ namespace HyperCube.Pages
         [Inject]
         AppData _appData { get; set; }
 
-        //const string FOLDER_NAME = "articles_storage";
+        const string STORAGE_FOLDER_NAME = "articles_storage";
         const long MAX_FILE_SIZE = 5120000; //bytes
 
         const string ACTIVE_BUTTON_CLASS = "btn_white tab-button active";
@@ -40,7 +40,7 @@ namespace HyperCube.Pages
 
         int _counter = 1;
 
-        string _event = "";
+        //string _event = "";
         string _status;
         //string _header;
         //string _storageFolderPath;
@@ -51,7 +51,13 @@ namespace HyperCube.Pages
         ArticleModel _articleClone = new();
         ArticleModel _article = new();
 
-        protected override async Task OnInitializedAsync() => await AppData.LoadArticles();
+        UnitextDocument _document;
+
+        /// <summary>
+        /// Loads the article list when the component is initialized (marked "tmp" by the author).
+        /// </summary>
+        protected override async Task OnInitializedAsync() => await AppData.LoadArticles();
         protected override void OnAfterRender(bool firstRender) => _counter = 1;
 
         void SwitchDesktopTab(int tabIndex)
@@ -110,11 +116,6 @@ namespace HyperCube.Pages
 
                 if (count < 1)
                 {
-                    _articleClone = DocParse.ReadPDF(_memoryStream);
-                    _articleClone.Filename = file.Name;
-                    _articleClone.HashSum = hash;
-                    _article = (ArticleModel)_articleClone.Clone();
-
                     ReportModel report = _appData.Report;
                     report.FileName = file.Name;
                     report.FileSize = _memoryStream.Length.ToString();
@@ -122,11 +123,11 @@ namespace HyperCube.Pages
                     _memoryStream.Position = 0;
                     byte[] content = _memoryStream.ToArray();
 
-                    UnitextDocument doc = UnitextService.CreateDocument(null, content, null);
-                    if (doc.ErrorMessage != null)
+                    _document = UnitextService.CreateDocument(null, content, null);
+                    if (_document.ErrorMessage != null)
                     {
                         // скорее всего, этот формат не поддерживается на данный момент
-                        Console.WriteLine($"error, sorry: {doc.ErrorMessage}");
+                        Console.WriteLine($"error, sorry: {_document.ErrorMessage}");
 
                         _memoryStream.Close();
                         stream.Close();
@@ -137,12 +138,12 @@ namespace HyperCube.Pages
                     }
 
                     // восстанавливаем имя исходного файла, извлечённого из ресурсов
-                    doc.SourceFileName = file.Name;
+                    _document.SourceFileName = file.Name;
                     for (int i = file.Name.Length - 7; i > 0; i--)
                     {
                         if (file.Name[i] == '.')
                         {
-                            doc.SourceFileName = file.Name.Substring(i + 1);
+                            _document.SourceFileName = file.Name.Substring(i + 1);
                             break;
                         }
                     }
@@ -164,20 +165,14 @@ namespace HyperCube.Pages
                     //Console.WriteLine("XML write done");
 
                     // получаем плоский текст
-                    string plainText = doc.GetPlaintextString(null);
+                    string plainText = _document.GetPlaintextString(null);
                     if (plainText == null)
-                        plainText = "Текст не выделен";
-                    //File.WriteAllBytes(doc.SourceFileName + ".txt", Encoding.UTF8.GetBytes(plainText));
-
-                    // получаем html
-                    GetHtmlParam htmlParams = new();
-                    htmlParams.OutHtmlAndBodyTags = true;
-                    string html = doc.GetHtmlString(htmlParams);
-                    string folderpath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "wwwroot", "temphmls");
-                    Directory.CreateDirectory(folderpath);
-                    string fullpath = Path.Combine(folderpath, doc.SourceFileName + ".html");
-                    Console.WriteLine($"fullpath: {fullpath}");
-                    File.WriteAllBytes(fullpath, Encoding.UTF8.GetBytes(html));
+                        plainText = "Текст не выделен";                    
+
+                    _articleClone = DocParse.GetBaseProperties(plainText);
+                    _articleClone.Filename = file.Name;
+                    _articleClone.HashSum = hash;
+                    _article = (ArticleModel)_articleClone.Clone();
 
                     Console.WriteLine($"Initializing SDK Pullenti ver {Pullenti.Sdk.Version} ({Pullenti.Sdk.VersionDate})... ");
                     Pullenti.Sdk.InitializeAll();
@@ -262,13 +257,13 @@ namespace HyperCube.Pages
                                 report.Keywords2 += $"{kwstr} = {kw}<br>";
                             }
                         }
-                    }
-                    Console.WriteLine("Analysis is over!");
+                    }                   
                     
                     int res = (from x in npt_tokens
                                select x).Distinct().Count();
 
                     Console.WriteLine($"npt_tokens.count={npt_tokens.Count}, distinct.Count={res}");
+                    Console.WriteLine("Analysis is over!");
 
                     var query = from x in npt_tokens
                             group x by x into g
@@ -287,16 +282,120 @@ namespace HyperCube.Pages
                 {
                     _status = $"File duplicate founded, hash: {hash}.";
                     Console.WriteLine(_status);
-                    
-                    _memoryStream.Close();
-                    stream.Close();
+
+                    _document = null;
+                    _memoryStream.Close();             
                     _modalInfo_error.Open("Загрузка не удалась, такой документ уже есть в системе.");
                 }
+
+                file = null;
+                stream.Close();
             }
 
             _modalLoading.Close();
         }
 
+        /// <summary>
+        /// Persists the current article to the database and, for a new article, writes its
+        /// HTML rendition and the originally uploaded file into the storage folder.
+        /// </summary>
+        /// <remarks>
+        /// When DocID is greater than zero the existing row is updated; otherwise a new row is
+        /// inserted and the value returned by SQLInsert is used as the article id. Every call also
+        /// writes one actions_history row and one articles_edit_log row per property returned by
+        /// Compare.SimpleCompare for _article and _articleClone, then reloads the article list.
+        /// </remarks>
+        private async Task SaveDocument()
+        {
+            Console.WriteLine($"SaveDocument, docID: {DocID}.");
+
+            // A new article needs the parsed document to produce its html. Bail out before
+            // touching the database so that no row is created without its files
+            // (_document is reset to null when a duplicate upload is rejected).
+            if (DocID <= 0 && _document == null)
+            {
+                _status = "Nothing to save: no document has been loaded.";
+                Console.WriteLine(_status);
+                return;
+            }
+
+            // NOTE(review): nothing in this method closes _modalLoading again - confirm the caller does.
+            _modalLoading.Open();
+
+            MySQLConnector dbCon = MySQLConnector.Instance();
+            long id;
+            string stringSQL;
+
+            if (DocID > 0)
+            {
+                id = DocID;
+                stringSQL = $"UPDATE articles " +
+                    $"SET filename='{Esc(_article.Filename)}', article_name='{Esc(_article.Name)}', authors='{Esc(_article.Authors)}', " +
+                        $"date_publish='{_article.PublishDate:yyyy-MM-dd}', annotation='{Esc(_article.Annotation)}', " +
+                        $"keywords='{Esc(_article.Keywords)}', rating={_article.Rating}, file_hash='{Esc(_article.HashSum)}' " +
+                    $"WHERE id={DocID}";
+                await dbCon.SQLInsert(stringSQL);
+            }
+            else
+            {
+                stringSQL = $"INSERT INTO articles (filename, article_name, authors, date_publish, annotation, keywords, file_hash) " +
+                    $"VALUES ('{Esc(_article.Filename)}', '{Esc(_article.Name)}', '{Esc(_article.Authors)}', '{_article.PublishDate:yyyy-MM-dd}'," +
+                        $"'{Esc(_article.Annotation)}', '{Esc(_article.Keywords)}', '{Esc(_article.HashSum)}')";
+                id = await dbCon.SQLInsert(stringSQL);
+            }
+
+            /// tmp
+            int action_type = DocID > 0 ? 2 : 1;
+
+            stringSQL = $"INSERT INTO actions_history (article_id, action_type, acc_id) " +
+                $"VALUES ('{id}', '{action_type}', '{currentAcc.UUID}')";
+            await dbCon.SQLInsert(stringSQL);
+
+            // one edit-log row per property reported by the comparison
+            Dictionary<string, PropertyInfo> propDict = Compare.SimpleCompare<ArticleModel>(_article, _articleClone);
+            foreach (KeyValuePair<string, PropertyInfo> prop in propDict)
+            {
+                //Console.WriteLine($"property name: {prop.Key}, value: {prop.Value.GetValue(articleModel, null)}");
+
+                stringSQL = $"INSERT INTO articles_edit_log (article_id, acc_id, field_name, field_prevvalue, field_newvalue) " +
+                    $"VALUES ('{id}', '{currentAcc.UUID}', '{prop.Key}', '{Esc(prop.Value.GetValue(_articleClone, null))}', '{Esc(prop.Value.GetValue(_article, null))}')";
+                await dbCon.SQLInsert(stringSQL);
+            }
+
+            if (DocID > 0)
+            {
+                _status = propDict.Count > 0 ? "All changes saved, article has veryfied." : "Article verifyed without any changes.";
+                //transactionId = await Verify();
+                //Console.WriteLine("transactionId found " + transactionId);
+
+                ///tmp
+                //editsCount = await article.GetEditsCount(currentAcc.UUID);
+                //modalInfo_transac.Open();
+            }
+            else
+            {
+                // build the html rendition of the document
+                GetHtmlParam htmlParams = new() { OutHtmlAndBodyTags = true };
+                string html = _document.GetHtmlString(htmlParams);
+
+                // debug builds store under the working directory, other builds under the app base directory
+#if DEBUG
+                string rootpath = Environment.CurrentDirectory;
+#else
+                string rootpath = AppDomain.CurrentDomain.BaseDirectory;
+#endif
+                string htmldirectorypath = Path.Combine(rootpath, "wwwroot", STORAGE_FOLDER_NAME, "html");
+                string docdirectorypath = Path.Combine(rootpath, "wwwroot", STORAGE_FOLDER_NAME, "source");
+
+                // saving html; the name originates from the upload, so only its last path
+                // segment is used to keep the file inside the storage folder
+                string fullpath = Path.Combine(htmldirectorypath, Path.GetFileName(_document.SourceFileName) + ".html");
+                Console.WriteLine($"Saving file [{fullpath}]");
+                Directory.CreateDirectory(htmldirectorypath);
+                File.WriteAllBytes(fullpath, Encoding.UTF8.GetBytes(html));
+
+                // saving the original file; 'using' releases the handle even if the copy throws
+                fullpath = Path.Combine(docdirectorypath, $"{id}_{Path.GetFileName(_article.Filename)}");
+                Directory.CreateDirectory(docdirectorypath);
+                using (FileStream fs = new(fullpath, FileMode.Create, FileAccess.Write))
+                {
+                    _memoryStream.Position = 0;
+                    await _memoryStream.CopyToAsync(fs);
+
+                    _status = $"User has saved new article data: [{id}_{_article.Filename}], memory size:{_memoryStream.Length}b, file size: {fs.Length}b";
+                    Console.WriteLine(_status);
+                }
+                _memoryStream.Close();
+
+                //bool confirmed = await JsRuntime.InvokeAsync<bool>("confirm", "Хотите загрузить еще статью?");
+                //if (confirmed)
+                //    NavigationManager.NavigateTo("docedit", true);
+                //else
+                //    NavigationManager.NavigateTo("");
+            }
+
+            /// reloading articles
+            await AppData.LoadArticles();
+
+            // Formats the value exactly as string interpolation would, then escapes backslashes
+            // and single quotes so it cannot terminate the surrounding SQL string literal.
+            // NOTE(review): stopgap that assumes MySQL's default backslash-escape mode; switch to
+            // parameterized commands if MySQLConnector offers them.
+            static string Esc(object value) => $"{value}".Replace("\\", "\\\\").Replace("'", "''");
+        }
+
         async Task<string> CalculateHashSum(MemoryStream ms)
         {
             MD5CryptoServiceProvider md5Provider = new();

+ 2 - 1
Pages/DocEdit.razor.cs

@@ -242,7 +242,8 @@ namespace HyperCube.Pages
 
                 if (count < 1)
                 {
-                    articleClone = DocParse.ReadPDF(memoryStream);
+                    //articleClone = DocParse.GetBaseProperties(memoryStream);
+                    articleClone = DocParse.GetBaseProperties("");
                     articleClone.Filename = file.Name;
                     articleClone.HashSum = hash;
                     article = (ArticleModel)articleClone.Clone();

+ 3 - 1
Pages/_Host.cshtml

@@ -4,7 +4,9 @@
 @addTagHelper *, Microsoft.AspNetCore.Mvc.TagHelpers
 @{
     Layout = null;
-//    HyperCube.Models.AccountModel.InitializeAccounts();
+    //    HyperCube.Models.AccountModel.InitializeAccounts();
+
+    Console.WriteLine($"Paths:\r\nApp:{AppDomain.CurrentDomain.BaseDirectory}\r\nWorking: {Environment.CurrentDirectory}");
 }
 
 <!DOCTYPE html>