123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431 |
- using HyperCube.Models;
- using Microsoft.AspNetCore.Components;
- using Microsoft.AspNetCore.Components.Forms;
- using Pullenti.Unitext;
- using System;
- using System.Collections.Generic;
- using System.ComponentModel.DataAnnotations;
- using System.IO;
- using System.Linq;
- using System.Reflection;
- using System.Security.Cryptography;
- using System.Text;
- using System.Threading.Tasks;
- using System.Xml;
- using Console = HyperCube.Utils.AdvConsole;
- namespace HyperCube.Pages
- {
- public partial class Desktop : ComponentBase
- {
// ---- Injected services ----

[Inject]
private NavigationManager _navigationManager { get; set; }

[Inject]
private AppData _appData { get; set; }

// ---- Storage settings ----

// Folder name (under wwwroot) used by SaveDocument for the html and source copies.
private const string STORAGE_FOLDER_NAME = "articles_storage";

// Upper bound passed to IBrowserFile.OpenReadStream, in bytes.
private const long MAX_FILE_SIZE = 5120000;

// ---- CSS class sets for the desktop tabs ----

private const string ACTIVE_BUTTON_CLASS = "btn_white tab-button active";
private const string ACTIVE_TAB_CLASS = "second-block__form visible";
private const string BUTTON_CLASS = "btn_white tab-button";
private const string TAB_CLASS = "second-block__form";

// Current classes per tab; the upload tab is the one selected initially.
private string _uploadButtonClass = ACTIVE_BUTTON_CLASS;
private string _uploadTabClass = ACTIVE_TAB_CLASS;
private string _verifyButtonClass = BUTTON_CLASS;
private string _verifyTabClass = TAB_CLASS;
private string _otherButtonClass = BUTTON_CLASS;
private string _otherTabClass = TAB_CLASS;

// ---- Component state ----

// Reset to 1 after every render (see OnAfterRender); presumably a row counter used by the markup - TODO confirm.
private int _counter = 1;

// Last status message produced by HandleUpload / SaveDocument.
private string _status;

// Bytes of the uploaded file, kept between HandleUpload and SaveDocument.
private MemoryStream _memoryStream;

// Modal components; presumably captured through @ref in the markup - TODO confirm.
private ModalInfo _modalInfo_error { get; set; }
private ModalLoading _modalLoading { get; set; }

// Article properties as parsed from the document; compared against _article when saving.
private ArticleModel _articleClone = new();

// Working copy of the article, created as a clone of _articleClone after parsing.
private ArticleModel _article = new();

// Unitext document created from the uploaded bytes.
private UnitextDocument _document;
/// <summary>
/// Component initialisation hook: awaits <c>AppData.LoadArticles()</c>.
/// The original code marks this call as temporary.
/// </summary>
protected override async Task OnInitializedAsync() => await AppData.LoadArticles();
/// <summary>
/// Resets <c>_counter</c> to 1 once a render pass has completed.
/// </summary>
/// <param name="firstRender">Not used by this override.</param>
protected override void OnAfterRender(bool firstRender)
{
    _counter = 1;
}
/// <summary>
/// Marks one of the three desktop tabs as active by swapping the CSS classes
/// of every tab button and tab panel.
/// </summary>
/// <param name="tabIndex">0 = upload, 1 = verify, 2 = other. Any other value changes nothing.</param>
void SwitchDesktopTab(int tabIndex)
{
    // Unknown indexes leave the current selection untouched.
    if (tabIndex < 0 || tabIndex > 2)
        return;

    bool uploadSelected = tabIndex == 0;
    bool verifySelected = tabIndex == 1;
    bool otherSelected = tabIndex == 2;

    _uploadButtonClass = uploadSelected ? ACTIVE_BUTTON_CLASS : BUTTON_CLASS;
    _uploadTabClass = uploadSelected ? ACTIVE_TAB_CLASS : TAB_CLASS;

    _verifyButtonClass = verifySelected ? ACTIVE_BUTTON_CLASS : BUTTON_CLASS;
    _verifyTabClass = verifySelected ? ACTIVE_TAB_CLASS : TAB_CLASS;

    _otherButtonClass = otherSelected ? ACTIVE_BUTTON_CLASS : BUTTON_CLASS;
    _otherTabClass = otherSelected ? ACTIVE_TAB_CLASS : TAB_CLASS;
}
/// <summary>
/// Handles a file chosen in the upload input. The file is buffered in memory and hashed;
/// if an article with the same hash already exists an error modal is shown. Otherwise the
/// bytes are parsed with Unitext, the base article properties are extracted from the plain
/// text, the shared report is filled with the Pullenti analysis results and the user is
/// navigated to the "report" page.
/// </summary>
/// <param name="e">Change event of the file input; only <c>e.File</c> is read.</param>
/// <remarks>
/// The loading modal is closed in a <c>finally</c> block and the browser stream is disposed by
/// <c>using</c>, so a failure (for example a file larger than <c>MAX_FILE_SIZE</c>, or a
/// database error) no longer leaves the modal open or the stream undisposed.
/// Exceptions still propagate to the caller exactly as before.
/// </remarks>
async Task HandleUpload(InputFileChangeEventArgs e)
{
    _modalLoading.Open();
    try
    {
        IBrowserFile file = e.File;
        if (file == null)
            return;

        // Disposed on every exit path, including exceptions.
        using Stream stream = file.OpenReadStream(MAX_FILE_SIZE);
        _memoryStream = new();
        await stream.CopyToAsync(_memoryStream);
        _status = $"Finished loading {_memoryStream.Length} bytes from {file.Name}";
        Console.WriteLine(_status);

        // Calculating the hash.
        string hash = await CalculateHashSum(_memoryStream);
        Console.WriteLine($"Hash: {hash}");

        // Checking the hash against already stored articles.
        // The hash is Base64 text produced by CalculateHashSum, so it cannot contain a quote.
        MySQLConnector dbCon = MySQLConnector.Instance();
        string stringSQL = $"SELECT COUNT(*) FROM articles WHERE file_hash='{hash}'";
        int count = await dbCon.SQLSelectCount(stringSQL);
        if (count >= 1)
        {
            _status = $"File duplicate founded, hash: {hash}.";
            Console.WriteLine(_status);
            _document = null;
            _memoryStream.Close();
            _modalInfo_error.Open("Загрузка не удалась, такой документ уже есть в системе.");
            return;
        }

        ReportModel report = _appData.Report;
        report.FileName = file.Name;
        report.FileSize = _memoryStream.Length.ToString();

        _memoryStream.Position = 0;
        byte[] content = _memoryStream.ToArray();
        _document = UnitextService.CreateDocument(null, content, null);
        if (_document.ErrorMessage != null)
        {
            // Most likely this format is not supported at the moment.
            Console.WriteLine($"error, sorry: {_document.ErrorMessage}");
            _memoryStream.Close();
            _modalInfo_error.Open("Не удается прочитать документ, формат не поддерживается или файл поврежден.");
            return;
        }

        // Restore the source file name: keep the part after the last '.' that sits at least
        // seven characters before the end of the name (and is not the first character).
        // NOTE(review): inherited from sample code that handled resource names; for an ordinary
        // upload such as "a.b.docx" the whole name is kept, for longer dotted names a prefix is cut.
        _document.SourceFileName = file.Name;
        for (int i = file.Name.Length - 7; i > 0; i--)
        {
            if (file.Name[i] == '.')
            {
                _document.SourceFileName = file.Name.Substring(i + 1);
                break;
            }
        }

        // Plain text of the document, with a placeholder when nothing could be extracted.
        string plainText = _document.GetPlaintextString(null) ?? "Текст не выделен";

        _articleClone = DocParse.GetBaseProperties(plainText);
        _articleClone.Filename = file.Name;
        _articleClone.HashSum = hash;
        _article = (ArticleModel)_articleClone.Clone();

        Console.WriteLine($"Initializing SDK Pullenti ver {Pullenti.Sdk.Version} ({Pullenti.Sdk.VersionDate})... ");
        Pullenti.Sdk.InitializeAll();

        List<string> normalizedNounGroups = CollectNounGroups(plainText, report);
        CollectEntities(plainText, report);
        CollectKeywords(plainText, report);
        AppendNounGroupStatistics(normalizedNounGroups, report);

        _navigationManager.NavigateTo("report");
    }
    finally
    {
        _modalLoading.Close();
    }
}

/// <summary>
/// Runs the empty Pullenti processor (no NER analyzers) over the text, appends every noun
/// group found to <c>report.NounGroups</c> and returns the normalized forms in text order.
/// </summary>
static List<string> CollectNounGroups(string plainText, ReportModel report)
{
    List<string> normalizedGroups = new();
    StringBuilder groupsText = new();

    Pullenti.Ner.AnalysisResult analysis = Pullenti.Ner.ProcessorService.EmptyProcessor.Process(new Pullenti.Ner.SourceOfAnalysis(plainText), null, null);
    System.Console.Write("Noun groups: ");
    for (Pullenti.Ner.Token t = analysis.FirstToken; t != null; t = t.Next)
    {
        // Try to extract a noun group starting at the current token.
        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
        if (npt == null)
            continue;

        // Normalized once and reused for both the report text and the statistics list.
        string normalized = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
        groupsText.Append($"[{npt.GetSourceText()}=>{normalized}] ");
        normalizedGroups.Add(normalized);

        // Continue after the last token of the noun group.
        t = npt.EndToken;
    }

    // Appended only when something was found, so an untouched property stays untouched.
    if (groupsText.Length > 0)
        report.NounGroups += groupsText.ToString();
    return normalizedGroups;
}

/// <summary>
/// Runs the default Pullenti processor over the text and appends the recognised entities
/// (with their slots) to <c>report.Entities</c>, and the noun groups found on tokens that
/// carry no entity to <c>report.EntitiesNounGroups</c>.
/// </summary>
static void CollectEntities(string plainText, ReportModel report)
{
    using Pullenti.Ner.Processor proc = Pullenti.Ner.ProcessorService.CreateProcessor();
    Pullenti.Ner.AnalysisResult ar = proc.Process(new Pullenti.Ner.SourceOfAnalysis(plainText), null, null);

    // Resulting entities.
    StringBuilder entitiesText = new();
    foreach (Pullenti.Ner.Referent en in ar.Entities)
    {
        entitiesText.Append($"{en.TypeName}: {en}\r\n");
        foreach (Pullenti.Ner.Slot s in en.Slots)
            entitiesText.Append($" {s.TypeName}: {s.Value}<br>");
    }
    if (entitiesText.Length > 0)
        report.Entities += entitiesText.ToString();

    // Noun groups outside of entities.
    StringBuilder nounGroupsText = new();
    for (Pullenti.Ner.Token t = ar.FirstToken; t != null; t = t.Next)
    {
        // Tokens that belong to an entity are ignored.
        if (t.GetReferent() != null)
            continue;

        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.AdjectiveCanBeLast, 0, null);
        if (npt == null)
            continue;

        nounGroupsText.Append($"{npt}<br>");

        // Move to the last token of the group.
        t = npt.EndToken;
    }
    if (nounGroupsText.Length > 0)
        report.EntitiesNounGroups += nounGroupsText.ToString();
}

/// <summary>
/// Runs the Pullenti keyword analyzer over the text and appends the keyword referents to
/// <c>report.Keywords1</c> and the per-token keyword texts to <c>report.Keywords2</c>.
/// </summary>
static void CollectKeywords(string plainText, ReportModel report)
{
    using Pullenti.Ner.Processor proc = Pullenti.Ner.ProcessorService.CreateSpecificProcessor(Pullenti.Ner.Keyword.KeywordAnalyzer.ANALYZER_NAME);
    Pullenti.Ner.AnalysisResult ar = proc.Process(new Pullenti.Ner.SourceOfAnalysis(plainText), null, null);

    StringBuilder keywords1 = new();
    foreach (Pullenti.Ner.Referent en in ar.Entities)
    {
        if (en is Pullenti.Ner.Keyword.KeywordReferent)
            keywords1.Append($"{en}<br>");
    }
    if (keywords1.Length > 0)
        report.Keywords1 += keywords1.ToString();

    StringBuilder keywords2 = new();
    for (Pullenti.Ner.Token t = ar.FirstToken; t != null; t = t.Next)
    {
        if (t is not Pullenti.Ner.ReferentToken referentToken)
            continue;
        if (t.GetReferent() is not Pullenti.Ner.Keyword.KeywordReferent kw)
            continue;

        string kwstr = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(referentToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominativeSingle | Pullenti.Ner.Core.GetTextAttr.KeepRegister);
        keywords2.Append($"{kwstr} = {kw}<br>");
    }
    if (keywords2.Length > 0)
        report.Keywords2 += keywords2.ToString();
}

/// <summary>
/// Logs the total and distinct number of noun groups and appends a frequency table
/// (most frequent first) to <c>report.NounGroupsSorted</c>.
/// </summary>
static void AppendNounGroupStatistics(List<string> npt_tokens, ReportModel report)
{
    int res = npt_tokens.Distinct().Count();
    Console.WriteLine($"npt_tokens.count={npt_tokens.Count}, distinct.Count={res}");
    Console.WriteLine("Analysis is over!");

    var frequencies = npt_tokens
        .GroupBy(x => x)
        .Select(g => new { Name = g.Key, Count = g.Count() })
        .OrderByDescending(x => x.Count);

    StringBuilder sortedText = new();
    foreach (var result in frequencies)
        sortedText.Append($"{result.Name}, Count: {result.Count}<br>");
    if (sortedText.Length > 0)
        report.NounGroupsSorted += sortedText.ToString();
}
/// <summary>
/// Persists the article being edited. When <c>DocID</c> is positive the existing row is
/// updated, otherwise a new row is inserted and the html rendition plus the original file
/// are written to the storage folder. In both cases an action-history row and one edit-log
/// row per changed property are inserted, and the article list is reloaded afterwards.
/// </summary>
/// <remarks>
/// Changes compared with the previous version:
/// text values are escaped before being placed into SQL literals (an apostrophe in a title
/// no longer breaks or alters the statement); the loading modal opened here is now closed in
/// a <c>finally</c> block; the output file stream is disposed even when copying fails; and
/// only the file-name part of the stored names is used when building paths.
/// NOTE(review): escaping is a stop-gap - switch to parameterized commands as soon as
/// MySQLConnector offers them.
/// </remarks>
private async Task SaveDocument()
{
    Console.WriteLine($"SaveDocument, docID: {DocID}.");
    _modalLoading.Open();
    try
    {
        MySQLConnector dbCon = MySQLConnector.Instance();
        long id;
        string stringSQL;

        // User-editable values, escaped once for use inside '...' literals.
        string filename = EscapeSqlLiteral(_article.Filename);
        string articleName = EscapeSqlLiteral(_article.Name);
        string authors = EscapeSqlLiteral(_article.Authors);
        string annotation = EscapeSqlLiteral(_article.Annotation);
        string keywords = EscapeSqlLiteral(_article.Keywords);
        string hashSum = EscapeSqlLiteral(_article.HashSum);

        if (DocID > 0)
        {
            id = DocID;
            stringSQL = $"UPDATE articles " +
                $"SET filename='{filename}', article_name='{articleName}', authors='{authors}', " +
                $"date_publish='{_article.PublishDate:yyyy-MM-dd}', annotation='{annotation}', " +
                $"keywords='{keywords}', rating={_article.Rating}, file_hash='{hashSum}' " +
                $"WHERE id={DocID}";
            await dbCon.SQLInsert(stringSQL);
        }
        else
        {
            stringSQL = $"INSERT INTO articles (filename, article_name, authors, date_publish, annotation, keywords, file_hash) " +
                $"VALUES ('{filename}', '{articleName}', '{authors}', '{_article.PublishDate:yyyy-MM-dd}'," +
                $"'{annotation}', '{keywords}', '{hashSum}')";
            id = await dbCon.SQLInsert(stringSQL);
        }

        // Action history (marked as temporary in the original): 2 when DocID is positive, 1 otherwise.
        int action_type = DocID > 0 ? 2 : 1;
        stringSQL = $"INSERT INTO actions_history (article_id, action_type, acc_id) " +
            $"VALUES ('{id}', '{action_type}', '{EscapeSqlLiteral(currentAcc.UUID)}')";
        await dbCon.SQLInsert(stringSQL);

        // One edit-log row per property returned by the comparison of _article and _articleClone.
        Dictionary<string, PropertyInfo> propDict = Compare.SimpleCompare<ArticleModel>(_article, _articleClone);
        foreach (KeyValuePair<string, PropertyInfo> prop in propDict)
        {
            string previousValue = EscapeSqlLiteral(prop.Value.GetValue(_articleClone, null));
            string newValue = EscapeSqlLiteral(prop.Value.GetValue(_article, null));
            stringSQL = $"INSERT INTO articles_edit_log (article_id, acc_id, field_name, field_prevvalue, field_newvalue) " +
                $"VALUES ('{id}', '{EscapeSqlLiteral(currentAcc.UUID)}', '{prop.Key}', '{previousValue}', '{newValue}')";
            await dbCon.SQLInsert(stringSQL);
        }

        if (DocID > 0)
        {
            _status = propDict.Count > 0 ? "All changes saved, article has veryfied." : "Article verifyed without any changes.";
        }
        else
        {
            // Html rendition of the parsed document.
            GetHtmlParam htmlParams = new();
            htmlParams.OutHtmlAndBodyTags = true;
            string html = _document.GetHtmlString(htmlParams);

            string fullpath;
            string htmldirectorypath;
            string docdirectorypath;
#if DEBUG
            htmldirectorypath = Path.Combine(Environment.CurrentDirectory, "wwwroot", STORAGE_FOLDER_NAME, "html");
            docdirectorypath = Path.Combine(Environment.CurrentDirectory, "wwwroot", STORAGE_FOLDER_NAME, "source");
#else
            htmldirectorypath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "wwwroot", STORAGE_FOLDER_NAME, "html");
            docdirectorypath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "wwwroot", STORAGE_FOLDER_NAME, "source");
#endif

            // Saving html. Path.GetFileName drops any directory part a supplied name may carry,
            // so the file always lands inside the storage folder.
            fullpath = Path.Combine(htmldirectorypath, Path.GetFileName(_document.SourceFileName) + ".html");
            Console.WriteLine($"Saving file [{fullpath}]");
            Directory.CreateDirectory(htmldirectorypath);
            File.WriteAllBytes(fullpath, Encoding.UTF8.GetBytes(html));

            // Saving the original file.
            fullpath = Path.Combine(docdirectorypath, $"{id}_{Path.GetFileName(_article.Filename)}");
            Directory.CreateDirectory(docdirectorypath);
            await using FileStream fs = new(fullpath, FileMode.Create, FileAccess.Write);
            _memoryStream.Position = 0;
            await _memoryStream.CopyToAsync(fs);
            _status = $"User has saved new article data: [{id}_{_article.Filename}], memory size:{_memoryStream.Length}b, file size: {fs.Length}b";
            Console.WriteLine(_status);
            _memoryStream.Close();
        }

        // Reloading articles.
        await AppData.LoadArticles();
    }
    finally
    {
        // NOTE(review): the previous version never closed this modal; confirm no caller closes it as well.
        _modalLoading.Close();
    }
}

/// <summary>
/// Formats <paramref name="value"/> the same way string interpolation would (null becomes an
/// empty string) and escapes it for use inside a single-quoted SQL string literal:
/// backslashes are doubled and single quotes are doubled.
/// </summary>
static string EscapeSqlLiteral(object value) =>
    $"{value}".Replace("\\", "\\\\").Replace("'", "''");
/// <summary>
/// Computes the MD5 hash of the whole stream and returns it as a Base64 string.
/// </summary>
/// <param name="ms">Stream to hash. It is rewound to position 0 first and is left at its end.</param>
/// <returns>Base64 text of the 16-byte MD5 digest.</returns>
/// <remarks>
/// MD5 is used here only as a content fingerprint for duplicate detection.
/// The hash algorithm instance is disposable and is now released after use.
/// </remarks>
async Task<string> CalculateHashSum(MemoryStream ms)
{
    using MD5 md5 = MD5.Create();
    ms.Position = 0;
    byte[] hash = await md5.ComputeHashAsync(ms);
    return Convert.ToBase64String(hash);
}
/// <summary>
/// Lists the names of the public properties of <typeparamref name="T"/> that carry
/// <see cref="RequiredAttribute"/> and whose value on <paramref name="obj"/> is null
/// or converts to an empty string.
/// </summary>
/// <typeparam name="T">Type whose properties are inspected (the static type, not the runtime type).</typeparam>
/// <param name="obj">Instance to read the property values from.</param>
/// <returns>Names of the unfilled required properties; empty when all are filled.</returns>
List<string> ValidateForm<T>(T obj)
{
    List<string> missing = new();
    foreach (PropertyInfo property in typeof(T).GetProperties())
    {
        if (!Attribute.IsDefined(property, typeof(RequiredAttribute)))
            continue;

        object value = property.GetValue(obj, null);
        bool isFilled = value != null && value.ToString().Length != 0;
        if (!isFilled)
            missing.Add(property.Name);
    }
    return missing;
}
/// <summary>
/// Returns the display name declared with <see cref="DisplayAttribute"/> on an enum member.
/// </summary>
/// <param name="enumValue">Enum value to describe.</param>
/// <returns>
/// The attribute's name when the member has one; otherwise <c>enumValue.ToString()</c>.
/// The fallback covers members without the attribute, attributes without a name, and values
/// that do not correspond to a single named member (previously these cases threw).
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="enumValue"/> is null.</exception>
static string GetDisplayName(Enum enumValue)
{
    if (enumValue is null)
        throw new ArgumentNullException(nameof(enumValue));

    string memberName = enumValue.ToString();
    string displayName = enumValue.GetType()
        .GetMember(memberName)
        .FirstOrDefault()
        ?.GetCustomAttribute<DisplayAttribute>()
        ?.GetName();

    return displayName ?? memberName;
}
- }
- }
|