using HyperCube.Models;
using Microsoft.AspNetCore.Components;
using Microsoft.AspNetCore.Components.Forms;
using Pullenti.Unitext;
using System;
using System.Collections.Generic;
using System.ComponentModel.DataAnnotations;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Security.Cryptography;
using System.Text;
using System.Threading.Tasks;
using System.Xml;
using Console = HyperCube.Utils.AdvConsole;

namespace HyperCube.Pages
{
    /// <summary>
    /// Desktop page: uploads an article file, runs Pullenti text analysis on it,
    /// and persists article metadata plus the original/HTML renditions to disk and MySQL.
    /// NOTE(review): partial class — DocID, currentAcc and the markup live in the
    /// companion .razor / partial file, which is not visible here.
    /// </summary>
    public partial class Desktop : ComponentBase
    {
        [Inject] NavigationManager _navigationManager { get; set; }
        [Inject] AppData _appData { get; set; }

        const string STORAGE_FOLDER_NAME = "articles_storage";
        const long MAX_FILE_SIZE = 5120000; // upload cap, bytes

        // CSS classes toggled by SwitchDesktopTab to show exactly one active tab.
        const string ACTIVE_BUTTON_CLASS = "btn_white tab-button active";
        const string ACTIVE_TAB_CLASS = "second-block__form visible";
        const string BUTTON_CLASS = "btn_white tab-button";
        const string TAB_CLASS = "second-block__form";

        string _uploadButtonClass = ACTIVE_BUTTON_CLASS;
        string _uploadTabClass = ACTIVE_TAB_CLASS;
        string _verifyButtonClass = BUTTON_CLASS;
        string _verifyTabClass = TAB_CLASS;
        string _otherButtonClass = BUTTON_CLASS;
        string _otherTabClass = TAB_CLASS;

        int _counter = 1;
        string _status;

        // Holds the uploaded file bytes between HandleUpload and SaveDocument;
        // intentionally NOT disposed in HandleUpload's success path.
        MemoryStream _memoryStream;

        ModalInfo _modalInfo_error { get; set; }
        ModalLoading _modalLoading { get; set; }

        // _articleClone keeps the parser-produced values so SaveDocument can diff
        // them against the user-edited _article for the edit log.
        ArticleModel _articleClone = new();
        ArticleModel _article = new();
        UnitextDocument _document;

        protected override async Task OnInitializedAsync()
        {
            // tmp: preload the articles list for the page.
            await AppData.LoadArticles();
        }

        protected override void OnAfterRender(bool firstRender) => _counter = 1;

        /// <summary>Activates one of the three desktop tabs (0=upload, 1=verify, 2=other).</summary>
        void SwitchDesktopTab(int tabIndex)
        {
            // Reset everything to inactive, then highlight the requested tab.
            _uploadButtonClass = _verifyButtonClass = _otherButtonClass = BUTTON_CLASS;
            _uploadTabClass = _verifyTabClass = _otherTabClass = TAB_CLASS;
            switch (tabIndex)
            {
                case 0:
                    _uploadButtonClass = ACTIVE_BUTTON_CLASS;
                    _uploadTabClass = ACTIVE_TAB_CLASS;
                    break;
                case 1:
                    _verifyButtonClass = ACTIVE_BUTTON_CLASS;
                    _verifyTabClass = ACTIVE_TAB_CLASS;
                    break;
                case 2:
                    _otherButtonClass = ACTIVE_BUTTON_CLASS;
                    _otherTabClass = ACTIVE_TAB_CLASS;
                    break;
            }
        }

        /// <summary>
        /// Receives an uploaded file, rejects duplicates by MD5 hash, parses the document
        /// with Pullenti Unitext/NER, fills the shared ReportModel and navigates to the report page.
        /// </summary>
        async Task HandleUpload(InputFileChangeEventArgs e)
        {
            _modalLoading.Open();
            IBrowserFile file = e.File;
            if (file != null)
            {
                using Stream stream = file.OpenReadStream(MAX_FILE_SIZE);
                _memoryStream = new();
                await stream.CopyToAsync(_memoryStream);
                _status = $"Finished loading {_memoryStream.Length} bytes from {file.Name}";
                Console.WriteLine(_status);

                // Hash is Base64 of MD5 (alphabet contains no quotes, so safe to inline below).
                string hash = await CalculateHashSum(_memoryStream);
                Console.WriteLine($"Hash: {hash}");

                // Duplicate check by file hash.
                MySQLConnector dbCon = MySQLConnector.Instance();
                string stringSQL = $"SELECT COUNT(*) FROM articles WHERE file_hash='{hash}'";
                int count = await dbCon.SQLSelectCount(stringSQL);
                if (count < 1)
                {
                    ReportModel report = _appData.Report;
                    report.FileName = file.Name;
                    report.FileSize = _memoryStream.Length.ToString();

                    _memoryStream.Position = 0;
                    byte[] content = _memoryStream.ToArray();
                    _document = UnitextService.CreateDocument(null, content, null);
                    if (_document.ErrorMessage != null)
                    {
                        // Most likely the format is not supported (yet) or the file is corrupt.
                        Console.WriteLine($"error, sorry: {_document.ErrorMessage}");
                        _memoryStream.Close();
                        _modalLoading.Close();
                        _modalInfo_error.Open("Не удается прочитать документ, формат не поддерживается или файл поврежден.");
                        return;
                    }

                    // Restore the source file name extracted from resources.
                    // NOTE(review): starting at Length-7 and taking Substring(i + 1) drops the
                    // name up to the last dot found in that window — looks odd, preserved as-is.
                    _document.SourceFileName = file.Name;
                    for (int i = file.Name.Length - 7; i > 0; i--)
                    {
                        if (file.Name[i] == '.')
                        {
                            _document.SourceFileName = file.Name.Substring(i + 1);
                            break;
                        }
                    }

                    // Extract plain text for analysis.
                    string plainText = _document.GetPlaintextString(null);
                    if (plainText == null)
                        plainText = "Текст не выделен";

                    _articleClone = DocParse.GetBaseProperties(plainText);
                    _articleClone.Filename = file.Name;
                    _articleClone.HashSum = hash;
                    _article = (ArticleModel)_articleClone.Clone();

                    Console.WriteLine($"Initializing SDK Pullenti ver {Pullenti.Sdk.Version} ({Pullenti.Sdk.VersionDate})... ");
                    Pullenti.Sdk.InitializeAll();

                    List<string> npt_tokens = new();

                    // Pass 1: empty processor (no NER analyzers) — collect noun phrases.
                    Pullenti.Ner.AnalysisResult are = Pullenti.Ner.ProcessorService.EmptyProcessor.Process(new Pullenti.Ner.SourceOfAnalysis(plainText), null, null);
                    System.Console.Write("Noun groups: ");
                    for (Pullenti.Ner.Token t = are.FirstToken; t != null; t = t.Next)
                    {
                        // Try to read a noun phrase starting at the current token.
                        Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.No, 0, null);
                        if (npt == null)
                            continue;
                        // Record both the raw and the normalized (singular, nominative) form.
                        string normal = npt.GetNormalCaseText(null, Pullenti.Morph.MorphNumber.Singular, Pullenti.Morph.MorphGender.Undefined, false);
                        report.NounGroups += $"[{npt.GetSourceText()}=>{normal}] ";
                        npt_tokens.Add(normal);
                        // Skip to the last token of the noun phrase.
                        t = npt.EndToken;
                    }

                    // Pass 2: full NER — entities with slots, plus noun phrases between entities.
                    using (Pullenti.Ner.Processor proc = Pullenti.Ner.ProcessorService.CreateProcessor())
                    {
                        Pullenti.Ner.AnalysisResult ar = proc.Process(new Pullenti.Ner.SourceOfAnalysis(plainText), null, null);
                        foreach (Pullenti.Ner.Referent en in ar.Entities)
                        {
                            report.Entities += $"{en.TypeName}: {en}\r\n";
                            foreach (Pullenti.Ner.Slot s in en.Slots)
                                report.Entities += $" {s.TypeName}: {s.Value}\r\n";
                        }
                        for (Pullenti.Ner.Token t = ar.FirstToken; t != null; t = t.Next)
                        {
                            // Skip tokens already covered by an entity.
                            if (t.GetReferent() != null)
                                continue;
                            Pullenti.Ner.Core.NounPhraseToken npt = Pullenti.Ner.Core.NounPhraseHelper.TryParse(t, Pullenti.Ner.Core.NounPhraseParseAttr.AdjectiveCanBeLast, 0, null);
                            if (npt == null)
                                continue;
                            report.EntitiesNounGroups += $"{npt}\r\n";
                            t = npt.EndToken;
                        }
                    }

                    // Pass 3: keyword analyzer only.
                    using (Pullenti.Ner.Processor proc = Pullenti.Ner.ProcessorService.CreateSpecificProcessor(Pullenti.Ner.Keyword.KeywordAnalyzer.ANALYZER_NAME))
                    {
                        Pullenti.Ner.AnalysisResult ar = proc.Process(new Pullenti.Ner.SourceOfAnalysis(plainText), null, null);
                        foreach (Pullenti.Ner.Referent en in ar.Entities)
                        {
                            if (en is Pullenti.Ner.Keyword.KeywordReferent)
                                report.Keywords1 += $"{en}\r\n";
                        }
                        for (Pullenti.Ner.Token t = ar.FirstToken; t != null; t = t.Next)
                        {
                            if (t is Pullenti.Ner.ReferentToken)
                            {
                                Pullenti.Ner.Keyword.KeywordReferent kw = t.GetReferent() as Pullenti.Ner.Keyword.KeywordReferent;
                                if (kw == null)
                                    continue;
                                string kwstr = Pullenti.Ner.Core.MiscHelper.GetTextValueOfMetaToken(t as Pullenti.Ner.ReferentToken, Pullenti.Ner.Core.GetTextAttr.FirstNounGroupToNominativeSingle | Pullenti.Ner.Core.GetTextAttr.KeepRegister);
                                report.Keywords2 += $"{kwstr} = {kw}\r\n";
                            }
                        }
                    }

                    int res = npt_tokens.Distinct().Count();
                    Console.WriteLine($"npt_tokens.count={npt_tokens.Count}, distinct.Count={res}");
                    Console.WriteLine("Analysis is over!");

                    // Noun groups sorted by frequency, most frequent first.
                    var query = from x in npt_tokens
                                group x by x into g
                                let count1 = g.Count()
                                orderby count1 descending
                                select new { Name = g.Key, Count = count1 };
                    foreach (var result in query)
                        report.NounGroupsSorted += $"{result.Name}, Count: {result.Count}\r\n";

                    _navigationManager.NavigateTo("report");
                }
                else
                {
                    // Fix: "founded" -> "found" in the status message.
                    _status = $"File duplicate found, hash: {hash}.";
                    Console.WriteLine(_status);
                    _document = null;
                    _memoryStream.Close();
                    _modalInfo_error.Open("Загрузка не удалась, такой документ уже есть в системе.");
                }
                file = null;
            }
            _modalLoading.Close();
        }

        /// <summary>
        /// Persists the edited article: UPDATE when DocID &gt; 0 (verification), INSERT otherwise
        /// (new upload, which also writes the HTML rendition and the original file to disk).
        /// Records the action and a per-field edit log, then reloads the articles list.
        /// </summary>
        private async Task SaveDocument()
        {
            Console.WriteLine($"SaveDocument, docID: {DocID}.");
            _modalLoading.Open();

            MySQLConnector dbCon = MySQLConnector.Instance();
            long id;
            string stringSQL;
            if (DocID > 0)
            {
                id = DocID;
                // SECURITY TODO: MySQLConnector only accepts raw SQL strings; values are
                // escaped via EscapeSql as a stop-gap — migrate to parameterized queries.
                stringSQL = $"UPDATE articles " +
                    $"SET filename='{EscapeSql(_article.Filename)}', article_name='{EscapeSql(_article.Name)}', authors='{EscapeSql(_article.Authors)}', " +
                    $"date_publish='{_article.PublishDate:yyyy-MM-dd}', annotation='{EscapeSql(_article.Annotation)}', " +
                    $"keywords='{EscapeSql(_article.Keywords)}', rating={_article.Rating}, file_hash='{_article.HashSum}' " +
                    $"WHERE id={DocID}";
                await dbCon.SQLInsert(stringSQL);
            }
            else
            {
                stringSQL = $"INSERT INTO articles (filename, article_name, authors, date_publish, annotation, keywords, file_hash) " +
                    $"VALUES ('{EscapeSql(_article.Filename)}', '{EscapeSql(_article.Name)}', '{EscapeSql(_article.Authors)}', '{_article.PublishDate:yyyy-MM-dd}'," +
                    $"'{EscapeSql(_article.Annotation)}', '{EscapeSql(_article.Keywords)}', '{_article.HashSum}')";
                id = await dbCon.SQLInsert(stringSQL);
            }

            // tmp: 1 = created, 2 = verified — TODO confirm against actions_history schema.
            int action_type = DocID > 0 ? 2 : 1;
            stringSQL = $"INSERT INTO actions_history (article_id, action_type, acc_id) " +
                $"VALUES ('{id}', '{action_type}', '{currentAcc.UUID}')";
            await dbCon.SQLInsert(stringSQL);

            // Log every field the user changed relative to the parser's original values.
            var propDict = Compare.SimpleCompare(_article, _articleClone);
            foreach (var prop in propDict)
            {
                stringSQL = $"INSERT INTO articles_edit_log (article_id, acc_id, field_name, field_prevvalue, field_newvalue) " +
                    $"VALUES ('{id}', '{currentAcc.UUID}', '{prop.Key}', '{EscapeSql(prop.Value.GetValue(_articleClone, null)?.ToString())}', '{EscapeSql(prop.Value.GetValue(_article, null)?.ToString())}')";
                await dbCon.SQLInsert(stringSQL);
            }

            if (DocID > 0)
            {
                // Fix: "veryfied"/"verifyed" typos in the status messages.
                _status = propDict.Count > 0
                    ? "All changes saved, article has been verified."
                    : "Article verified without any changes.";
            }
            else
            {
                // Render the document to HTML for in-browser viewing.
                GetHtmlParam htmlParams = new();
                htmlParams.OutHtmlAndBodyTags = true;
                string html = _document.GetHtmlString(htmlParams);

                string fullpath;
                string htmldirectorypath;
                string docdirectorypath;
#if DEBUG
                htmldirectorypath = Path.Combine(Environment.CurrentDirectory, "wwwroot", STORAGE_FOLDER_NAME, "html");
                docdirectorypath = Path.Combine(Environment.CurrentDirectory, "wwwroot", STORAGE_FOLDER_NAME, "source");
#else
                htmldirectorypath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "wwwroot", STORAGE_FOLDER_NAME, "html");
                docdirectorypath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "wwwroot", STORAGE_FOLDER_NAME, "source");
#endif
                // Save the HTML rendition.
                fullpath = Path.Combine(htmldirectorypath, _document.SourceFileName + ".html");
                Console.WriteLine($"Saving file [{fullpath}]");
                Directory.CreateDirectory(htmldirectorypath);
                File.WriteAllBytes(fullpath, Encoding.UTF8.GetBytes(html));

                // Save the original upload, prefixed with the new DB id.
                fullpath = Path.Combine(docdirectorypath, $"{id}_{_article.Filename}");
                Directory.CreateDirectory(docdirectorypath);
                // Fix: FileStream was not disposed on exception — use await using.
                await using (FileStream fs = new(fullpath, FileMode.Create, FileAccess.Write))
                {
                    _memoryStream.Position = 0;
                    await _memoryStream.CopyToAsync(fs);
                    _status = $"User has saved new article data: [{id}_{_article.Filename}], memory size:{_memoryStream.Length}b, file size: {fs.Length}b";
                }
                Console.WriteLine(_status);
                _memoryStream.Close();
            }

            // Reload articles so the lists reflect this save.
            await AppData.LoadArticles();
            // Fix: the loading modal was opened above but never closed.
            _modalLoading.Close();
        }

        /// <summary>
        /// Minimal escaping for values inlined into single-quoted MySQL string literals.
        /// Stop-gap only — parameterized queries are the real fix.
        /// </summary>
        static string EscapeSql(string value) =>
            value?.Replace("\\", "\\\\").Replace("'", "''") ?? "";

        /// <summary>Returns the Base64-encoded MD5 hash of the stream (used for duplicate detection only, not security).</summary>
        async Task<string> CalculateHashSum(MemoryStream ms)
        {
            // MD5.Create() replaces the obsolete, undisposed MD5CryptoServiceProvider;
            // output is identical.
            using MD5 md5 = MD5.Create();
            ms.Position = 0;
            byte[] hash = await md5.ComputeHashAsync(ms);
            return Convert.ToBase64String(hash);
        }

        /// <summary>
        /// Returns the names of [Required] properties of <paramref name="obj"/> that are
        /// null or empty; an empty list means the form is valid.
        /// </summary>
        List<string> ValidateForm<T>(T obj)
        {
            var props = typeof(T).GetProperties()
                .Where(pi => Attribute.IsDefined(pi, typeof(RequiredAttribute)));
            List<string> result = new();
            foreach (var prop in props)
            {
                var val = prop.GetValue(obj, null);
                if (val == null || val.ToString().Length == 0)
                    result.Add(prop.Name);
            }
            return result;
        }

        /// <summary>Reads the [Display(Name=...)] attribute of an enum member.</summary>
        static string GetDisplayName(Enum enumValue)
        {
            return enumValue.GetType()
                .GetMember(enumValue.ToString())
                .First()
                .GetCustomAttribute<DisplayAttribute>()
                .GetName();
        }
    }
}