using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using BitMiracle.Docotic.Pdf;
using Console = HyperCube.Utils.AdvConsole;

namespace HyperCube
{
    /// <summary>
    /// Extracts plain text and basic metadata (title, authors, keywords) from documents.
    /// </summary>
    public class DocParse
    {
        // The patterns are built once and shared between calls instead of being re-created per call.

        // Anchored at the very start of the text (no Multiline option): lazily matches up to the
        // first '.', '!' or '?' that is followed by whitespace or the end of the input.
        private static readonly Regex NameRegex = new(@"^.*?[\.!\?](?:\s|$)");

        // Two Cyrillic initials followed by a capitalised Cyrillic surname, e.g. "А. Б. Иванов" or "А.Б. Иванов".
        private static readonly Regex AuthorRegex = new(@"[А-Я]\.\s?[А-Я]\.\s[А-Я][а-я]{1,20}");

        // The "ключевые слова:" label followed by a run of word characters, commas, '+', '-' and
        // whitespace, terminated by a period.
        private static readonly Regex KeywordsRegex = new(@"(ключевые)\s*(слова:)\s[\w+\-+\w\,\s]*\.", RegexOptions.IgnoreCase);

        /// <summary>
        /// Reads a PDF from <paramref name="ms"/> and fills an article model with the extracted
        /// text and the name, authors and keywords found in that text.
        /// </summary>
        /// <param name="ms">Stream holding the PDF; it is rewound to position 0 before reading.</param>
        /// <returns>
        /// A model whose <c>Text</c> is always set; <c>Name</c>, <c>Authors</c> and <c>Keywords</c>
        /// are assigned only when the corresponding pattern matches (otherwise a message is logged).
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="ms"/> is <c>null</c>.</exception>
        public static Models.ArticleModel ReadPDF(MemoryStream ms)
        {
            if (ms is null)
            {
                throw new ArgumentNullException(nameof(ms));
            }

            Models.ArticleModel articleModel = new();

            ms.Position = 0;

            // Release the document as soon as the text has been extracted.
            // NOTE(review): assumes disposing the document leaves the caller-owned stream open - confirm
            // against the Docotic.Pdf reference.
            string text;
            using (PdfDocument pdf = new(ms))
            {
                text = pdf.GetText();
            }

            // Article name: the pattern is anchored at the start of the text, so at most one match exists.
            Match nameMatch = NameRegex.Match(text);
            if (nameMatch.Success)
            {
                articleModel.Name = nameMatch.Value.Trim();
            }
            else
            {
                Console.WriteLine("cant get name");
            }

            // Publish date: not extracted yet.

            // Authors: every "initials + surname" occurrence anywhere in the text, in order of appearance.
            MatchCollection authorMatches = AuthorRegex.Matches(text);
            if (authorMatches.Count > 0)
            {
                articleModel.Authors = string.Join(", ", authorMatches.Cast<Match>().Select(m => m.Value));
            }
            else
            {
                Console.WriteLine("authors not found");
            }

            // Keywords: each match is cleaned on its own so that the "ключевые слова:" label is
            // stripped from every match, not only from the first one.
            MatchCollection keywordMatches = KeywordsRegex.Matches(text);
            if (keywordMatches.Count > 0)
            {
                articleModel.Keywords = string.Join(", ", keywordMatches.Cast<Match>().Select(m => CleanKeywords(m.Value)));
            }
            else
            {
                Console.WriteLine("keywords not found");
            }

            // Annotation: not extracted yet.

            articleModel.Text = text;

            return articleModel;
        }

        /// <summary>
        /// Placeholder for DOCX support; currently always returns an empty string.
        /// </summary>
        /// <returns>An empty string.</returns>
        public static string ReadDocx()
        {
            StringBuilder pageText = new();

            return pageText.ToString();
        }

        /// <summary>
        /// Normalises a single keywords match: collapses whitespace runs to one space, removes
        /// hyphens and drops the leading label up to and including the colon and the space after it.
        /// </summary>
        /// <param name="rawMatch">Text matched by the keywords pattern.</param>
        /// <returns>The keyword list without its label (the terminating period is kept).</returns>
        private static string CleanKeywords(string rawMatch)
        {
            string cleaned = Regex.Replace(rawMatch, @"\s+", " ");

            // Presumably meant to undo line-break hyphenation; it also strips hyphens inside compound words.
            cleaned = cleaned.Replace("-", "");

            // The pattern guarantees a colon followed by whitespace and at least the closing period,
            // so skipping ": " cannot run past the end of the string.
            return cleaned.Substring(cleaned.IndexOf(':') + 2);
        }
    }
}