const sectionsFinder = function (opts) {
  /**
   * Builds a random identifier shaped like a GUID: five groups of lowercase
   * hex digits (8-4-4-4-12) separated by dashes.
   *
   * The digits come from Math.random(), so the value is only suitable as a
   * local, non-security-sensitive id.
   *
   * @returns {string} identifier such as "3f2a9c1d-0b7e-4a6f-9d21-5c8e7f0a1b2c"
   */
  const guidGenerator = function () {
    // Produces exactly four hex digits: the value lies in [0x10000, 0x1ffff],
    // so its hex form has five digits and the leading "1" is dropped.
    const hexChunk = () => {
      const value = Math.floor((1 + Math.random()) * 0x10000);
      return value.toString(16).slice(1);
    };

    // Number of four-digit chunks in each dash-separated group.
    const chunksPerGroup = [2, 1, 1, 1, 3];

    return chunksPerGroup
      .map((count) => Array.from({ length: count }, hexChunk).join(""))
      .join("-");
  };

  /**
   * Builds the pattern that recognises the start of a new structural unit:
   * a paragraph beginning with "article", "exhibit", "annex", "schedule" or
   * "section" followed by a label, or beginning with a dotted number such as
   * "1." / "1.2" / "1.2.3.".
   *
   * Each alternative may also capture a short heading title: optional
   * whitespace, a letter, up to 64 further non-period characters and a
   * closing period. The bound is written as `{0,64}` because JavaScript has
   * no `{,n}` quantifier; without the `u` flag `{,64}` is matched as the
   * literal text "{,64}".
   *
   * @returns {RegExp} a fresh global, case-insensitive expression
   */
  const getBreakSectionRegex = () => {
    const regexTerms = [
      /^(article (\S*))(\s?[a-z][^\.]{0,64}\.)?/,
      /^(exhibit (\S*))(\s?[a-z][^\.]{0,64}\.)?/,
      /^(annex (\S*))(\s?[a-z][^\.]{0,64}\.)?/,
      /^(schedule (\S*))(\s?[a-z][^\.]{0,64}\.)?/,
      /^(section (\S*))(\s?[a-z][^\.]{0,64}\.)?/,
      /^(\d{1,2}\.([\d]{1,2}\.?)*)(\s?[a-z][^\.]{0,64}\.)?/,
    ];

    // Every alternative keeps its own "^" anchor once joined with "|".
    const combinedSource = regexTerms.map((term) => term.source).join("|");
    return new RegExp(combinedSource, "gi");
  };

  /**
   * Builds the pattern that recognises a signature-page marker: the words
   * "signature page", the following token, and optionally a short trailing
   * phrase (optional whitespace, a letter, up to 64 non-period characters
   * and a closing period).
   *
   * The bound is written as `{0,64}` because JavaScript has no `{,n}`
   * quantifier; without the `u` flag `{,64}` is matched as literal text.
   *
   * @returns {RegExp} a fresh global, case-insensitive expression
   */
  const getSigPageRegex = () => {
    const regexTerms = [/(signature page (\S*))(\s?[a-z][^\.]{0,64}\.)?/];

    const combinedSource = regexTerms.map((term) => term.source).join("|");
    return new RegExp(combinedSource, "gi");
  };

  /**
   * Builds the pattern used to pull a heading title out of a paragraph:
   * either "section <label>" or a dotted number, followed by everything up
   * to (but not including) the next period.
   *
   * @returns {RegExp} a fresh global, case-insensitive expression
   */
  const getTitleRegex = () => {
    const alternatives = [
      /(section (\S*))([^.]+)/,
      /(\d{1,2}\.([\d]{1,2}\.?)*)([^.]+)/,
    ];
    const combinedSource = alternatives.map(({ source }) => source).join("|");

    return new RegExp(combinedSource, "gi");
  };

  /**
   * Builds the pattern that extracts the section reference itself: either
   * "section <label>" or a two-level number such as "1.2" that is directly
   * followed by a non-word character.
   *
   * @returns {RegExp} a fresh global, case-insensitive expression
   */
  const getWordRegex = () => {
    const alternatives = [/(section (\S*))/, /\d{1,2}\.[\d]{1,2}(?=\W)/];
    const combinedSource = alternatives.map(({ source }) => source).join("|");

    return new RegExp(combinedSource, "gi");
  };

  /**
   * Builds the pattern that decides whether a whole paragraph is a section
   * heading. After an optional run of digits, periods and whitespace, the
   * paragraph must start with "section <label>" or with a dotted number,
   * and the rest of the paragraph must not contain a "%" character.
   *
   * @returns {RegExp} a fresh global, case-insensitive expression
   */
  const getDefinedRegEx = () => {
    const alternatives = [
      /^([\d.\t\s]*)?((section (\S*)[^,;.\s]))[^%]*$/,
      /^([\d.\t\s]*)?((\d{1,2}\.([\d]{1,2}\.?)*))[^%]*$/,
    ];
    const combinedSource = alternatives.map(({ source }) => source).join("|");

    return new RegExp(combinedSource, "gi");
  };

  /**
   * Collects the spellings under which a section reference may appear.
   *
   * The result contains, in order:
   *   1. `term` itself;
   *   2. every sub-reference found in `paragraphs` that starts with `term`
   *      and continues with at least two non-whitespace characters (for
   *      example "Section 1.2(a)"), with one trailing "." or "," removed;
   *   3. the counterpart form: when `term` contains "section"
   *      (case-insensitive) the dotted number found inside it, otherwise
   *      `Section ${term}`.
   *
   * Duplicates are not removed.
   *
   * @param {string} term - section reference such as "Section 1.2" or "1.2"
   * @param {Array<{text: string}>} paragraphs - paragraphs to scan
   * @returns {string[]} the equivalent spellings
   */
  const getEquivalentTerms = function (term, paragraphs) {
    // The term is document text, so every regex metacharacter in it has to
    // be escaped: labels such as "2.1(" would otherwise throw when the
    // pattern is compiled, and an unescaped "." would match any character.
    const escapedTerm = term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const subSectionRegex = new RegExp(
      `(${escapedTerm}(\\S))([^\\s]+)`,
      "gi"
    );

    const equivalents = [term];

    paragraphs.forEach((paragraph) => {
      // String#match with a global regex returns every match, or null.
      const altTerms = paragraph.text.match(subSectionRegex);
      if (altTerms === null) return;

      altTerms.forEach((alt) => {
        // Drop sentence punctuation that was swept up by the greedy tail.
        equivalents.push(/[.,]$/.test(alt) ? alt.slice(0, -1) : alt);
      });
    });

    if (term.toUpperCase().includes("SECTION")) {
      // `{0,64}` bounds the optional trailing title; JavaScript has no
      // `{,64}` quantifier form.
      const numberMatch = term.match(
        /(\d{1,2}\.([\d]{1,2}\.?)*)(\s?[a-z][^\.]{0,64}\.)?/
      );

      if (numberMatch && numberMatch[0]) {
        equivalents.push(numberMatch[0]);
      }
    } else {
      equivalents.push(`Section ${term}`);
    }

    return equivalents;
  };

  /**
   * Wraps every equivalent spelling of `word` in a record with its own id.
   *
   * @param {string} word - section reference to expand
   * @param {Array<{text: string}>} paragraphs - paragraphs passed on to
   *   getEquivalentTerms
   * @returns {Array<{word: string, match: string, id: string}>} one record
   *   per spelling, in the order getEquivalentTerms returned them; `word`
   *   and `match` hold the same string
   */
  const initAlternativeTerms = function (word, paragraphs) {
    return getEquivalentTerms(word, paragraphs).map((equivalent) => ({
      word: equivalent,
      match: equivalent,
      id: guidGenerator(),
    }));
  };

  /**
   * Returns the counterpart spelling of a section reference.
   *
   * When `term` contains "section" (case-insensitive), the result holds the
   * dotted number found inside it, together with an optional short trailing
   * title (optional whitespace, a lowercase letter, up to 64 non-period
   * characters and a closing period); it is empty when no number is found.
   * Otherwise the result holds `Section ${term}`.
   *
   * @param {string} term - section reference such as "Section 1.2" or "1.2"
   * @returns {string[]} zero or one counterpart spellings
   */
  const getTocEquivalentTerms = function (term) {
    if (!term.toUpperCase().includes("SECTION")) {
      return [`Section ${term}`];
    }

    // `{0,64}` bounds the optional title; JavaScript has no `{,64}` form and
    // would match that text literally.
    const numberMatch = term.match(
      /(\d{1,2}\.([\d]{1,2}\.?)*)(\s?[a-z][^\.]{0,64}\.)?/
    );

    return numberMatch && numberMatch[0] ? [numberMatch[0]] : [];
  };

  /**
   * Looks for a table-of-contents paragraph whose whole text equals one of
   * the equivalent spellings of `term`, ignoring letter case.
   *
   * @param {string} term - section reference to look up
   * @param {Array<{text: string, isToc: boolean}>} paragraphs - all
   *   paragraphs; only those flagged `isToc` are considered
   * @returns {object|undefined} the first matching paragraph, or undefined
   *   when there is none
   */
  const isInToC = (term, paragraphs) => {
    // Both sides are upper-cased so that "Section 1.2" and "SECTION 1.2"
    // compare equal. getEquivalentTerms already includes `term` itself.
    const upperCasedTerms = new Set(
      getEquivalentTerms(term, paragraphs).map((equivalent) =>
        equivalent.toUpperCase()
      )
    );

    return paragraphs.find(
      (paragraph) =>
        paragraph.isToc && upperCasedTerms.has(paragraph.text.toUpperCase())
    );
  };

  /**
   * Reports whether section detection should be restricted to entries that
   * also appear in the table of contents.
   *
   * The restriction is currently switched off: the function ignores its
   * argument and always answers false. The disabled check was
   * "at least one paragraph has isToc set".
   *
   * @param {Array<object>} paragraphs - unused
   * @returns {boolean} always false
   */
  const checkToc = (paragraphs) => false;

  /**
   * Scans the paragraphs for section headings and gathers the body
   * paragraphs that belong to each one.
   *
   * A paragraph starts a section when it is not part of the table of
   * contents, is non-empty, is flagged `isParagraph`, matches the heading
   * pattern (getDefinedRegEx) and yields a section reference
   * (getWordRegex). From there up to 50 following entries are inspected:
   * empty ones are skipped, table content (tableNestingLevel > 0) is
   * skipped but remembered, and collection stops at the end of the list or
   * at the next heading, signature-page marker or structural break.
   *
   * Headings whose title text is equal (ignoring case) are merged: the
   * paragraphs of a later occurrence are appended to the first section
   * instead of creating a new one.
   *
   * @param {Array<{text: string, isToc: boolean, isParagraph: boolean,
   *   tableNestingLevel?: number}>} paragraphs - document paragraphs in
   *   order
   * @returns {Array<object>} section records with the fields id, match,
   *   word, definition (always null), definedDefinition,
   *   definedTableDetected, title, alternativeTerms and type ("section")
   */
  const getTerms = (paragraphs) => {
    // All patterns are built once. They carry the "g" flag, but they are
    // only used through String#match, which resets lastIndex before it
    // starts, so sharing them between paragraphs is safe.
    const definedRegEx = getDefinedRegEx();
    const wordRegEx = getWordRegex();
    const titleRegex = getTitleRegex();
    const sigPageRegex = getSigPageRegex();
    const breakSectionRegex = getBreakSectionRegex();

    // Upper bound on how many following entries one heading may look at.
    const maxLookAhead = 50;

    const sections = [];

    paragraphs.forEach((paragraph, index) => {
      if (paragraph.isToc) return;
      if (paragraph.text === "") return;
      if (!paragraph.isParagraph) return;

      const definedMatch = paragraph.text.match(definedRegEx);
      if (definedMatch === null) return;

      const word = definedMatch[0].match(wordRegEx);
      if (word == null) return;

      if (checkToc(paragraphs) && !isInToC(word[0], paragraphs)) return;

      // The title pattern may find nothing; fall back to an empty title
      // both for the merge lookup and for the stored record.
      const title = paragraph.text.match(titleRegex);
      const titleText = title ? title[0] : "";

      // Remove every curly or straight double quote from the reference.
      const currentWord = word[0].replace(/[“”"]/g, "");

      const definitionParagraphs = [paragraph];
      let tableDetected = false;

      for (let offset = 1; offset <= maxLookAhead; offset++) {
        const candidate = paragraphs[index + offset];

        // Ran past the end of the document.
        if (!candidate) break;

        if (candidate.text === "") continue;

        if (candidate.tableNestingLevel > 0) {
          tableDetected = true;
          continue;
        }

        // The next heading, a signature page or another structural break
        // ends the body of the current section.
        if (
          candidate.text.match(definedRegEx) ||
          candidate.text.match(sigPageRegex) ||
          candidate.text.match(breakSectionRegex)
        ) {
          break;
        }

        definitionParagraphs.push(candidate);
      }

      const upperCasedTitle = titleText.toUpperCase();
      const existingSection = sections.find(
        (section) => section.title.toUpperCase() === upperCasedTitle
      );

      if (existingSection) {
        existingSection.definedDefinition.push(...definitionParagraphs);
        return;
      }

      sections.push({
        id: guidGenerator(),
        match: definedMatch,
        word: currentWord,
        definition: null,
        definedDefinition: definitionParagraphs,
        definedTableDetected: tableDetected,
        title: titleText,
        alternativeTerms: initAlternativeTerms(currentWord, paragraphs),
        type: "section",
      });
    });

    return sections;
  };

  return {
    getSections: function (dom) {
      var ps = dom.querySelectorAll("p, tr, h1, h2, h3, h4, h5, h6");
      var paragraphs = [];

      var isToc = false;

      ps.forEach(function (item, idx) {
        if (
          isToc &&
          item.localName != "tr" &&
          (item.textContent.trim() ?? "").length > 5 &&
          item.textContent.trim().toLowerCase() != "section" &&
          item.textContent.trim().toLowerCase() != "table of contents" &&
          item.closest("table") === null
        ) {
          isToc = false;
        }

        if (item.textContent.trim().toLowerCase() === "table of contents") {
          isToc = true;
        }

        paragraphs.push({
          html: (item.outerHTML ?? "")
            .replaceAll("‚Äú", "“")
            .replaceAll("‚Äù", "”")
            .replaceAll("&nbsp;", " ")
            .replaceAll(/[\u202F\u00A0]/g, " ")
            .replaceAll(/\s\s+/g, " "),
          text: (item.textContent.trim() ?? "")
            .replaceAll("‚Äú", "“")
            .replaceAll("‚Äù", "”")
            .replaceAll("&nbsp;", " ")
            .replaceAll(/[\u202F\u00A0]/g, " ")
            .replaceAll(/\s\s+/g, " "),
          isParagraph:
            item.localName == "p" ||
            item.localName == "h1" ||
            item.localName == "h2" ||
            item.localName == "h3" ||
            item.localName == "h4" ||
            item.localName == "h5" ||
            item.localName == "h6",
          isToc: isToc,
        });
      });

      const sections = getTerms(paragraphs);

      return sections;
    },
  };
};

export default sectionsFinder;
