const articlesFinder = function (opts) {
  /**
   * Produces a random, GUID-shaped identifier: lowercase hex digits grouped
   * 8-4-4-4-12 and separated by dashes.
   *
   * The digits come from `Math.random()`, so the value is neither
   * cryptographically secure nor a versioned RFC 4122 UUID.
   *
   * @returns {string} e.g. "3f9a0c1e-7b2d-4e5f-a1b2-c3d4e5f60718"
   */
  const guidGenerator = function () {
    // (1 + random) * 0x10000 lies in [0x10000, 0x20000), i.e. always five hex
    // digits with a leading "1"; dropping that digit leaves exactly four.
    const randomHexQuad = () =>
      Math.floor((1 + Math.random()) * 0x10000)
        .toString(16)
        .slice(1);

    // How many four-digit quads make up each dash-separated group.
    const quadsPerGroup = [2, 1, 1, 1, 3];

    return quadsPerGroup
      .map((quadCount) =>
        Array.from({ length: quadCount }, () => randomHexQuad()).join("")
      )
      .join("-");
  };

  /**
   * Builds the regex used to recognise the start of a new structural unit:
   * a paragraph beginning with "article", "exhibit", "annex", "schedule" or
   * "section" followed by a token, or beginning with a dotted number such as
   * "1.", "1.2" or "12.3.4".
   *
   * NOTE(review): `{,64}` is not a quantifier in a JavaScript regex without
   * the `u` flag; it is matched as the literal characters "{,64}". The
   * optional trailing-title group therefore practically never matches.
   * `{0,64}` was probably intended - confirm before changing.
   *
   * @returns {RegExp} A fresh global, case-insensitive regex on every call.
   */
  function getBreakSectionRegex() {
    const headingPatterns = [
      /^(article (\S*))(\s?[a-z][^\.]{,64}\.)?/,
      /^(exhibit (\S*))(\s?[a-z][^\.]{,64}\.)?/,
      /^(annex (\S*))(\s?[a-z][^\.]{,64}\.)?/,
      /^(schedule (\S*))(\s?[a-z][^\.]{,64}\.)?/,
      /^(section (\S*))(\s?[a-z][^\.]{,64}\.)?/,
      /^(\d{1,2}\.?([\d]{1,2}\.?)*)(\s?[a-z][^\.]{,64}\.)?/,
    ];

    // Each alternative carries its own `^`, so joining with `|` keeps every
    // branch anchored to the start of the tested string.
    const alternation = headingPatterns.map(({ source }) => source).join("|");

    return new RegExp(alternation, "gi");
  }

  /**
   * Builds the regex that spots a "signature page <token>" reference anywhere
   * in a string.
   *
   * NOTE(review): `{,64}` is not a quantifier in a JavaScript regex without
   * the `u` flag; it is matched as the literal characters "{,64}", so the
   * optional trailing group practically never matches. `{0,64}` was probably
   * intended - confirm before changing.
   *
   * @returns {RegExp} A fresh global, case-insensitive regex on every call.
   */
  function getSigPageRegex() {
    const signaturePagePattern = /(signature page (\S*))(\s?[a-z][^\.]{,64}\.)?/;

    // Passing a RegExp with a flags argument copies its source and replaces its flags.
    return new RegExp(signaturePagePattern, "gi");
  }

  /**
   * Builds the regex that extracts an article title: "article <token>"
   * followed by the run of characters up to (not including) the next period.
   *
   * NOTE(review): `{,64}` is not a quantifier in a JavaScript regex without
   * the `u` flag; it is matched as the literal characters "{,64}", so the
   * optional middle group practically never matches and the title is
   * captured by the trailing `([^.]+)` alone. `{0,64}` was probably
   * intended - confirm before changing, because it alters the matched text.
   *
   * @returns {RegExp} A fresh global, case-insensitive regex on every call.
   */
  function getTitleRegex() {
    const articleTitlePattern = /(article (\S*))(\s?[a-z][^\.]{,64}\.)?([^.]+)/;

    // Passing a RegExp with a flags argument copies its source and replaces its flags.
    return new RegExp(articleTitlePattern, "gi");
  }

  /**
   * Builds the regex that extracts the article reference itself
   * ("article <token>"), where the last matched character may not be a
   * comma, semicolon, period or whitespace.
   *
   * NOTE(review): `{,64}` is not a quantifier in a JavaScript regex without
   * the `u` flag; it is matched as the literal characters "{,64}", so the
   * optional trailing group practically never matches. `{0,64}` was probably
   * intended - confirm before changing, because it alters the matched text.
   *
   * @returns {RegExp} A fresh global, case-insensitive regex on every call.
   */
  function getWordRegex() {
    const articleWordPattern = /(article (\S*)[^,;.\s])(\s?[a-z][^\.]{,64}\.)?/;

    // Passing a RegExp with a flags argument copies its source and replaces its flags.
    return new RegExp(articleWordPattern, "gi");
  }

  /**
   * Builds the regex that recognises an article heading paragraph: optional
   * leading digits, dots, tabs and whitespace, then "article <token>", then
   * any remaining characters other than "%" up to the end of the string.
   *
   * NOTE(review): `{,64}` is not a quantifier in a JavaScript regex without
   * the `u` flag; it is matched as the literal characters "{,64}", so the
   * optional title group practically never matches. `{0,64}` was probably
   * intended - confirm before changing, because it alters which strings match.
   *
   * @returns {RegExp} A fresh global, case-insensitive regex on every call.
   */
  function getDefinedRegEx() {
    const articleHeadingPattern =
      /^([\d.\t\s]*)?((article (\S*))(\s?[a-z][^\.]{,64}\.)?)[^%]*$/;

    // Passing a RegExp with a flags argument copies its source and replaces its flags.
    return new RegExp(articleHeadingPattern, "gi");
  }



  /**
   * Lists the spellings treated as equivalent to `term`. At present the only
   * equivalent of a term is the term itself.
   *
   * @param {string} term - The term to expand.
   * @param {Array} paragraphs - Accepted for signature compatibility; not read.
   * @returns {Array} A new single-element array containing `term`.
   */
  function getEquivalentTerms(term, paragraphs) {
    // A fresh array on every call: callers are free to append to the result.
    return [term];
  }

  /**
   * Creates one alternative-term record per equivalent spelling of `word`.
   *
   * @param {string} word - The term whose equivalents are expanded.
   * @param {Array} paragraphs - Forwarded unchanged to `getEquivalentTerms`.
   * @returns {Array<{word: *, match: *, id: string}>} One record per
   *   equivalent term, in the order `getEquivalentTerms` returned them; each
   *   record gets its own identifier from `guidGenerator`.
   */
  function initAlternativeTerms(word, paragraphs) {
    return getEquivalentTerms(word, paragraphs).map((variant) => ({
      word: variant,
      match: variant,
      id: guidGenerator(),
    }));
  }

  /**
   * Finds the first table-of-contents paragraph whose text equals `term`
   * (or one of its equivalent terms), ignoring letter case.
   *
   * Both sides of the comparison are upper-cased. Previously only the
   * paragraph text was upper-cased, so a term containing any lowercase
   * letter could never be found.
   *
   * @param {string} term - The term to look up.
   * @param {Array<{text: string, isToc: boolean}>} paragraphs - Paragraph
   *   records; only entries with a truthy `isToc` are considered.
   * @returns {object|undefined} The first matching paragraph record, or
   *   `undefined` when there is none.
   */
  function isInToC(term, paragraphs) {
    const wantedTexts = new Set(
      getEquivalentTerms(term, paragraphs).map((candidate) =>
        candidate.toUpperCase()
      )
    );

    return paragraphs.find(
      (paragraph) =>
        paragraph.isToc && wantedTexts.has(paragraph.text.toUpperCase())
    );
  }

  /**
   * Reports whether the document should be treated as having a table of
   * contents. This is currently switched off: the function always answers
   * `false` and does not read `paragraphs`.
   *
   * An earlier implementation, left commented out in the original source,
   * answered `true` when at least one paragraph had `isToc` set.
   *
   * @param {Array} paragraphs - Accepted for signature compatibility; not read.
   * @returns {boolean} Always `false`.
   */
  function checkToc(paragraphs) {
    const tocDetectionEnabled = false;

    return tocDetectionEnabled;
  }

  /**
   * Gathers the paragraphs that make up an article: the heading paragraph
   * itself followed by the paragraphs after it, until a stop condition hits.
   *
   * Scanning looks at no more than 50 positions after the heading and stops
   * when it runs past the end of `paragraphs`, when a paragraph's text matches
   * any of `stopRegexes` (that paragraph is NOT included), or right after
   * including a paragraph that has `isParagraph` set and ends with ".".
   * Paragraphs with empty text or with `tableNestingLevel > 0` are skipped
   * but still count towards the 50-position limit.
   *
   * @param {Array<object>} paragraphs - All paragraph records.
   * @param {number} headingIndex - Index of the article heading.
   * @param {RegExp[]} stopRegexes - Global regexes marking the next unit.
   * @returns {Array<object>} Heading first, then the collected paragraphs.
   */
  function collectArticleBody(paragraphs, headingIndex, stopRegexes) {
    const MAX_LOOKAHEAD = 50;
    const body = [paragraphs[headingIndex]];

    for (let step = 1; step <= MAX_LOOKAHEAD; step += 1) {
      const candidate = paragraphs[headingIndex + step];

      if (!candidate) break;
      if (candidate.text === "") continue;
      if (candidate.tableNestingLevel > 0) continue;

      // String.prototype.match with a global regex starts from lastIndex 0
      // and leaves it at 0, so the shared regex objects are safe to reuse
      // here (RegExp.prototype.test would not be).
      if (stopRegexes.some((regex) => candidate.text.match(regex))) break;

      body.push(candidate);

      if (candidate.text.slice(-1) === "." && candidate.isParagraph) break;
    }

    return body;
  }

  /**
   * Collects the distinct section labels found from the article heading up
   * to (not including) the next paragraph matching `definedRegEx`.
   *
   * A paragraph contributes a label when its trimmed text matches
   * `sectionRegex`. Text starting with the exact prefix "Section" contributes
   * its first two space-separated words verbatim; any other match contributes
   * "Section " followed by the captured number.
   *
   * @param {Array<object>} paragraphs - All paragraph records.
   * @param {number} headingIndex - Index of the article heading.
   * @param {RegExp} definedRegEx - Global regex recognising article headings.
   * @param {RegExp} sectionRegex - Regex whose group 1 is the section number.
   * @returns {string[]} Unique labels in order of first appearance.
   */
  function collectSectionLabels(paragraphs, headingIndex, definedRegEx, sectionRegex) {
    const labels = new Set();

    for (let i = headingIndex; i < paragraphs.length; i += 1) {
      const para = paragraphs[i];
      if (!para) break;

      // The heading itself matches definedRegEx, hence the index guard.
      if (i > headingIndex && para.text.match(definedRegEx)) break;

      const text = para.text.trim();
      const sectionMatch = text.match(sectionRegex);
      if (!sectionMatch) continue;

      labels.add(
        text.startsWith("Section")
          ? text.split(" ").slice(0, 2).join(" ")
          : `Section ${sectionMatch[1]}`
      );
    }

    return Array.from(labels);
  }

  /**
   * Extracts article records from a list of paragraph records.
   *
   * A paragraph starts an article when it has non-empty text, has
   * `isParagraph` set, matches the article-heading regex and yields an
   * article word. Double-quote characters are removed from the word. When an
   * article with the same word (case-insensitive) was already found, the new
   * paragraphs are appended to that article instead of creating a new one.
   *
   * Output is the same as before this rewrite; the changes are that the
   * regexes are compiled once per call instead of on every look-ahead step,
   * quote stripping is a single `replaceAll`, unused locals are gone, and
   * the section scan / title match only run when their result is used.
   *
   * @param {Array<{text: string, isParagraph: boolean, tableNestingLevel: number}>} paragraphs
   *   Paragraph records in document order.
   * @returns {Array<object>} One record per distinct article, each with `id`,
   *   `match`, `word`, `definition` (always null), `definedDefinition`,
   *   `definedTableDetected` (always false), `title`, `alternativeTerms`,
   *   `type` ("article") and `sections`.
   */
  function getTerms(paragraphs) {
    const definedRegEx = getDefinedRegEx();
    const wordRegEx = getWordRegex();
    const titleRegex = getTitleRegex();
    const sectionRegex = /^(?:Section\s+)?(\d{1,2}\.\d{1,2})(?:\s+([^.]+))?/i;
    const stopRegexes = [definedRegEx, getSigPageRegex(), getBreakSectionRegex()];

    const articles = [];

    paragraphs.forEach((paragraph, index) => {
      if (paragraph.text === "" || !paragraph.isParagraph) return;

      const definedMatch = paragraph.text.match(definedRegEx);
      if (definedMatch === null) return;

      const word = definedMatch[0].match(wordRegEx);
      if (word == null) return;

      const inToc = isInToC(word[0], paragraphs);
      if (checkToc(paragraphs) && !inToc) return;

      const currentWord = word[0].replaceAll('"', "");
      const definitionParagraphs = collectArticleBody(paragraphs, index, stopRegexes);

      // Fold repeated headings into the article that was recorded first.
      const wordKey = currentWord.toUpperCase();
      const sameWordArticles = articles.filter(
        (article) => article.word.toUpperCase() === wordKey
      );
      if (sameWordArticles.length > 0) {
        sameWordArticles.forEach((article) => {
          article.definedDefinition.push(...definitionParagraphs);
        });
        return;
      }

      let title = currentWord;
      if (inToc) {
        const titleMatch = paragraph.text.match(titleRegex);
        title = titleMatch ? titleMatch[0] : "";
      }

      articles.push({
        id: guidGenerator(),
        match: definedMatch,
        word: currentWord,
        definition: null,
        definedDefinition: definitionParagraphs,
        definedTableDetected: false,
        title,
        alternativeTerms: initAlternativeTerms(currentWord, paragraphs),
        type: "article",
        sections: collectSectionLabels(paragraphs, index, definedRegEx, sectionRegex),
      });
    });

    return articles;
  }

  return {
    getArticles: function (dom) {
      var ps = dom.querySelectorAll("p, td, h1, h2, h3, h4, h5, h6");
      var paragraphs = [];
      var isToc = false;
      var tableNestingLevel = 0;

      ps.forEach(function (item, idx) {
        if (item.closest("table")) {
          tableNestingLevel = item.closest("table").getElementsByTagName("table").length;
        } else {
          tableNestingLevel = 0;
        }

        if (
          isToc &&
          item.localName != "td" &&
          item.textContent.trim().toLowerCase() != "section" &&
          item.textContent.trim() != "Section" &&
          (item.textContent.trim() ?? "").length > 5 &&
          item.textContent.trim().toLowerCase() != "table of contents" &&
          item.closest("table") === null
        ) {
          isToc = false;
        }

        if (item.textContent.trim().toLowerCase() === "table of contents") {
          isToc = true;
        }

        paragraphs.push({
          html: (item.outerHTML ?? "")
            .replaceAll("‚Äú", "")
            .replaceAll("‚Äù", "")
            .replaceAll("&nbsp;", " ")
            .replaceAll(/[\u202F\u00A0]/g, " ")
            .replaceAll(/\s\s+/g, " "),
          text: (item.textContent.trim() ?? "")
            .replaceAll("‚Äú", "")
            .replaceAll("‚Äù", "")
            .replaceAll("&nbsp;", " ")
            .replaceAll(/[\u202F\u00A0]/g, " ")
            .replaceAll(/\s\s+/g, " "),
          isParagraph:
            item.localName == "p" ||
            item.localName == "h1" ||
            item.localName == "h2" ||
            item.localName == "h3" ||
            item.localName == "h4" ||
            item.localName == "h5" ||
            item.localName == "h6",
          isToc: isToc,
          tableNestingLevel: tableNestingLevel,
        });
      });

      const articles = getTerms(paragraphs);

  
      return articles;
    },
  };
};

export default articlesFinder;
