/**
 * @typedef {Object} PostLinkData
 * @property {String} url The URL of the link
 * @property {String} [label] The label of the link (taken from the markdown or the og:title meta tag)
 */

// Matches bare http(s) URLs anywhere in the text. The match must end on a word
// character so trailing punctuation (".", ")", ...) is not captured.
// This also matches URLs that sit inside markdown links; those are discarded by
// position in extractLinksFromMarkdown.
const RAW_LINKS_REGEX = /(?<url>https?:\/\/[^\s]+[\w])/gim;

// Matches markdown links of the form [label](url) (protocol agnostic).
// Negated character classes (instead of a greedy ".*") keep every match confined
// to a single link, so several links on the same line are matched separately.
const MARKDOWN_LINKS_REGEX = /\[(?<label>[^\]\n]*)\] ?\((?<url>[^)\n]*)\)/gim;

/**
 * Extracts links from a markdown string.
 *
 * Two kinds of links are collected:
 *  - bare http(s) URLs that are not part of a markdown link (no label);
 *  - markdown links `[label](url)` whose URL either has no protocol (in which
 *    case `https://` is prepended) or uses http/https. Markdown links using any
 *    other protocol (e.g. `mailto:`) or an empty URL are skipped.
 *
 * Bare links come first in the result, followed by markdown links; each group
 * keeps its order of appearance.
 *
 * @param { string } markdown
 * @returns { PostLinkData[] } An array of PostLinkData objects
 */
export function extractLinksFromMarkdown(markdown) {
  if (!markdown) {
    return [];
  }

  const markdownMatches = [...markdown.matchAll(MARKDOWN_LINKS_REGEX)];

  // Character span [start, end) covered by each markdown link. The length of the
  // matched text is match[0].length; match.length would only be the number of
  // capture groups + 1.
  const markdownSpans = markdownMatches.map((match) => ({
    start: match.index,
    end: match.index + match[0].length,
  }));

  // The raw regex also finds the URLs inside markdown links, so drop every raw
  // match that starts within the span of a markdown link.
  const rawLinks = [...markdown.matchAll(RAW_LINKS_REGEX)]
    .filter((match) => !markdownSpans.some(({ start, end }) => match.index >= start && match.index < end))
    .map((match) => ({ url: match.groups.url.replace(/&nbsp/gi, "") }));

  const markdownLinks = markdownMatches
    .map(({ groups }) => ({ url: groups.url.trim(), label: groups.label }))
    // Skip empty URLs and any URL with a protocol other than http/https,
    // as those are valid markdown but should not get a link box.
    .filter(({ url }) => url !== "" && (!url.includes(":") || /^https?:/i.test(url)))
    .map(({ url, label }) => ({
      url: url.includes(":") ? url : `https://${url}`,
      label,
    }));

  return [...rawLinks, ...markdownLinks];
}
