import nlp from 'compromise';
import compromiseSentences from 'compromise-sentences';

// Register the compromise-sentences plugin on the shared `nlp` instance. This runs once,
// as a side effect of importing this module.
// NOTE(review): presumably this is what provides the `.sentences()` method used below — confirm.
nlp.extend(compromiseSentences);

/**
 * Parses an HTML string and returns the markup of every block-level element of interest.
 *
 * The HTML is inserted into a detached `<div>` and all descendant `p`, `ul`, `li` and
 * `table` elements are collected in document order. Elements whose `textContent` is
 * exactly the empty string are skipped (whitespace-only elements are kept).
 *
 * Note that nested matches are all returned: a `<ul>` and each of its `<li>` children
 * both match the selector, so the list items' markup appears inside the list's entry
 * and again as entries of their own.
 *
 * @param htmlString HTML markup to split into blocks.
 * @returns The `outerHTML` of each matching, non-empty element.
 */
function getBlocks(htmlString: string): string[] {
  const container = document.createElement('div');
  container.insertAdjacentHTML('beforeend', htmlString);

  const matches = Array.from(container.querySelectorAll('p,ul,li,table'));
  const markup: string[] = [];
  for (const element of matches) {
    if (element.textContent === '') {
      continue;
    }
    markup.push(element.outerHTML);
  }
  return markup;
}
/**
 * Removes everything that looks like an HTML tag from a string.
 *
 * Any `<` followed by one or more non-`>` characters and a closing `>` is deleted.
 * Nothing is inserted in its place, so text from adjacent elements is joined directly
 * (`<li>a</li><li>b</li>` becomes `ab`). HTML entities are left untouched.
 *
 * @param htmlString Markup to strip.
 * @returns The input with all tag-like sequences removed.
 */
function stripTags(htmlString: string): string {
  const tagPattern = /(<([^>]+)>)/gi;
  return htmlString.replace(tagPattern, '');
}
/**
 * Splits a piece of plain text into sentences using the `nlp` library.
 *
 * A regex-based splitter was used previously but, per the original author's note,
 * it doesn't work in Safari:
 *   block.split(/(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s/)
 *
 * @param block Text to split (callers in this file pass tag-stripped HTML).
 * @returns The text of each sentence, in the order `nlp` yields them.
 */
function getSentences(block: string): string[] {
  const collected: string[] = [];
  nlp(block)
    .sentences()
    // The sentence type is left as `any` because the plugin's typings are not visible here.
    .forEach((item: any) => {
      collected.push(item.text());
    });
  return collected;
}

/**
 * One block of a parsed HTML document together with its sentence breakdown.
 */
export interface Block {
  /** The block's HTML markup, tags included. */
  originalContent: string;
  /** The block's text split into individual sentences. */
  sentences: string[];
}

/**
 * Breaks an HTML string into blocks and splits each block's text into sentences.
 *
 * Each block returned by `getBlocks` is kept verbatim as `originalContent`; its
 * tags are removed with `stripTags` and the remaining text is passed to
 * `getSentences`.
 *
 * @param content HTML markup to analyse.
 * @returns One `Block` per extracted element, in the order `getBlocks` returns them.
 */
export function getStructure(content: string): Block[] {
  return getBlocks(content).map((taggedBlock): Block => {
    const plainText = stripTags(taggedBlock);
    return {
      originalContent: taggedBlock,
      sentences: getSentences(plainText),
    };
  });
}
