import PizZip from 'pizzip';
import { DOMParser } from '@xmldom/xmldom';

/**
 * Extracts the paragraphs of text from a `.docx` file.
 *
 * The file is read fully into memory, opened as a zip archive, and its
 * `word/document.xml` entry is parsed as XML; the resulting DOM is then
 * handed to `getFileParagraphs`.
 *
 * @param file - The `.docx` file to read.
 * @returns A promise resolving to the paragraphs returned by
 *   `getFileParagraphs` for the parsed document.
 * @throws Error if the archive contains no `word/document.xml` entry.
 */
export async function parseDocx(file: File) {
  const documentXmlPath = 'word/document.xml';

  const fileBinary = await file.arrayBuffer();
  const zip = new PizZip(fileBinary);

  // `zip.files` is a plain name -> entry map, so a missing entry is simply
  // `undefined`. Fail with a clear message instead of an opaque TypeError
  // from calling `.asText()` on `undefined`.
  const documentEntry = zip.files[documentXmlPath];
  if (!documentEntry) {
    throw new Error(
      `Invalid .docx file: archive entry "${documentXmlPath}" was not found`,
    );
  }

  const fileRawText = documentEntry.asText();
  const textDom = createFileDom(fileRawText);
  return getFileParagraphs(textDom);
}

/**
 * Parses raw XML text into a DOM document.
 *
 * A single leading byte-order mark (U+FEFF), if present, is removed before
 * the text is passed to the parser.
 *
 * @param fileRawText - XML source text, optionally prefixed with a BOM.
 * @returns The document produced by `DOMParser.parseFromString` using the
 *   `text/xml` MIME type.
 */
function createFileDom(fileRawText: string) {
  const byteOrderMark = '\uFEFF';
  const xmlSource = fileRawText.startsWith(byteOrderMark)
    ? fileRawText.slice(1)
    : fileRawText;

  return new DOMParser().parseFromString(xmlSource, 'text/xml');
}

/**
 * Collects the text of every `w:p` element in the given DOM.
 *
 * For each `w:p` element, the node values of all child nodes of its `w:t`
 * descendants are concatenated in the order `getElementsByTagName` returns
 * them. The concatenated text is trimmed, and paragraphs whose trimmed text
 * is empty are left out of the result.
 *
 * @param textDom - Parsed document to search for `w:p` elements.
 * @returns The trimmed, non-empty paragraph strings.
 */
function getFileParagraphs(textDom: Document): string[] {
  const paragraphsXml = textDom.getElementsByTagName('w:p');
  const paragraphs: string[] = [];

  for (let i = 0, len = paragraphsXml.length; i < len; i++) {
    const textsXml = paragraphsXml[i].getElementsByTagName('w:t');
    let paragraphText = '';

    for (let j = 0, len2 = textsXml.length; j < len2; j++) {
      const { childNodes } = textsXml[j];
      // A `w:t` element can have zero children (e.g. `<w:t/>`) or several,
      // so walk all of them rather than assuming `childNodes[0]` exists.
      for (let k = 0, len3 = childNodes.length; k < len3; k++) {
        // `nodeValue` is null for node types that carry no value; skip those
        // instead of appending the literal string "null".
        paragraphText += childNodes[k].nodeValue ?? '';
      }
    }

    paragraphText = paragraphText.trim();
    if (paragraphText.length > 0) {
      paragraphs.push(paragraphText);
    }
  }

  return paragraphs;
}
