package com.centit.support.office;

import com.centit.support.network.HttpExecutor;
import com.centit.support.office.exception.ExtractorTextException;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackInputStream;
import java.util.Iterator;
import javax.swing.text.BadLocationException;
import javax.swing.text.DefaultStyledDocument;
import javax.swing.text.rtf.RTFEditorKit;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
import org.apache.poi.xdgf.usermodel.XmlVisioDocument;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFTextParagraph;
import org.apache.poi.xslf.usermodel.XSLFTextRun;
import org.apache.poi.xslf.usermodel.XSLFTextShape;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.htmlparser.Parser;
import org.htmlparser.util.NodeList;
import org.htmlparser.visitors.HtmlPage;

/* loaded from: input_file:com/centit/support/office/FileTextExtractor.class */
/**
 * Static helpers that pull plain text out of office documents (Word, Excel,
 * PowerPoint, Visio, Outlook), PDF, plain-text, RTF and HTML sources.
 *
 * <p>Every {@code InputStream} overload takes ownership of the stream it is
 * given and closes it before returning, whether extraction succeeds or fails.
 * Each successful extraction also echoes the extracted text to
 * {@code System.out}.
 */
public final class FileTextExtractor {
    /**
     * Extracts the text of a Word document stored in {@code file}.
     *
     * @param file the document to read
     * @return the extracted text, or {@code null} when the content is neither OLE2 nor OOXML
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorWord(File file) throws ExtractorTextException, IOException {
        return extractorWord(new FileInputStream(file));
    }

    /**
     * Extracts the text of the Word document located at path {@code str}.
     *
     * @param str path of the document
     * @return the extracted text, or {@code null} when the content is neither OLE2 nor OOXML
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorWord(String str) throws ExtractorTextException, IOException {
        return extractorWord(new File(str));
    }

    /**
     * Extracts the text of a Word document, choosing the HWPF (OLE2) or XWPF
     * (OOXML) extractor by sniffing the stream header.
     *
     * @param inputStream document content; always closed by this method
     * @return the extracted text, or {@code null} when the header matches neither format
     * @throws IOException if reading fails
     */
    public static String extractorWord(InputStream inputStream) throws ExtractorTextException, IOException {
        inputStream = headerPeekable(inputStream);
        String text = null;
        try {
            if (POIFSFileSystem.hasPOIFSHeader(inputStream)) {
                text = new WordExtractor(new HWPFDocument(inputStream)).getText();
            } else if (POIXMLDocument.hasOOXMLHeader(inputStream)) {
                text = new XWPFWordExtractor(new XWPFDocument(inputStream)).getText();
            }
        } finally {
            // Release the stream even when the parser rejects the document.
            inputStream.close();
        }
        System.out.println("Word_Text:\n" + text);
        return text;
    }

    /**
     * Extracts the text of an Excel workbook stored in {@code file}.
     *
     * @param file the workbook to read
     * @return the extracted text, or {@code null} when the format is not recognised
     * @throws IOException if the file cannot be opened or read
     * @throws InvalidFormatException if the OOXML package cannot be opened
     */
    public static String extractorExcel(File file) throws ExtractorTextException, IOException, InvalidFormatException {
        return extractorExcel(new FileInputStream(file));
    }

    /**
     * Extracts the text of the Excel workbook located at path {@code str}.
     *
     * @param str path of the workbook
     * @return the extracted text, or {@code null} when the format is not recognised
     * @throws IOException if the file cannot be opened or read
     * @throws InvalidFormatException if the OOXML package cannot be opened
     */
    public static String extractorExcel(String str) throws ExtractorTextException, IOException, InvalidFormatException {
        return extractorExcel(new File(str));
    }

    /**
     * Extracts the text of an Excel workbook. OLE2 workbooks are rendered with
     * formulas (not their results) and with sheet names included; OOXML
     * workbooks use the extractor's default settings.
     *
     * @param inputStream workbook content; always closed by this method
     * @return the extracted text, or {@code null} when the header matches neither format
     * @throws IOException if reading fails
     * @throws InvalidFormatException if the OOXML package cannot be opened
     */
    public static String extractorExcel(InputStream inputStream) throws ExtractorTextException, IOException, InvalidFormatException {
        inputStream = headerPeekable(inputStream);
        String text = null;
        try {
            if (POIFSFileSystem.hasPOIFSHeader(inputStream)) {
                ExcelExtractor excelExtractor = new ExcelExtractor(new HSSFWorkbook(inputStream));
                excelExtractor.setFormulasNotResults(true);
                excelExtractor.setIncludeSheetNames(true);
                text = excelExtractor.getText();
            } else if (POIXMLDocument.hasOOXMLHeader(inputStream)) {
                text = new XSSFExcelExtractor(new XSSFWorkbook(OPCPackage.open(inputStream))).getText();
            } else {
                System.out.println("版本不支持");
            }
        } finally {
            inputStream.close();
        }
        System.out.println("Excel_Text:\n" + text);
        return text;
    }

    /**
     * Extracts the text of a PowerPoint presentation stored in {@code file}.
     *
     * @param file the presentation to read
     * @return the extracted text; empty when the format is not recognised
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorPpt(File file) throws ExtractorTextException, IOException, InvalidFormatException {
        return extractorPpt(new FileInputStream(file));
    }

    /**
     * Extracts the text of the PowerPoint presentation located at path {@code str}.
     *
     * @param str path of the presentation
     * @return the extracted text; empty when the format is not recognised
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorPpt(String str) throws ExtractorTextException, IOException, InvalidFormatException {
        return extractorPpt(new File(str));
    }

    /**
     * Extracts the text of a PowerPoint presentation. For OOXML decks every
     * text run is followed by a tab and every slide by a newline.
     *
     * @param inputStream presentation content; always closed by this method
     * @return the extracted text; empty when the header matches neither format
     * @throws IOException if reading fails
     */
    public static String extractorPpt(InputStream inputStream) throws ExtractorTextException, IOException {
        inputStream = headerPeekable(inputStream);
        String text = "";
        try {
            if (POIFSFileSystem.hasPOIFSHeader(inputStream)) {
                PowerPointExtractor powerPointExtractor = new PowerPointExtractor(inputStream);
                try {
                    text = powerPointExtractor.getText();
                } finally {
                    powerPointExtractor.close();
                }
            } else if (POIXMLDocument.hasOOXMLHeader(inputStream)) {
                StringBuilder collected = new StringBuilder();
                for (XSLFSlide slide : new XMLSlideShow(inputStream).getSlides()) {
                    // Slides also hold non-text shapes (pictures, connectors, ...);
                    // test the type before casting so those are skipped safely.
                    for (Object shape : slide.getShapes()) {
                        if (shape instanceof XSLFTextShape) {
                            for (XSLFTextParagraph paragraph : (XSLFTextShape) shape) {
                                for (XSLFTextRun run : paragraph) {
                                    collected.append(run.getRawText()).append("\t");
                                }
                            }
                        }
                    }
                    collected.append("\n");
                }
                text = collected.toString();
            } else {
                System.out.println("版本不支持");
            }
        } finally {
            inputStream.close();
        }
        System.out.println("Ppt_Text:\n" + text);
        return text;
    }

    /**
     * Extracts the text of an XML-based Visio drawing stored in {@code file}.
     *
     * @param file the drawing to read
     * @return the extracted text
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorVisio(File file) throws ExtractorTextException, IOException {
        FileInputStream fileInputStream = new FileInputStream(file);
        String text;
        try {
            text = new XDGFVisioExtractor(new XmlVisioDocument(fileInputStream)).getText();
        } finally {
            fileInputStream.close();
        }
        System.out.println("Visio_Text:\n" + text);
        return text;
    }

    /**
     * Extracts the text of an Outlook message stored in {@code file}.
     *
     * @param file the message to read
     * @return the extracted text
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorOutLook(File file) throws ExtractorTextException, IOException {
        return extractorOutLook(new FileInputStream(file));
    }

    /**
     * Extracts the text of the Outlook message located at path {@code str}.
     *
     * @param str path of the message
     * @return the extracted text
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorOutLook(String str) throws ExtractorTextException, IOException {
        return extractorOutLook(new File(str));
    }

    /**
     * Extracts the text of an Outlook message.
     *
     * @param inputStream message content; always closed by this method
     * @return the extracted text
     * @throws IOException if reading fails
     */
    public static String extractorOutLook(InputStream inputStream) throws ExtractorTextException, IOException {
        String text;
        try {
            NPOIFSFileSystem nPOIFSFileSystem = new NPOIFSFileSystem(inputStream);
            try {
                OutlookTextExtactor outlookTextExtactor = new OutlookTextExtactor(nPOIFSFileSystem);
                try {
                    text = outlookTextExtactor.getText();
                } finally {
                    outlookTextExtactor.close();
                }
            } finally {
                nPOIFSFileSystem.close();
            }
        } finally {
            if (inputStream != null) {
                inputStream.close();
            }
        }
        System.out.println("OutLook_Text:\n" + text);
        return text;
    }

    /**
     * Extracts the text of a PDF document stored in {@code file}.
     *
     * @param file the document to read
     * @return the text of all pages, concatenated in page order
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorPdf(File file) throws ExtractorTextException, IOException {
        return extractorPdf(new FileInputStream(file));
    }

    /**
     * Extracts the text of the PDF document located at path {@code str}.
     *
     * @param str path of the document
     * @return the text of all pages, concatenated in page order
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorPdf(String str) throws ExtractorTextException, IOException {
        return extractorPdf(new File(str));
    }

    /**
     * Extracts the text of a PDF document page by page (pages are 1-based).
     *
     * @param inputStream document content; always closed by this method
     * @return the text of all pages, concatenated in page order with no separator
     * @throws IOException if reading fails
     */
    public static String extractorPdf(InputStream inputStream) throws ExtractorTextException, IOException {
        StringBuilder collected = new StringBuilder();
        try {
            PdfReader pdfReader = new PdfReader(inputStream);
            try {
                int numberOfPages = pdfReader.getNumberOfPages();
                for (int page = 1; page <= numberOfPages; page++) {
                    collected.append(PdfTextExtractor.getTextFromPage(pdfReader, page));
                }
            } finally {
                pdfReader.close();
            }
        } finally {
            if (inputStream != null) {
                inputStream.close();
            }
        }
        String text = collected.toString();
        // The "txt_Text" label is kept as-is so existing console output does not change.
        System.out.println("txt_Text:\n" + text);
        return text;
    }

    /**
     * Reads a plain-text file stored in {@code file}.
     *
     * @param file the file to read
     * @return the decoded text, every line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorTxt(File file) throws ExtractorTextException, IOException {
        return extractorTxt(new FileInputStream(file));
    }

    /**
     * Reads the plain-text file located at path {@code str}.
     *
     * @param str path of the file
     * @return the decoded text, every line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    public static String extractorTxt(String str) throws ExtractorTextException, IOException {
        return extractorTxt(new File(str));
    }

    /**
     * Reads plain text, picking the charset from the first two bytes:
     * {@code EF BB} selects UTF-8, {@code FE FF} UTF-16BE, {@code FF FE}
     * "Unicode", {@code 5C 75} ASCII, and anything else GBK.
     *
     * <p>The two sniffed bytes are pushed back before decoding, so no content
     * is lost; a byte-order mark that decodes to U+FEFF is dropped from the
     * result.
     *
     * @param inputStream text content; always closed by this method
     * @return the decoded text, every line terminated by {@code '\n'}; empty for empty input
     * @throws IOException if reading fails or the selected charset is unavailable
     */
    public static String extractorTxt(InputStream inputStream) throws ExtractorTextException, IOException {
        StringBuilder collected = new StringBuilder();
        BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream);
        try {
            // Peek at the signature, then rewind so the decoder sees the whole stream.
            bufferedInputStream.mark(2);
            int signature = (bufferedInputStream.read() << 8) + bufferedInputStream.read();
            bufferedInputStream.reset();

            String charsetName;
            switch (signature) {
                case 0x5C75:
                    charsetName = "ASCII";
                    break;
                case 0xEFBB:
                    charsetName = "UTF-8";
                    break;
                case 0xFEFF:
                    charsetName = "UTF-16BE";
                    break;
                case 0xFFFE:
                    charsetName = "Unicode";
                    break;
                default:
                    charsetName = "GBK";
                    break;
            }

            // Decode from the buffered stream: it already holds the bytes that
            // were pulled from inputStream while sniffing.
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(bufferedInputStream, charsetName));
            // Skip a byte-order mark that the decoder surfaced as a character.
            bufferedReader.mark(1);
            if (bufferedReader.read() != '\uFEFF') {
                bufferedReader.reset();
            }
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                collected.append(line).append("\n");
            }
        } finally {
            // Closing the wrapper also closes the caller's stream.
            bufferedInputStream.close();
        }
        String text = collected.toString();
        System.out.println("txt_Text:\n" + text);
        return text;
    }

    /**
     * Extracts the text of an RTF document stored in {@code file}.
     *
     * @param file the document to read
     * @return the document text
     * @throws IOException if the file cannot be opened or read
     * @throws BadLocationException if the parsed document cannot be read back
     */
    public static String extractorRtf(File file) throws ExtractorTextException, IOException, BadLocationException {
        return extractorRtf(new FileInputStream(file));
    }

    /**
     * Extracts the text of the RTF document located at path {@code str}.
     *
     * @param str path of the document
     * @return the document text
     * @throws IOException if the file cannot be opened or read
     * @throws BadLocationException if the parsed document cannot be read back
     */
    public static String extractorRtf(String str) throws ExtractorTextException, IOException, BadLocationException {
        return extractorRtf(new File(str));
    }

    /**
     * Extracts the text of an RTF document by loading it into a Swing styled
     * document and reading the whole document back as plain text.
     *
     * @param inputStream document content; always closed by this method
     * @return the document text
     * @throws IOException if reading fails
     * @throws BadLocationException if the parsed document cannot be read back
     */
    public static String extractorRtf(InputStream inputStream) throws IOException, BadLocationException {
        String text;
        try {
            DefaultStyledDocument defaultStyledDocument = new DefaultStyledDocument();
            new RTFEditorKit().read(inputStream, defaultStyledDocument, 0);
            text = defaultStyledDocument.getText(0, defaultStyledDocument.getLength());
        } finally {
            if (inputStream != null) {
                inputStream.close();
            }
        }
        System.out.println("txt_Rtf:\n" + text);
        return text;
    }

    /**
     * Extracts the plain text of the {@code <body>} of an HTML resource.
     *
     * @param str resource handed unchanged to the htmlparser {@code Parser(String)} constructor
     * @return the body text, or {@code null} when parsing fails for any reason
     */
    public static String extractorHTML(String str) throws ExtractorTextException, IOException {
        try {
            String text = bodyPlainText(new Parser(str));
            System.out.println("test_HTML\n" + text);
            return text;
        } catch (Exception e) {
            // Best effort: callers get null on failure, but the cause is reported.
            System.err.println("extractorHTML failed: " + e);
            return null;
        }
    }

    /**
     * Fetches {@code str} with an HTTP GET and extracts the plain text of the
     * {@code <body>} of the returned HTML.
     *
     * @param str URL to fetch
     * @return the body text, or {@code null} when the request or the parsing fails
     */
    public static String getWebUrl(String str) {
        try {
            String simpleGet = HttpExecutor.simpleGet(HttpExecutor.createHttpClient(), str, (String) null);
            Parser parser = new Parser();
            parser.setInputHTML(simpleGet);
            return bodyPlainText(parser);
        } catch (Exception e) {
            // Best effort: callers get null on failure, but the cause is reported.
            System.err.println("getWebUrl failed: " + e);
            return null;
        }
    }

    /**
     * Returns a stream whose leading bytes can be inspected by the POI header
     * checks without being consumed: the stream itself when it supports
     * mark/reset, otherwise a push-back wrapper with an 8-byte buffer.
     */
    private static InputStream headerPeekable(InputStream inputStream) {
        return inputStream.markSupported() ? inputStream : new PushbackInputStream(inputStream, 8);
    }

    /**
     * Walks the document behind {@code parser} and concatenates the plain text
     * of every node in its body. Declared with a broad {@code throws} because
     * both callers already treat any failure as "no text available".
     */
    private static String bodyPlainText(Parser parser) throws Exception {
        HtmlPage htmlPage = new HtmlPage(parser);
        parser.visitAllNodesWith(htmlPage);
        NodeList body = htmlPage.getBody();
        StringBuilder collected = new StringBuilder();
        int size = body.size();
        for (int i = 0; i < size; i++) {
            collected.append(body.elementAt(i).toPlainTextString());
        }
        return collected.toString();
    }
}
