package org.apache.tika.parser.rtf;

import com.centit.framework.common.ResponseData;
import com.ctc.wstx.io.CharsetNames;
import com.drew.metadata.exif.makernotes.LeicaMakernoteDirectory;
import com.google.zxing.common.StringUtils;
import com.lowagie.text.ElementTags;
import com.mysql.cj.CharsetMapping;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.Calendar;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Map;
import java.util.Stack;
import java.util.TimeZone;
import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.apache.poi.openxml4j.opc.ContentTypes;
import org.apache.poi.util.CodePageUtil;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.tika.utils.CharsetUtils;
import org.apache.xmpbox.type.ResourceRefType;
import org.xml.sax.SAXException;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:WEB-INF/lib/tika-parsers-1.28.4.jar:org/apache/tika/parser/rtf/TextExtractor.class */
public final class TextExtractor {
    // Literal space character.
    private static final char SPACE = ' ';
    // XHTML element names for paragraphs, list items, ordered lists and unordered lists.
    private static final String P = "p";
    private static final String LI = "li";
    private static final String OL = "ol";
    private static final String UL = "ul";
    // Charset constants. None of them has an initialiser at its declaration, so they must be
    // assigned elsewhere in the class (presumably a static initialiser outside this view).
    // The identifiers are the ones emitted by the decompiler.
    // ASCII is the fallback returned by getCharset(String) when a charset name cannot be resolved.
    private static final Charset ASCII;
    // WINDOWS_1252 is the initial value of globalCharset and is selected by the "ansi" control word.
    private static final Charset WINDOWS_1252;
    // MAC_ROMAN is selected by the "mac" control word.
    private static final Charset MAC_ROMAN;
    private static final Charset SHIFT_JIS;
    private static final Charset WINDOWS_57011;
    private static final Charset WINDOWS_57010;
    private static final Charset WINDOWS_57009;
    private static final Charset WINDOWS_57008;
    private static final Charset WINDOWS_57007;
    private static final Charset WINDOWS_57006;
    private static final Charset WINDOWS_57005;
    private static final Charset WINDOWS_57004;
    private static final Charset WINDOWS_57003;
    private static final Charset X_ISCII91;
    private static final Charset X_MAC_CENTRAL_EUROPE;
    private static final Charset MAC_CYRILLIC;
    private static final Charset X_JOHAB;
    private static final Charset CP12582;
    private static final Charset CP12572;
    private static final Charset CP12562;
    private static final Charset CP12552;
    private static final Charset CP12542;
    private static final Charset CP12532;
    private static final Charset CP1252;
    private static final Charset CP12512;
    private static final Charset CP12502;
    private static final Charset CP950;
    private static final Charset CP949;
    private static final Charset MS9362;
    private static final Charset MS8742;
    private static final Charset CP866;
    private static final Charset CP865;
    private static final Charset CP864;
    private static final Charset CP863;
    private static final Charset CP862;
    private static final Charset CP860;
    private static final Charset CP852;
    private static final Charset CP8502;
    private static final Charset CP819;
    private static final Charset WINDOWS_720;
    private static final Charset WINDOWS_711;
    private static final Charset WINDOWS_710;
    private static final Charset WINDOWS_709;
    private static final Charset ISO_8859_6;
    private static final Charset CP4372;
    // CP850 is selected by the "pca" control word, CP437 by the "pc" control word.
    private static final Charset CP850;
    private static final Charset CP437;
    private static final Charset MS874;
    private static final Charset CP1257;
    private static final Charset CP1256;
    private static final Charset CP1255;
    private static final Charset CP1258;
    private static final Charset CP1254;
    private static final Charset CP1253;
    private static final Charset MS950;
    private static final Charset MS936;
    private static final Charset MS1361;
    private static final Charset MS932;
    private static final Charset CP1251;
    private static final Charset CP1250;
    private static final Charset MAC_THAI;
    private static final Charset MAC_TURKISH;
    private static final Charset MAC_GREEK;
    private static final Charset MAC_ARABIC;
    private static final Charset MAC_HEBREW;
    private static final Charset JOHAB;
    private static final Charset BIG5;
    private static final Charset GB2312;
    private static final Charset MS949;
    // Lookup from the numeric parameter of the "fcharset" control word to a Charset.
    private static final Map<Integer, Charset> FCHARSET_MAP;
    // Lookup from the numeric parameter of the "ansicpg" control word to a Charset.
    private static final Map<Integer, Charset> ANSICPG_MAP;
    // Sink for the generated XHTML events (startElement / endElement / characters).
    private final XHTMLContentHandler out;
    // Receives document properties such as page, word and character counts.
    private final Metadata metadata;
    // Receives hex characters, metadata characters and raw binary data of embedded objects/pictures.
    private final RTFEmbObjHandler embObjHandler;
    // Number of valid entries in pendingBytes / pendingChars / pendingControl respectively.
    private int pendingByteCount;
    private int pendingCharCount;
    private int pendingControlCount;
    // Cached decoder and the charset it was created for (see getDecoder()).
    private CharsetDecoder decoder;
    private Charset lastCharset;
    // Group depth recorded when the "fonttbl" control word was seen.
    private int fontTableDepth;
    // Metadata property selected by a header control word such as "author" or "title";
    // presumably the text that follows is stored under it — confirm in the unseen group handling.
    private Property nextMetaData;
    // True while a paragraph (or list item) element is open.
    private boolean inParagraph;
    // While this equals 1, decoded text is diverted into pendingBuffer instead of the output.
    private int fieldState;
    // List id whose closing tag has been deferred; 0 means nothing is pending.
    private int pendingListEnd;
    // The table ("listtable" or "listoverridetable") currently being populated, and its current entry.
    private Map<Integer, ListDescriptor> currentListTable;
    private ListDescriptor currentList;
    // When true, no ul/ol/li markup is produced (see inList(), startList(), endList()).
    private boolean ignoreListMarkup;
    // Not referenced in the visible part of the file.
    private String pendingURL;
    // Date/time components captured from the "yr", "mo", "dy", "hr" and "min" header control words.
    private int year;
    private int month;
    private int day;
    private int hour;
    private int minute;
    // Decompiled form of the compiler-generated assertion switch; true when assertions are off.
    static final /* synthetic */ boolean $assertionsDisabled;
    // Scratch character array and its wrapping buffer used as the decoder's output in pushBytes().
    private final char[] outputArray = new char[128];
    private final Buffer outputCharBuffer = CharBuffer.wrap(this.outputArray);
    // Font id -> charset, filled from "fcharset" entries while the font table is parsed.
    private final Map<Integer, Charset> fontToCharset = new HashMap();
    // Saved group states; extraction stops once this becomes empty after a closing brace.
    private final LinkedList<GroupState> groupStates = new LinkedList<>();
    // Text collected while in the header or while fieldState == 1.
    private final StringBuilder pendingBuffer = new StringBuilder();
    // Number of upcoming ANSI bytes to drop; loaded from groupState.ucSkip after a Unicode character.
    int ansiSkip = 0;
    // Not referenced in the visible part of the file.
    private int written = 0;
    // Growable buffer of raw bytes awaiting charset decoding, plus a ByteBuffer view of it.
    private byte[] pendingBytes = new byte[16];
    private Buffer pendingByteBuffer = ByteBuffer.wrap(this.pendingBytes);
    // Growable buffer of already-decoded characters awaiting output.
    private char[] pendingChars = new char[10];
    // Growable buffer holding the letters of the control word currently being parsed.
    private byte[] pendingControl = new byte[10];
    // Document-wide default charset; may be replaced by "ansicpg", "ansi", "pca", "pc" or "mac".
    private Charset globalCharset = WINDOWS_1252;
    // Font id given by the "deff" control word, or -1 if none was seen.
    private int globalDefaultFont = -1;
    // Font id most recently declared inside the font table, or -1.
    private int curFontID = -1;
    // State of the group currently being parsed.
    private GroupState groupState = new GroupState();
    // True until the first body-level control word is encountered.
    private boolean inHeader = true;
    // 0 = font table not seen yet, 1 = inside the font table, 2 = font table finished.
    private int fontTableState = 0;
    // List definitions keyed by list id, as declared in "listtable" / "listoverridetable".
    private Map<Integer, ListDescriptor> listTable = new HashMap();
    private Map<Integer, ListDescriptor> listOverrideTable = new HashMap();
    // Index of the list level being defined; -1 before the first "listlevel".
    private int listTableLevel = -1;
    // Metadata control words are only honoured while this is -1.
    private int uprState = -1;
    // Names of the currently open paragraph-level elements (p, li, ul, ol), innermost on top.
    private Stack<String> paragraphStack = new Stack<>();
    // Upper bound on paragraphStack size enforced by pushParagraphTag().
    private int maxStackSize = 1000;

    /**
     * Creates an extractor bound to the given output handler, metadata holder and
     * embedded-object handler.
     *
     * @param xHTMLContentHandler handler that receives the generated XHTML events
     * @param metadata            metadata object that receives document properties
     * @param rTFEmbObjHandler    handler that receives embedded object and picture data
     */
    public TextExtractor(XHTMLContentHandler xHTMLContentHandler, Metadata metadata, RTFEmbObjHandler rTFEmbObjHandler) {
        this.out = xHTMLContentHandler;
        this.embObjHandler = rTFEmbObjHandler;
        this.metadata = metadata;
    }

    /**
     * Resolves a charset by name, falling back to {@code ASCII} when the name is rejected.
     *
     * @param str charset name to look up
     * @return the resolved charset, or {@code ASCII} if the lookup throws
     *         {@link IllegalArgumentException}
     */
    private static Charset getCharset(String str) {
        Charset resolved;
        try {
            resolved = CharsetUtils.forName(str);
        } catch (IllegalArgumentException ignored) {
            // Unknown or illegal charset name: use the ASCII fallback.
            resolved = ASCII;
        }
        return resolved;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Tells whether the given code is an ASCII hexadecimal digit (0-9, a-f or A-F).
     *
     * @param i character code to test; any other value, including -1, yields {@code false}
     * @return {@code true} for a hexadecimal digit
     */
    public static boolean isHexChar(int i) {
        if (i >= '0' && i <= '9') {
            return true;
        }
        if (i >= 'a' && i <= 'f') {
            return true;
        }
        return i >= 'A' && i <= 'F';
    }

    /**
     * Tells whether the given code is an ASCII letter (a-z or A-Z).
     *
     * @param i character code to test
     * @return {@code true} for an ASCII letter
     */
    private static boolean isAlpha(int i) {
        final boolean lower = i >= 'a' && i <= 'z';
        final boolean upper = i >= 'A' && i <= 'Z';
        return lower || upper;
    }

    /**
     * Tells whether the given code is an ASCII decimal digit (0-9).
     *
     * @param i character code to test
     * @return {@code true} for a decimal digit
     */
    private static boolean isDigit(int i) {
        if (i < '0') {
            return false;
        }
        return i <= '9';
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Converts a digit character code to its numeric value.
     *
     * <p>Digits 0-9 map to 0-9. Lower-case letters a-z map to 10 upwards, and every other
     * input is treated as an upper-case letter offset from 'A'. Note that the accepted letter
     * range is wider than the one checked by {@link #isHexChar(int)}; parseHexChar validates
     * its input with isHexChar before calling this method.
     *
     * @param i character code of the digit
     * @return the numeric value of the digit
     * @throws AssertionError if assertions are enabled and {@code i} is not a digit or an
     *                        ASCII letter
     */
    public static int hexValue(int i) {
        if (i >= '0' && i <= '9') {
            return i - '0';
        }
        if (i >= 'a' && i <= 'z') {
            return (i - 'a') + 10;
        }
        // Only reachable for non-digit, non-lower-case input: must be A-Z when assertions are on.
        if (!$assertionsDisabled && (i < 'A' || i > 'Z')) {
            throw new AssertionError();
        }
        return (i - 'A') + 10;
    }

    /**
     * Reports whether list markup is currently suppressed.
     *
     * @return the current value of the ignore-list-markup flag
     */
    public boolean isIgnoringLists() {
        return ignoreListMarkup;
    }

    /**
     * Enables or disables suppression of list markup.
     *
     * @param z {@code true} to stop emitting list elements, {@code false} to emit them
     */
    public void setIgnoreListMarkup(boolean z) {
        ignoreListMarkup = z;
    }

    /**
     * Flushes whatever text is pending: buffered raw bytes if there are any, otherwise
     * buffered characters.
     *
     * @throws AssertionError if assertions are enabled and both bytes and characters are pending
     */
    private void pushText() throws IOException, SAXException, TikaException {
        if (this.pendingByteCount != 0) {
            // Bytes and chars are never expected to be pending at the same time.
            if (!$assertionsDisabled && this.pendingCharCount != 0) {
                throw new AssertionError();
            }
            pushBytes();
            return;
        }
        pushChars();
    }

    /**
     * Queues one raw byte of document text for later charset decoding.
     *
     * <p>Pending characters are flushed first. Inside a picture group the byte is forwarded
     * to the embedded-object handler as a metadata character instead of being queued.
     *
     * @param i byte value, expected to be in the range 0..255
     * @throws AssertionError if assertions are enabled and {@code i} is outside 0..255
     */
    private void addOutputByte(int i) throws IOException, SAXException, TikaException {
        if (!$assertionsDisabled && !(i >= 0 && i < 256)) {
            throw new AssertionError("byte value out of range: " + i);
        }
        if (this.pendingCharCount != 0) {
            pushChars();
        }
        if (this.groupState.pictDepth > 0) {
            this.embObjHandler.writeMetadataChar((char) i);
            return;
        }
        final int capacity = this.pendingBytes.length;
        if (this.pendingByteCount == capacity) {
            // Buffer is full: grow by 25% and re-wrap so the ByteBuffer view stays in sync.
            byte[] grown = new byte[(int) (capacity * 1.25d)];
            System.arraycopy(this.pendingBytes, 0, grown, 0, capacity);
            this.pendingBytes = grown;
            this.pendingByteBuffer = ByteBuffer.wrap(grown);
        }
        this.pendingBytes[this.pendingByteCount++] = (byte) i;
    }

    /**
     * Appends one letter to the control word currently being collected.
     *
     * @param i character code of the letter
     * @throws AssertionError if assertions are enabled and {@code i} is not an ASCII letter
     */
    private void addControl(int i) {
        if (!$assertionsDisabled && !isAlpha(i)) {
            throw new AssertionError();
        }
        final int capacity = this.pendingControl.length;
        if (this.pendingControlCount == capacity) {
            // Buffer is full: grow by 25%.
            byte[] grown = new byte[(int) (capacity * 1.25d)];
            System.arraycopy(this.pendingControl, 0, grown, 0, capacity);
            this.pendingControl = grown;
        }
        this.pendingControl[this.pendingControlCount++] = (byte) i;
    }

    /**
     * Queues one already-decoded character of document text.
     *
     * <p>Pending raw bytes are flushed first. While in the header or while
     * {@code fieldState == 1} the character goes to {@code pendingBuffer}; inside an
     * {@code sn} or {@code sv} group it is forwarded to the embedded-object handler;
     * otherwise it is appended to the pending character buffer.
     *
     * @param c character to queue
     */
    private void addOutputChar(char c) throws IOException, SAXException, TikaException {
        if (this.pendingByteCount != 0) {
            pushBytes();
        }
        final boolean divertToBuffer = this.inHeader || this.fieldState == 1;
        if (divertToBuffer) {
            this.pendingBuffer.append(c);
            return;
        }
        if (this.groupState.sn || this.groupState.sv) {
            this.embObjHandler.writeMetadataChar(c);
            return;
        }
        final int capacity = this.pendingChars.length;
        if (this.pendingCharCount == capacity) {
            // Buffer is full: grow by 25%.
            char[] grown = new char[(int) (capacity * 1.25d)];
            System.arraycopy(this.pendingChars, 0, grown, 0, capacity);
            this.pendingChars = grown;
        }
        this.pendingChars[this.pendingCharCount++] = c;
    }

    /**
     * Extracts text from the given stream.
     *
     * <p>The stream is wrapped in a {@link PushbackInputStream} with a two-byte pushback
     * buffer before parsing starts.
     *
     * @param inputStream source of the document bytes
     */
    public void extract(InputStream inputStream) throws IOException, SAXException, TikaException {
        PushbackInputStream pushback = new PushbackInputStream(inputStream, 2);
        extract(pushback);
    }

    /**
     * Main parse loop: reads the stream byte by byte and dispatches on backslash (control
     * token), opening brace (group start), closing brace (group end) and plain data bytes.
     *
     * <p>Parsing stops at end of stream or as soon as the outermost group has been closed.
     * Afterwards the current paragraph and every element still on the paragraph stack are
     * closed and the document is ended.
     *
     * @param pushbackInputStream source of the document bytes
     */
    private void extract(PushbackInputStream pushbackInputStream) throws IOException, SAXException, TikaException {
        this.out.startDocument();
        int b;
        while ((b = pushbackInputStream.read()) != -1) {
            if (b == '\\') {
                parseControlToken(pushbackInputStream);
                continue;
            }
            if (b == '{') {
                pushText();
                processGroupStart(pushbackInputStream);
                continue;
            }
            if (b == '}') {
                pushText();
                processGroupEnd();
                if (this.groupStates.isEmpty()) {
                    // The outermost group was just closed: nothing more to parse.
                    break;
                }
                continue;
            }
            if (this.groupState.objdata || this.groupState.pictDepth == 1) {
                // Hex-encoded payload of an embedded object or picture.
                this.embObjHandler.writeHexChar(b);
                continue;
            }
            if (b == '\r' || b == '\n') {
                // Raw line breaks carry no text.
                continue;
            }
            final boolean suppressed = this.groupState.ignore
                    && this.nextMetaData == null
                    && !this.groupState.sn
                    && !this.groupState.sv;
            if (suppressed) {
                continue;
            }
            if (this.ansiSkip != 0) {
                // This byte is an ANSI fallback for a preceding Unicode character.
                this.ansiSkip--;
            } else {
                addOutputByte(b);
            }
        }
        endParagraph(false);
        while (!this.paragraphStack.isEmpty()) {
            end(this.paragraphStack.pop());
        }
        this.out.endDocument();
    }

    /**
     * Handles the token that follows a backslash.
     *
     * <p>An apostrophe introduces a hex-encoded byte, a letter starts a control word,
     * an escaped brace, backslash, carriage return or line feed is emitted as a literal
     * byte, end of stream is ignored, and anything else is a control symbol.
     *
     * @param pushbackInputStream stream positioned just after the backslash
     */
    private void parseControlToken(PushbackInputStream pushbackInputStream) throws IOException, SAXException, TikaException {
        final int next = pushbackInputStream.read();
        switch (next) {
            case '\'':
                parseHexChar(pushbackInputStream);
                return;
            case '{':
            case '}':
            case '\\':
            case '\r':
            case '\n':
                addOutputByte(next);
                return;
            case -1:
                return;
            default:
                if (isAlpha(next)) {
                    parseControlWord((char) next, pushbackInputStream);
                } else {
                    processControlSymbol((char) next);
                }
        }
    }

    /**
     * Parses the two hex digits that follow a backslash-apostrophe escape and queues the
     * resulting byte.
     *
     * <p>If either character is not a hex digit the escape is abandoned and the offending
     * character is pushed back so the main loop can process it. If ANSI bytes are currently
     * being skipped, the decoded byte is consumed against that skip count instead of being
     * emitted.
     *
     * @param pushbackInputStream stream positioned just after the apostrophe
     */
    private void parseHexChar(PushbackInputStream pushbackInputStream) throws IOException, SAXException, TikaException {
        int high = pushbackInputStream.read();
        if (!isHexChar(high)) {
            // Never push back the end-of-stream marker: unread(-1) would store the byte
            // 0xFF, which would then be read back as a real data byte.
            if (high != -1) {
                pushbackInputStream.unread(high);
            }
            return;
        }
        int low = pushbackInputStream.read();
        if (!isHexChar(low)) {
            if (low != -1) {
                pushbackInputStream.unread(low);
            }
            return;
        }
        if (this.ansiSkip != 0) {
            this.ansiSkip--;
        } else {
            addOutputByte((16 * hexValue(high)) + hexValue(low));
        }
    }

    /**
     * Parses a control word: its letters, an optional signed decimal parameter and the
     * optional single space that terminates it, then dispatches to the matching
     * {@code processControlWord} overload.
     *
     * <p>A minus sign alone (with no digits) still counts as a parameter, with value 0.
     * The collected control-word letters are discarded once processing has finished.
     *
     * @param i                   first letter of the control word (already consumed)
     * @param pushbackInputStream stream positioned just after that first letter
     */
    private void parseControlWord(int i, PushbackInputStream pushbackInputStream) throws IOException, SAXException, TikaException {
        addControl(i);
        int next = pushbackInputStream.read();
        while (isAlpha(next)) {
            addControl(next);
            next = pushbackInputStream.read();
        }
        boolean hasParam = false;
        boolean negative = false;
        if (next == '-') {
            negative = true;
            hasParam = true;
            next = pushbackInputStream.read();
        }
        int param = 0;
        while (isDigit(next)) {
            param = (param * 10) + (next - '0');
            hasParam = true;
            next = pushbackInputStream.read();
        }
        // A single space only delimits the control word and is swallowed; any other
        // character belongs to what follows. The end-of-stream marker must not be pushed
        // back: unread(-1) would store the byte 0xFF and make it reappear as data.
        if (next != ' ' && next != -1) {
            pushbackInputStream.unread(next);
        }
        if (hasParam) {
            if (negative) {
                param = -param;
            }
            processControlWord(param, pushbackInputStream);
        } else {
            processControlWord();
        }
        this.pendingControlCount = 0;
    }

    /**
     * Opens a paragraph (or list item) element if none is open yet.
     *
     * <p>Nothing happens when a paragraph is already open or a "p" tag is on the paragraph
     * stack. Otherwise any open italic/bold elements are closed, a deferred list end is
     * carried out if the list changed, a new list is started if needed, the "li" or "p"
     * element is opened, and bold/italic are re-opened inside it.
     */
    private void lazyStartParagraph() throws IOException, SAXException, TikaException {
        if (this.inParagraph || this.paragraphStack.contains("p")) {
            return;
        }
        // Inline formatting must not span the paragraph boundary: close it, innermost first.
        if (this.groupState.italic) {
            end("i");
        }
        if (this.groupState.bold) {
            end("b");
        }
        final boolean listEndDeferred = this.pendingListEnd != 0;
        if (listEndDeferred && this.pendingListEnd != this.groupState.list) {
            endList(this.pendingListEnd);
            this.pendingListEnd = 0;
        }
        final boolean listed = inList();
        if (listed && this.groupState.list != this.pendingListEnd) {
            startList(this.groupState.list);
        }
        final String tag = listed ? "li" : "p";
        start(tag);
        pushParagraphTag(tag);
        // Re-open inline formatting inside the new element, outermost first.
        if (this.groupState.bold) {
            start("b");
        }
        if (this.groupState.italic) {
            start("i");
        }
        this.inParagraph = true;
    }

    /**
     * Records an opened paragraph-level tag on the paragraph stack, unless the stack has
     * already reached {@code maxStackSize}, in which case the tag is silently not recorded.
     *
     * @param str element name to record
     */
    private void pushParagraphTag(String str) {
        if (this.paragraphStack.size() >= this.maxStackSize) {
            return;
        }
        this.paragraphStack.push(str);
    }

    /**
     * Flushes pending text and closes the current paragraph or list item.
     *
     * @param z when {@code true}, the bold/italic flags are kept and, if either is set, a new
     *          "p" element is opened immediately with the formatting re-applied; when
     *          {@code false}, the flags are cleared and a deferred list end (if any) is
     *          carried out at the end
     */
    private void endParagraph(boolean z) throws IOException, SAXException, TikaException {
        pushText();
        // Make sure there is an element to close, so an empty paragraph still produces markup.
        if (!this.inParagraph) {
            lazyStartParagraph();
        }
        if (this.inParagraph || this.paragraphStack.size() > 0) {
            // Close inline formatting, innermost first; the flags survive only when z is true.
            if (this.groupState.italic) {
                end("i");
                this.groupState.italic = z;
            }
            if (this.groupState.bold) {
                end("b");
                this.groupState.bold = z;
            }
            // z2 is set when the tag on top of the stack is not the one expected here.
            boolean z2 = false;
            if (inList()) {
                if (this.paragraphStack.size() > 0) {
                    String pop = this.paragraphStack.pop();
                    if (pop.equals("li")) {
                        end("li");
                    } else {
                        // Unexpected tag: put it back and unwind the whole stack below.
                        pushParagraphTag(pop);
                        z2 = true;
                    }
                }
            } else if (this.paragraphStack.size() > 0) {
                String pop2 = this.paragraphStack.pop();
                if ("p".equals(pop2)) {
                    end("p");
                } else {
                    // Unexpected tag: put it back and unwind the whole stack below.
                    pushParagraphTag(pop2);
                    z2 = true;
                }
            }
            if (z2) {
                // Close every element still recorded as open.
                while (this.paragraphStack.size() > 0) {
                    end(this.paragraphStack.pop());
                }
            }
            if (z && (this.groupState.bold || this.groupState.italic)) {
                // Formatting carries over: open the next paragraph right away and re-apply it.
                start("p");
                pushParagraphTag("p");
                if (this.groupState.bold) {
                    start("b");
                }
                if (this.groupState.italic) {
                    start("i");
                }
                this.inParagraph = true;
            } else {
                this.inParagraph = false;
            }
        }
        // A deferred list end is only carried out here when z is false.
        if (z || this.pendingListEnd == 0) {
            return;
        }
        endList(this.pendingListEnd);
        this.pendingListEnd = 0;
    }

    /**
     * Writes all pending characters to the output, opening a paragraph first if necessary,
     * and empties the pending character buffer. Does nothing when no characters are pending.
     */
    private void pushChars() throws IOException, SAXException, TikaException {
        if (this.pendingCharCount == 0) {
            return;
        }
        lazyStartParagraph();
        this.out.characters(this.pendingChars, 0, this.pendingCharCount);
        this.pendingCharCount = 0;
    }

    /**
     * Decodes all pending raw bytes with the current charset and emits the resulting
     * characters, then empties the pending byte buffer.
     *
     * <p>Bytes are discarded without decoding when the current group is ignored and no
     * metadata property is being collected.
     *
     * @throws AssertionError if assertions are enabled and either buffer is not at position 0
     */
    private void pushBytes() throws IOException, SAXException, TikaException {
        final boolean emit = this.pendingByteCount > 0 && (!this.groupState.ignore || this.nextMetaData != null);
        if (emit) {
            CharsetDecoder activeDecoder = getDecoder();
            this.pendingByteBuffer.limit(this.pendingByteCount);
            if (!$assertionsDisabled && this.pendingByteBuffer.position() != 0) {
                throw new AssertionError();
            }
            if (!$assertionsDisabled && this.outputCharBuffer.position() != 0) {
                throw new AssertionError();
            }
            CoderResult status;
            // Decode until all input is consumed, draining the output buffer after each pass.
            do {
                status = activeDecoder.decode((ByteBuffer) this.pendingByteBuffer, (CharBuffer) this.outputCharBuffer, true);
                emitDecodedChars();
            } while (status != CoderResult.UNDERFLOW);
            // Flush whatever the decoder still holds internally.
            do {
                status = activeDecoder.flush((CharBuffer) this.outputCharBuffer);
                emitDecodedChars();
            } while (status != CoderResult.UNDERFLOW);
            activeDecoder.reset();
            this.pendingByteBuffer.position(0);
        }
        this.pendingByteCount = 0;
    }

    /**
     * Emits the characters currently held in the decoder output buffer and rewinds it.
     * While in the header or while {@code fieldState == 1} they go to {@code pendingBuffer},
     * otherwise to the XHTML output.
     */
    private void emitDecodedChars() throws IOException, SAXException, TikaException {
        final int produced = this.outputCharBuffer.position();
        if (produced <= 0) {
            return;
        }
        if (this.inHeader || this.fieldState == 1) {
            this.pendingBuffer.append(this.outputArray, 0, produced);
        } else {
            lazyStartParagraph();
            this.out.characters(this.outputArray, 0, produced);
        }
        this.outputCharBuffer.position(0);
    }

    /**
     * Tells whether the control word collected so far is exactly the given string.
     *
     * @param str control word to compare against; expected to contain ASCII letters only
     * @return {@code true} if length and every letter match
     * @throws AssertionError if assertions are enabled and a compared character of
     *                        {@code str} is not an ASCII letter
     */
    private boolean equals(String str) {
        final int length = this.pendingControlCount;
        if (str.length() != length) {
            return false;
        }
        int idx = 0;
        while (idx < length) {
            final char expected = str.charAt(idx);
            if (!$assertionsDisabled && !isAlpha(expected)) {
                throw new AssertionError();
            }
            if (this.pendingControl[idx] != (byte) expected) {
                return false;
            }
            idx++;
        }
        return true;
    }

    /**
     * Handles a control symbol (a backslash followed by a non-letter).
     *
     * <p>A hyphen emits U+00AD, an underscore emits U+2011 and a tilde emits U+00A0.
     * Every other symbol, including the asterisk, produces no output.
     *
     * @param c the symbol character that followed the backslash
     */
    private void processControlSymbol(char c) throws IOException, SAXException, TikaException {
        if (c == '-') {
            addOutputChar((char) 0x00AD);
        } else if (c == '_') {
            addOutputChar((char) 0x2011);
        } else if (c == '~') {
            addOutputChar((char) 0x00A0);
        }
    }

    /**
     * Returns a decoder for the charset currently in effect, reusing the cached decoder
     * as long as the charset has not changed. New decoders replace malformed and
     * unmappable input instead of reporting errors.
     *
     * @return decoder for the current charset
     * @throws TikaException if no charset can be determined
     */
    private CharsetDecoder getDecoder() throws TikaException {
        final Charset wanted = getCharset();
        // Charset.equals(null) is false, so this also covers the very first call.
        if (!wanted.equals(this.lastCharset)) {
            this.decoder = wanted.newDecoder();
            this.decoder.onMalformedInput(CodingErrorAction.REPLACE);
            this.decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
            this.lastCharset = wanted;
        }
        return this.decoder;
    }

    /**
     * Determines the charset to use for decoding text at the current position.
     *
     * <p>Precedence: the charset of the font selected in the current group; then, outside
     * the header, the charset of the document's default font; finally the global charset.
     *
     * @return the charset in effect
     * @throws TikaException if none of the above yields a charset
     */
    private Charset getCharset() throws TikaException {
        final Charset fromFont = this.groupState.fontCharset;
        if (fromFont != null) {
            return fromFont;
        }
        if (this.globalDefaultFont != -1 && !this.inHeader) {
            final Charset fromDefaultFont = this.fontToCharset.get(Integer.valueOf(this.globalDefaultFont));
            if (fromDefaultFont != null) {
                return fromDefaultFont;
            }
        }
        if (this.globalCharset == null) {
            throw new TikaException("unable to determine charset");
        }
        return this.globalCharset;
    }

    /**
     * Handles a control word that carries a numeric parameter.
     *
     * <p>In the header this covers the code page, default font, document statistics,
     * date components, font-table entries and list-table entries. In the body it covers
     * turning bold/italic off, font selection and list selection. Independently of the
     * header/body state it handles the Unicode character word "u", the Unicode skip count
     * "uc" and the binary data word "bin".
     *
     * @param i                   the numeric parameter of the control word
     * @param pushbackInputStream stream positioned after the control word; read from only
     *                            for "bin", whose payload is consumed or skipped here
     * @throws AssertionError if assertions are enabled and "b" or "i" arrives with a
     *                        non-zero parameter
     */
    private void processControlWord(int i, PushbackInputStream pushbackInputStream) throws IOException, SAXException, TikaException {
        Charset charset;
        if (this.inHeader) {
            if (equals("ansicpg")) {
                // Unknown code pages leave the current global charset untouched.
                Charset charset2 = ANSICPG_MAP.get(Integer.valueOf(i));
                if (charset2 != null) {
                    this.globalCharset = charset2;
                }
            } else if (equals("deff")) {
                this.globalDefaultFont = i;
            } else if (equals("nofpages")) {
                this.metadata.add(Office.PAGE_COUNT, Integer.toString(i));
            } else if (equals("nofwords")) {
                this.metadata.add(Office.WORD_COUNT, Integer.toString(i));
            } else if (equals("nofchars")) {
                this.metadata.add(Office.CHARACTER_COUNT, Integer.toString(i));
            } else if (equals("yr")) {
                this.year = i;
            } else if (equals("mo")) {
                this.month = i;
            } else if (equals("dy")) {
                this.day = i;
            } else if (equals("hr")) {
                this.hour = i;
            } else if (equals("min")) {
                this.minute = i;
            }
            if (this.fontTableState == 1) {
                if (this.groupState.depth < this.fontTableDepth) {
                    // We have left the group that contained the font table.
                    this.fontTableState = 2;
                } else if (equals("f")) {
                    // Font declaration inside the font table: remember its id.
                    this.curFontID = i;
                } else if (equals("fcharset") && (charset = FCHARSET_MAP.get(Integer.valueOf(i))) != null) {
                    this.fontToCharset.put(Integer.valueOf(this.curFontID), charset);
                }
            }
            // A font selection after the font table, in a non-ignored group, starts the body.
            if (this.fontTableState == 2 && !this.groupState.ignore && equals("f")) {
                this.inHeader = false;
            }
            if (this.currentList != null) {
                if (equals("listid")) {
                    this.currentList.id = i;
                    this.currentListTable.put(Integer.valueOf(this.currentList.id), this.currentList);
                } else if (equals("listtemplateid")) {
                    this.currentList.templateID = i;
                } else if ((equals("levelnfc") || equals("levelnfcn")) && this.listTableLevel > -1 && this.listTableLevel < this.currentList.numberType.length) {
                    this.currentList.numberType[this.listTableLevel] = i;
                }
            }
        } else if (equals("b")) {
            // "b" with a parameter is treated as "bold off"; only 0 is expected here.
            if (!$assertionsDisabled && i != 0) {
                throw new AssertionError();
            }
            if (this.groupState.bold) {
                pushText();
                // Italic is nested inside bold, so it has to be closed and re-opened around it.
                if (this.groupState.italic) {
                    end("i");
                }
                end("b");
                if (this.groupState.italic) {
                    start("i");
                }
                this.groupState.bold = false;
            }
        } else if (equals("i")) {
            // "i" with a parameter is treated as "italic off"; only 0 is expected here.
            if (!$assertionsDisabled && i != 0) {
                throw new AssertionError();
            }
            if (this.groupState.italic) {
                pushText();
                end("i");
                this.groupState.italic = false;
            }
        } else if (equals("f")) {
            // Font switch in the body: flush text decoded with the old charset first.
            Charset charset3 = this.fontToCharset.get(Integer.valueOf(i));
            pushText();
            // May be null, in which case getCharset() falls back to the defaults.
            this.groupState.fontCharset = charset3;
        } else if (equals("ls")) {
            this.groupState.list = i;
        } else if (equals("lslvl")) {
            this.groupState.listLevel = i;
        }
        if (equals("u")) {
            // Unicode character given as a (possibly negative) 16-bit value.
            if (!this.groupState.ignore || this.groupState.sv || this.groupState.sn) {
                addOutputChar((char) (i & 65535));
            }
            // The ANSI fallback bytes that follow must be dropped.
            this.ansiSkip = this.groupState.ucSkip;
            return;
        }
        if (equals("uc")) {
            // The skip counter is compared with "!= 0" and decremented, so a negative count
            // from malformed input would suppress all following text; clamp it to zero.
            this.groupState.ucSkip = Math.max(0, i);
            return;
        }
        if (!equals("bin") || i < 0) {
            return;
        }
        if (this.groupState.pictDepth != 1) {
            // Binary payload outside a picture is of no interest: skip it.
            IOUtils.skipFully(pushbackInputStream, i);
            return;
        }
        try {
            this.embObjHandler.writeBytes(pushbackInputStream, i);
        } catch (IOException | TikaException e) {
            // Best effort: record the failure and continue with the rest of the document.
            EmbeddedDocumentUtil.recordEmbeddedStreamException(e, this.metadata);
            this.embObjHandler.reset();
        }
    }

    /**
     * Tells whether list markup should be produced for the current position.
     *
     * @return {@code true} when list markup is not suppressed and the current group
     *         has a non-zero list id
     */
    private boolean inList() {
        return !this.ignoreListMarkup && this.groupState.list != 0;
    }

    /**
     * Defers the end of the current list: remembers its id as pending and clears the
     * list id of the current group.
     */
    private void pendingListEnd() {
        final GroupState state = this.groupState;
        this.pendingListEnd = state.list;
        state.list = 0;
    }

    /**
     * Closes the list element for the given list id, provided list markup is enabled and
     * the matching tag is on top of the paragraph stack.
     *
     * <p>The top of the stack is popped even when it does not match; in that case no end
     * tag is emitted.
     *
     * @param i id of the list to close
     */
    private void endList(int i) throws IOException, SAXException, TikaException {
        if (this.ignoreListMarkup) {
            return;
        }
        final String tag = isUnorderedList(i) ? "ul" : "ol";
        if (this.paragraphStack.isEmpty()) {
            return;
        }
        final String top = this.paragraphStack.pop();
        if (tag.equals(top)) {
            end(tag);
        }
    }

    /**
     * Opens a "ul" or "ol" element for the given list id and records it on the paragraph
     * stack. Does nothing when list markup is suppressed.
     *
     * @param i id of the list to open
     */
    private void startList(int i) throws IOException, SAXException, TikaException {
        if (!this.ignoreListMarkup) {
            final String tag = isUnorderedList(i) ? "ul" : "ol";
            start(tag);
            pushParagraphTag(tag);
        }
    }

    /**
     * Tells whether the list with the given id is unordered at the current list level.
     *
     * @param i list id to look up in the list table
     * @return the descriptor's answer for the current level, or {@code true} when the id
     *         is not present in the list table
     */
    private boolean isUnorderedList(int i) {
        final ListDescriptor descriptor = this.listTable.get(Integer.valueOf(i));
        if (descriptor == null) {
            // Unknown lists are treated as unordered.
            return true;
        }
        return descriptor.isUnordered(this.groupState.listLevel);
    }

    /**
     * Emits the end tag of the named element to the output handler.
     *
     * @param str element name
     */
    private void end(String str) throws IOException, SAXException, TikaException {
        out.endElement(str);
    }

    /**
     * Emits the start tag of the named element to the output handler.
     *
     * @param str element name
     */
    private void start(String str) throws IOException, SAXException, TikaException {
        out.startElement(str);
    }

    private void processControlWord() throws IOException, SAXException, TikaException {
        if (this.inHeader) {
            if (equals("ansi")) {
                this.globalCharset = WINDOWS_1252;
            } else if (equals("pca")) {
                this.globalCharset = CP850;
            } else if (equals("pc")) {
                this.globalCharset = CP437;
            } else if (equals("mac")) {
                this.globalCharset = MAC_ROMAN;
            }
            if (equals("colortbl") || equals("stylesheet") || equals("fonttbl")) {
                this.groupState.ignore = true;
            } else if (equals("listtable")) {
                this.currentListTable = this.listTable;
            } else if (equals("listoverridetable")) {
                this.currentListTable = this.listOverrideTable;
            }
            if (this.uprState == -1) {
                if (equals("author")) {
                    this.nextMetaData = TikaCoreProperties.CREATOR;
                } else if (equals("title")) {
                    this.nextMetaData = TikaCoreProperties.TITLE;
                } else if (equals("subject")) {
                    this.nextMetaData = TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT;
                } else if (equals("keywords")) {
                    this.nextMetaData = TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT;
                } else if (equals("category")) {
                    this.nextMetaData = OfficeOpenXMLCore.CATEGORY;
                } else if (equals("comment")) {
                    this.nextMetaData = TikaCoreProperties.COMMENTS;
                } else if (equals("company")) {
                    this.nextMetaData = OfficeOpenXMLExtended.COMPANY;
                } else if (equals(ResourceRefType.MANAGER)) {
                    this.nextMetaData = OfficeOpenXMLExtended.MANAGER;
                } else if (equals("template")) {
                    this.nextMetaData = OfficeOpenXMLExtended.TEMPLATE;
                } else if (equals("creatim")) {
                    this.nextMetaData = TikaCoreProperties.CREATED;
                }
            }
            if (this.fontTableState == 0) {
                if (equals("fonttbl")) {
                    this.fontTableState = 1;
                    this.fontTableDepth = this.groupState.depth;
                }
            } else if (this.fontTableState == 1 && this.groupState.depth < this.fontTableDepth) {
                this.fontTableState = 2;
            }
            if (this.currentListTable != null) {
                if (equals("list") || equals("listoverride")) {
                    this.currentList = new ListDescriptor();
                    this.listTableLevel = -1;
                } else if (this.currentList != null) {
                    if (equals("liststylename")) {
                        this.currentList.isStyle = true;
                    } else if (equals("listlevel")) {
                        this.listTableLevel++;
                    }
                }
            }
            if (!this.groupState.ignore && (equals("par") || equals("pard") || equals("sect") || equals("sectd") || equals("plain") || equals("ltrch") || equals("rtlch") || equals("htmlrtf") || equals("line"))) {
                this.inHeader = false;
            }
        } else if (equals("b")) {
            if (!this.groupState.bold) {
                pushText();
                lazyStartParagraph();
                if (this.groupState.italic) {
                    end("i");
                }
                this.groupState.bold = true;
                start("b");
                if (this.groupState.italic) {
                    start("i");
                }
            }
        } else if (equals("i") && !this.groupState.italic) {
            pushText();
            lazyStartParagraph();
            this.groupState.italic = true;
            start("i");
        }
        boolean z = this.groupState.ignore;
        if (equals("pard")) {
            pushText();
            if (this.groupState.italic) {
                end("i");
                this.groupState.italic = false;
            }
            if (this.groupState.bold) {
                end("b");
                this.groupState.bold = false;
            }
            if (inList()) {
                pendingListEnd();
                return;
            }
            return;
        }
        if (equals("plain")) {
            if (this.groupState.italic || this.groupState.bold) {
                pushText();
                if (this.groupState.italic) {
                    end("i");
                    this.groupState.italic = false;
                }
                if (this.groupState.bold) {
                    end("b");
                    this.groupState.bold = false;
                    return;
                }
                return;
            }
            return;
        }
        if (equals("par")) {
            if (z) {
                return;
            }
            endParagraph(true);
            if (inList()) {
                pendingListEnd();
                return;
            }
            return;
        }
        if (equals("shptxt")) {
            pushText();
            this.groupState.ignore = false;
            return;
        }
        if (equals("chatn")) {
            addOutputChar(' ');
            pushText();
            this.groupState.ignore = false;
            return;
        }
        if (equals("atnid")) {
            addOutputChar(' ');
            pushText();
            this.groupState.ignore = false;
            return;
        }
        if (equals("atnauthor")) {
            addOutputChar(' ');
            pushText();
            this.groupState.ignore = false;
            return;
        }
        if (equals(ElementTags.ANNOTATION)) {
            this.groupState.annotation = true;
            pushText();
            this.groupState.ignore = false;
            return;
        }
        if (equals("listtext")) {
            this.groupState.ignore = true;
            return;
        }
        if (equals("cell")) {
            endParagraph(true);
            return;
        }
        if (equals("sp")) {
            this.groupState.sp = true;
            return;
        }
        if (equals("sn")) {
            this.embObjHandler.startSN();
            this.groupState.sn = true;
            return;
        }
        if (equals("sv")) {
            this.embObjHandler.startSV();
            this.groupState.sv = true;
            return;
        }
        if (equals("object")) {
            pushText();
            this.embObjHandler.setInObject(true);
            this.groupState.object = true;
            return;
        }
        if (equals("objdata")) {
            this.groupState.objdata = true;
            this.embObjHandler.startObjData();
            return;
        }
        if (equals(ContentTypes.EXTENSION_PICT)) {
            pushText();
            this.groupState.pictDepth = 1;
            this.embObjHandler.startPict();
            return;
        }
        if (equals("line")) {
            if (z) {
                return;
            }
            addOutputChar('\n');
            return;
        }
        if (equals("column")) {
            if (z) {
                return;
            }
            addOutputChar(' ');
            return;
        }
        if (equals("page")) {
            if (z) {
                return;
            }
            addOutputChar('\n');
            return;
        }
        if (equals("softline")) {
            if (z) {
                return;
            }
            addOutputChar('\n');
            return;
        }
        if (equals("softcolumn")) {
            if (z) {
                return;
            }
            addOutputChar(' ');
            return;
        }
        if (equals("softpage")) {
            if (z) {
                return;
            }
            addOutputChar('\n');
            return;
        }
        if (equals("tab")) {
            if (z) {
                return;
            }
            addOutputChar('\t');
            return;
        }
        if (equals("upr")) {
            this.uprState = 0;
            return;
        }
        if (equals("ud") && this.uprState == 1) {
            this.uprState = -1;
            this.groupState.ignore = false;
            return;
        }
        if (equals("bullet")) {
            if (z) {
                return;
            }
            addOutputChar((char) 8226);
            return;
        }
        if (equals("endash")) {
            if (z) {
                return;
            }
            addOutputChar((char) 8211);
            return;
        }
        if (equals("emdash")) {
            if (z) {
                return;
            }
            addOutputChar((char) 8212);
            return;
        }
        if (equals("enspace")) {
            if (z) {
                return;
            }
            addOutputChar((char) 8194);
            return;
        }
        if (equals("qmspace")) {
            if (z) {
                return;
            }
            addOutputChar((char) 8197);
            return;
        }
        if (equals("emspace")) {
            if (z) {
                return;
            }
            addOutputChar((char) 8195);
            return;
        }
        if (equals("lquote")) {
            if (z) {
                return;
            }
            addOutputChar((char) 8216);
            return;
        }
        if (equals("rquote")) {
            if (z) {
                return;
            }
            addOutputChar((char) 8217);
            return;
        }
        if (equals("ldblquote")) {
            if (z) {
                return;
            }
            addOutputChar((char) 8220);
            return;
        }
        if (equals("rdblquote")) {
            if (z) {
                return;
            }
            addOutputChar((char) 8221);
            return;
        }
        if (equals("fldinst")) {
            this.fieldState = 1;
            this.groupState.ignore = false;
            return;
        }
        if (equals("fldrslt") && this.fieldState == 2) {
            if (!$assertionsDisabled && this.pendingURL == null) {
                throw new AssertionError();
            }
            lazyStartParagraph();
            this.out.startElement("a", "href", this.pendingURL);
            this.pendingURL = null;
            this.fieldState = 3;
            this.groupState.ignore = false;
        }
    }

    /**
     * Handles the start of a new group: saves the enclosing group's state, creates a
     * new state derived from it, and peeks at the next two bytes to decide whether the
     * group should start out ignored.
     *
     * <p>The peeked bytes are pushed back so the caller's parsing continues from the
     * same position. The stream therefore needs room to push back two bytes
     * (NOTE(review): the pushback buffer size is set where the stream is created,
     * outside this method - confirm it is at least 2).
     *
     * @param pushbackInputStream the stream being parsed, positioned just after the
     *                            byte that opened the group
     * @throws IOException if reading from or pushing back into the stream fails
     */
    private void processGroupStart(PushbackInputStream pushbackInputStream) throws IOException {
        this.ansiSkip = 0;

        // Remember the enclosing group's state, then switch to a new state built from it.
        this.groupStates.add(this.groupState);
        this.groupState = new GroupState(this.groupState);
        if (!$assertionsDisabled && this.groupStates.size() != this.groupState.depth) {
            throw new AssertionError("size=" + this.groupStates.size() + " depth=" + this.groupState.depth);
        }

        // uprState is 0 right after the "upr" control word was seen: the first group
        // opened after it starts out ignored, and uprState advances to 1.
        if (this.uprState == 0) {
            this.uprState = 1;
            this.groupState.ignore = true;
        }

        // Look ahead: a backslash immediately followed by '*' marks the group as ignored.
        int firstByte = pushbackInputStream.read();
        if (firstByte == '\\') {
            int secondByte = pushbackInputStream.read();
            if (secondByte == '*') {
                this.groupState.ignore = true;
            }
            // Only push back real bytes. unread(-1) would store (byte) -1, i.e. a
            // phantom 0xFF byte that a later read() returns as if it were document data.
            if (secondByte != -1) {
                pushbackInputStream.unread(secondByte);
            }
        }
        if (firstByte != -1) {
            pushbackInputStream.unread(firstByte);
        }
    }

    /**
     * Handles the end of the current group.
     *
     * <p>In order, this method:
     * <ol>
     *   <li>while still in the header, stores any pending metadata value and clears
     *       the pending text buffer;</li>
     *   <li>notifies the embedded-object handler when the closing group carried object
     *       data, picture data or a shape property;</li>
     *   <li>restores the enclosing group's state, closing and reopening the bold and
     *       italic elements so the output matches that state;</li>
     *   <li>advances the field state: a finished field instruction beginning with
     *       {@code HYPERLINK} becomes the pending URL, and a finished field result
     *       closes the open {@code a} element.</li>
     * </ol>
     *
     * @throws IOException   propagated from the output and embedded-object helpers
     * @throws SAXException  propagated from the output helpers
     * @throws TikaException propagated from the output and embedded-object helpers
     */
    private void processGroupEnd() throws IOException, SAXException, TikaException {
        if (this.inHeader) {
            if (this.nextMetaData != null) {
                if (this.nextMetaData == TikaCoreProperties.CREATED) {
                    // The creation time is assembled from the date parts collected so
                    // far; Calendar months are zero-based, hence month - 1.
                    Calendar created = Calendar.getInstance(TimeZone.getDefault(), Locale.ROOT);
                    created.set(this.year, this.month - 1, this.day, this.hour, this.minute, 0);
                    this.metadata.set(this.nextMetaData, created.getTime());
                } else if (this.nextMetaData.isMultiValuePermitted()) {
                    this.metadata.add(this.nextMetaData, this.pendingBuffer.toString());
                } else {
                    this.metadata.set(this.nextMetaData, this.pendingBuffer.toString());
                }
                this.nextMetaData = null;
            }
            this.pendingBuffer.setLength(0);
        }

        if (!$assertionsDisabled && this.groupState.depth <= 0) {
            throw new AssertionError();
        }
        this.ansiSkip = 0;

        if (this.groupState.objdata) {
            // A failure while finishing an embedded object is recorded in the
            // metadata rather than aborting the parse.
            try {
                this.embObjHandler.handleCompletedObject();
            } catch (IOException | TikaException e) {
                EmbeddedDocumentUtil.recordException(e, this.metadata);
            }
            this.groupState.objdata = false;
        } else if (this.groupState.pictDepth > 0) {
            if (this.groupState.sn) {
                this.embObjHandler.endSN();
            } else if (this.groupState.sv) {
                this.embObjHandler.endSV();
            } else if (this.groupState.sp) {
                this.embObjHandler.endSP();
            } else if (this.groupState.pictDepth == 1) {
                this.embObjHandler.handleCompletedObject();
            }
        }

        if (this.groupState.annotation) {
            addOutputChar(' ');
        }
        if (this.groupState.object) {
            this.embObjHandler.setInObject(false);
        }

        // An empty stack means there is no enclosing state to restore; the current
        // state is then simply kept.
        if (!this.groupStates.isEmpty()) {
            GroupState outer = this.groupStates.removeLast();

            // Close italic when the outer group is not italic, or when bold differs
            // (the bold element is closed or opened below, before italic is reopened).
            if (this.groupState.italic) {
                if (!outer.italic || this.groupState.bold != outer.bold) {
                    end("i");
                    this.groupState.italic = false;
                }
            }
            if (this.groupState.bold && !outer.bold) {
                end("b");
            }
            if (!this.groupState.bold && outer.bold) {
                start("b");
            }
            if (!this.groupState.italic && outer.italic) {
                start("i");
            }
            this.groupState = outer;
        }
        if (!$assertionsDisabled && this.groupStates.size() != this.groupState.depth) {
            throw new AssertionError();
        }

        if (this.fieldState == 3) {
            // The field result is complete: close the link opened for it.
            end("a");
            this.fieldState = 0;
        } else if (this.fieldState == 1) {
            // The field instruction is complete: see whether it describes a hyperlink.
            String instruction = this.pendingBuffer.toString().trim();
            this.pendingBuffer.setLength(0);
            if (instruction.startsWith("HYPERLINK")) {
                String target = instruction.substring("HYPERLINK".length()).trim();
                // Checked before the quotes are stripped, because the switch sits
                // outside the quoted destination.
                boolean localLink = target.contains("\\l ");
                int openQuote = target.indexOf('"');
                if (openQuote != -1) {
                    int closeQuote = target.indexOf('"', openQuote + 1);
                    if (closeQuote != -1) {
                        target = target.substring(openQuote + 1, closeQuote);
                    }
                }
                this.pendingURL = (localLink ? "#" : "") + target;
                this.fieldState = 2;
            } else {
                this.fieldState = 0;
            }
        }
    }

    /*
     * Static setup: resolves every charset constant once and fills the two lookup
     * tables from integer ids to charsets.
     *
     * FCHARSET_MAP is keyed by a font charset id and ANSICPG_MAP by a code page id
     * (presumably the arguments of the RTF fcharset and ansicpg control words -
     * confirm against the lookup sites, which are outside this block).
     */
    static {
        $assertionsDisabled = !TextExtractor.class.desiredAssertionStatus();

        // ASCII is assigned first, before any getCharset call.
        // NOTE(review): getCharset is defined outside this block; if it falls back to
        // ASCII for unsupported names, this ordering is required - confirm.
        ASCII = Charset.forName("US-ASCII");
        WINDOWS_1252 = getCharset("WINDOWS-1252");
        MAC_ROMAN = getCharset("MacRoman");
        SHIFT_JIS = getCharset(CharsetNames.CS_SHIFT_JIS);
        WINDOWS_57011 = getCharset("windows-57011");
        WINDOWS_57010 = getCharset("windows-57010");
        WINDOWS_57009 = getCharset("windows-57009");
        WINDOWS_57008 = getCharset("windows-57008");
        WINDOWS_57007 = getCharset("windows-57007");
        WINDOWS_57006 = getCharset("windows-57006");
        WINDOWS_57005 = getCharset("windows-57005");
        WINDOWS_57004 = getCharset("windows-57004");
        WINDOWS_57003 = getCharset("windows-57003");
        X_ISCII91 = getCharset("x-ISCII91");
        X_MAC_CENTRAL_EUROPE = getCharset("x-MacCentralEurope");
        MAC_CYRILLIC = getCharset("MacCyrillic");
        X_JOHAB = getCharset("x-Johab");
        CP12582 = getCharset("CP1258");
        CP12572 = getCharset("CP1257");
        CP12562 = getCharset("CP1256");
        CP12552 = getCharset("CP1255");
        CP12542 = getCharset("CP1254");
        CP12532 = getCharset("CP1253");
        CP1252 = getCharset("CP1252");
        CP12512 = getCharset("CP1251");
        CP12502 = getCharset("CP1250");
        CP950 = getCharset("CP950");
        CP949 = getCharset("CP949");
        MS9362 = getCharset("MS936");
        MS8742 = getCharset("MS874");
        CP866 = getCharset("CP866");
        CP865 = getCharset("CP865");
        CP864 = getCharset("CP864");
        CP863 = getCharset("CP863");
        CP862 = getCharset("CP862");
        CP860 = getCharset("CP860");
        CP852 = getCharset("CP852");
        CP8502 = getCharset("CP850");
        CP819 = getCharset("CP819");
        WINDOWS_720 = getCharset("windows-720");
        WINDOWS_711 = getCharset("windows-711");
        WINDOWS_710 = getCharset("windows-710");
        WINDOWS_709 = getCharset("windows-709");
        ISO_8859_6 = getCharset("ISO-8859-6");
        CP4372 = getCharset("CP437");
        CP850 = getCharset(CharsetMapping.MYSQL_CHARSET_NAME_cp850);
        CP437 = getCharset("cp437");
        MS874 = getCharset("ms874");
        CP1257 = getCharset(CharsetMapping.MYSQL_CHARSET_NAME_cp1257);
        CP1256 = getCharset(CharsetMapping.MYSQL_CHARSET_NAME_cp1256);
        CP1255 = getCharset("cp1255");
        CP1258 = getCharset("cp1258");
        CP1254 = getCharset("cp1254");
        CP1253 = getCharset("cp1253");
        MS950 = getCharset("ms950");
        MS936 = getCharset("ms936");
        MS1361 = getCharset("ms1361");
        MS932 = getCharset("MS932");
        CP1251 = getCharset(CharsetMapping.MYSQL_CHARSET_NAME_cp1251);
        CP1250 = getCharset(CharsetMapping.MYSQL_CHARSET_NAME_cp1250);
        MAC_THAI = getCharset("MacThai");
        MAC_TURKISH = getCharset("MacTurkish");
        MAC_GREEK = getCharset("MacGreek");
        MAC_ARABIC = getCharset("MacArabic");
        MAC_HEBREW = getCharset("MacHebrew");
        JOHAB = getCharset("johab");
        BIG5 = getCharset("Big5");
        GB2312 = getCharset(StringUtils.GB2312);
        MS949 = getCharset("ms949");

        FCHARSET_MAP = new HashMap<>();
        ANSICPG_MAP = new HashMap<>();

        // Font charset id -> charset.
        FCHARSET_MAP.put(0, WINDOWS_1252);
        FCHARSET_MAP.put(77, MAC_ROMAN);
        FCHARSET_MAP.put(78, SHIFT_JIS);
        FCHARSET_MAP.put(79, MS949);
        FCHARSET_MAP.put(80, GB2312);
        FCHARSET_MAP.put(81, BIG5);
        FCHARSET_MAP.put(82, JOHAB);
        FCHARSET_MAP.put(83, MAC_HEBREW);
        FCHARSET_MAP.put(84, MAC_ARABIC);
        FCHARSET_MAP.put(85, MAC_GREEK);
        FCHARSET_MAP.put(86, MAC_TURKISH);
        FCHARSET_MAP.put(87, MAC_THAI);
        FCHARSET_MAP.put(88, CP1250);
        FCHARSET_MAP.put(89, CP1251);
        FCHARSET_MAP.put(128, MS932);
        FCHARSET_MAP.put(129, MS949);
        FCHARSET_MAP.put(130, MS1361);
        FCHARSET_MAP.put(134, MS936);
        FCHARSET_MAP.put(136, MS950);
        FCHARSET_MAP.put(161, CP1253);
        FCHARSET_MAP.put(162, CP1254);
        FCHARSET_MAP.put(163, CP1258);
        FCHARSET_MAP.put(177, CP1255);
        FCHARSET_MAP.put(178, CP1256);
        FCHARSET_MAP.put(186, CP1257);
        FCHARSET_MAP.put(204, CP1251);
        FCHARSET_MAP.put(222, MS874);
        FCHARSET_MAP.put(238, CP1250);
        FCHARSET_MAP.put(254, CP437);
        FCHARSET_MAP.put(255, CP850);

        // Code page id -> charset.
        ANSICPG_MAP.put(437, CP4372);
        // NOTE(review): the ResponseData and LeicaMakernoteDirectory constants below
        // have nothing to do with code pages; they look like decompiler substitutions
        // for integer literals, presumably 708, 709, 710 and 819 - confirm the values
        // before replacing them with plain literals.
        ANSICPG_MAP.put(Integer.valueOf(ResponseData.ERROR_METHOD_DISABLED), ISO_8859_6);
        ANSICPG_MAP.put(Integer.valueOf(ResponseData.ERROR_SESSION_TIMEOUT), WINDOWS_709);
        ANSICPG_MAP.put(Integer.valueOf(ResponseData.ERROR_USER_NOTFOUND), WINDOWS_710);
        // 711 and 720 get their own keys. Registering all three charsets under one
        // key would leave only the last one reachable.
        ANSICPG_MAP.put(711, WINDOWS_711);
        ANSICPG_MAP.put(720, WINDOWS_720);
        ANSICPG_MAP.put(Integer.valueOf(LeicaMakernoteDirectory.TAG_M16_C_VERSION), CP819);
        ANSICPG_MAP.put(850, CP8502);
        ANSICPG_MAP.put(852, CP852);
        ANSICPG_MAP.put(860, CP860);
        ANSICPG_MAP.put(862, CP862);
        ANSICPG_MAP.put(863, CP863);
        ANSICPG_MAP.put(864, CP864);
        ANSICPG_MAP.put(865, CP865);
        ANSICPG_MAP.put(866, CP866);
        ANSICPG_MAP.put(874, MS8742);
        ANSICPG_MAP.put(Integer.valueOf(CodePageUtil.CP_SJIS), MS932);
        ANSICPG_MAP.put(Integer.valueOf(CodePageUtil.CP_GBK), MS9362);
        ANSICPG_MAP.put(Integer.valueOf(CodePageUtil.CP_MS949), CP949);
        ANSICPG_MAP.put(950, CP950);
        ANSICPG_MAP.put(1250, CP12502);
        ANSICPG_MAP.put(1251, CP12512);
        ANSICPG_MAP.put(1252, CP1252);
        ANSICPG_MAP.put(1253, CP12532);
        ANSICPG_MAP.put(1254, CP12542);
        ANSICPG_MAP.put(1255, CP12552);
        ANSICPG_MAP.put(1256, CP12562);
        ANSICPG_MAP.put(1257, CP12572);
        ANSICPG_MAP.put(1258, CP12582);
        ANSICPG_MAP.put(1361, X_JOHAB);
        ANSICPG_MAP.put(10000, MAC_ROMAN);
        ANSICPG_MAP.put(10001, SHIFT_JIS);
        ANSICPG_MAP.put(10004, MAC_ARABIC);
        ANSICPG_MAP.put(10005, MAC_HEBREW);
        ANSICPG_MAP.put(10006, MAC_GREEK);
        ANSICPG_MAP.put(10007, MAC_CYRILLIC);
        ANSICPG_MAP.put(Integer.valueOf(CodePageUtil.CP_MAC_CENTRAL_EUROPE), X_MAC_CENTRAL_EUROPE);
        ANSICPG_MAP.put(Integer.valueOf(CodePageUtil.CP_MAC_TURKISH), MAC_TURKISH);
        ANSICPG_MAP.put(57002, X_ISCII91);
        ANSICPG_MAP.put(57003, WINDOWS_57003);
        ANSICPG_MAP.put(57004, WINDOWS_57004);
        ANSICPG_MAP.put(57005, WINDOWS_57005);
        ANSICPG_MAP.put(57006, WINDOWS_57006);
        ANSICPG_MAP.put(57007, WINDOWS_57007);
        ANSICPG_MAP.put(57008, WINDOWS_57008);
        ANSICPG_MAP.put(57009, WINDOWS_57009);
        ANSICPG_MAP.put(57010, WINDOWS_57010);
        ANSICPG_MAP.put(57011, WINDOWS_57011);
    }
}
