/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.french.pipeline;

import edu.stanford.nlp.international.french.scripts.MWEPreprocessor;
import edu.stanford.nlp.international.process.AbstractDataset;
import edu.stanford.nlp.international.process.DefaultMapper;
import edu.stanford.nlp.international.process.StringMap;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.international.arabic.ATBTreeUtils;
import edu.stanford.nlp.trees.international.french.FrenchTreeReaderFactory;
import edu.stanford.nlp.trees.tregex.ParseException;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Iterator;

/**
 * Dataset builder from the French pipeline package. Trees are loaded into a
 * {@link MemoryTreebank} created with a {@link FrenchTreeReaderFactory} and the
 * {@code ISO8859_1} encoding, and are read from files with the {@code xml}
 * extension unless a split filter is configured.
 */
public class FTBDataset extends AbstractDataset {
    /**
     * Creates the dataset with an empty in-memory treebank and the {@code xml}
     * tree file extension.
     */
    public FTBDataset() {
        this.treebank = new MemoryTreebank(new FrenchTreeReaderFactory(), "ISO8859_1");
        this.treeFileExtension = "xml";
    }

    /**
     * Loads every configured data path into the treebank, runs the MWE
     * preprocessing pass, and writes each surviving tree to the output file
     * (and, when {@code makeFlatFile} is set, its flattened form to the flat file).
     *
     * <p>Trees matching any of the "bad tree" Tregex patterns are reported on
     * {@code System.err} and skipped. Failures to open the output files or to
     * compile the patterns are reported on {@code System.err}; they are not
     * rethrown. Both writers are closed before the method returns.
     */
    public void build() {
        for (File path : this.pathsToData) {
            int prevSize = this.treebank.size();
            if (this.splitFilter == null) {
                this.treebank.loadPath(path, this.treeFileExtension, false);
            } else {
                this.treebank.loadPath(path, this.splitFilter);
            }
            this.toStringBuffer.append(String.format(" Loaded %d trees from %s\n", this.treebank.size() - prevSize, path.getPath()));
        }

        PrintWriter outfile = null;
        PrintWriter flatFile = null;
        // Tracks which file is currently being opened so that a failure is
        // reported against the right path (either output may fail to open).
        String fileBeingOpened = this.outFileName;
        try {
            outfile = this.openUtf8Writer(this.outFileName);
            if (this.makeFlatFile) {
                fileBeingOpened = this.flatFileName;
                flatFile = this.openUtf8Writer(this.flatFileName);
            }
            this.outputFileList.add(this.outFileName);
            if (this.makeFlatFile) {
                this.outputFileList.add(this.flatFileName);
                this.toStringBuffer.append(" Made flat files\n");
            }

            this.preprocessMWEs();

            // Patterns for trees to discard: per Tregex syntax these match a SENT
            // node whose children are exactly one, exactly two, or exactly four
            // PUNC nodes.
            ArrayList<TregexPattern> badTrees = new ArrayList<TregexPattern>();
            badTrees.add(TregexPattern.compile("@SENT <: @PUNC"));
            badTrees.add(TregexPattern.compile("@SENT <1 @PUNC <2 @PUNC !<3 __"));
            badTrees.add(TregexPattern.compile("@SENT <1 @PUNC <2 @PUNC <3 @PUNC <4 @PUNC !<5 __"));

            for (Tree t : this.treebank) {
                if (FTBDataset.matchesAny(badTrees, t)) {
                    System.err.println("Discarding tree: " + t.toString());
                    continue;
                }
                if (this.customTreeVisitor != null) {
                    this.customTreeVisitor.visitTree(t);
                }
                outfile.println(t.toString());
                if (this.makeFlatFile) {
                    String flatString = ATBTreeUtils.flattenTree(t);
                    if (this.removeEscapeTokens) {
                        flatString = ATBTreeUtils.unEscape(flatString);
                    }
                    flatFile.println(flatString);
                }
            }
        }
        catch (UnsupportedEncodingException e) {
            System.err.printf("%s: Filesystem does not support UTF-8 output%n", this.getClass().getName());
            e.printStackTrace();
        }
        catch (FileNotFoundException e) {
            System.err.printf("%s: Could not open %s for writing%n", this.getClass().getName(), fileBeingOpened);
        }
        catch (ParseException e) {
            System.err.printf("%s: Could not compile Tregex expressions%n", this.getClass().getName());
            e.printStackTrace();
        }
        finally {
            if (outfile != null) {
                outfile.close();
            }
            if (flatFile != null) {
                flatFile.close();
            }
        }
    }

    /**
     * Opens a buffered, UTF-8 encoded {@link PrintWriter} on the named file.
     *
     * @param fileName path of the file to create or overwrite
     * @return a writer the caller is responsible for closing
     * @throws FileNotFoundException if the file cannot be opened for writing
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    private PrintWriter openUtf8Writer(String fileName) throws FileNotFoundException, UnsupportedEncodingException {
        return new PrintWriter(new BufferedWriter(new OutputStreamWriter((OutputStream)new FileOutputStream(fileName), "UTF-8")));
    }

    /**
     * Returns whether any of the given patterns finds a match in the tree,
     * stopping at the first pattern that does.
     *
     * @param patterns compiled Tregex patterns to try, in order
     * @param t tree to test
     * @return {@code true} if at least one pattern matches
     */
    private static boolean matchesAny(Iterable<TregexPattern> patterns, Tree t) {
        for (TregexPattern p : patterns) {
            if (p.matcher(t).find()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Runs the two-pass MWE preprocessing over the treebank: the first pass
     * collects statistics from every tree into counters, the second pass hands
     * each tree to {@link MWEPreprocessor#traverseAndFix} together with the
     * preterminal-to-label and unigram tagger counters.
     */
    private void preprocessMWEs() {
        TwoDimensionalCounter<String, String> labelTerm = new TwoDimensionalCounter<String, String>();
        TwoDimensionalCounter<String, String> termLabel = new TwoDimensionalCounter<String, String>();
        TwoDimensionalCounter<String, String> labelPreterm = new TwoDimensionalCounter<String, String>();
        TwoDimensionalCounter<String, String> pretermLabel = new TwoDimensionalCounter<String, String>();
        TwoDimensionalCounter<String, String> unigramTagger = new TwoDimensionalCounter<String, String>();
        // Pass 1: gather counts over the whole treebank before any tree is modified.
        for (Tree t : this.treebank) {
            MWEPreprocessor.countMWEStatistics(t, unigramTagger, labelPreterm, pretermLabel, labelTerm, termLabel);
        }
        // Pass 2: fix each tree using the completed counts.
        for (Tree t : this.treebank) {
            MWEPreprocessor.traverseAndFix(t, pretermLabel, unigramTagger);
        }
    }

    /**
     * Applies the superclass option handling, then ensures a lexical mapper
     * exists and configures the POS mapper from any mapping files.
     *
     * <p>If no lexical mapper was set, a {@link DefaultMapper} is installed and
     * set up with the comma-separated {@code lexMapOptions}. If mapping paths
     * were supplied, the POS mapper (a {@link DefaultMapper} when none was set)
     * is set up once per path.
     *
     * @param opts options to apply
     * @return the value returned by the superclass {@code setOptions}
     */
    public boolean setOptions(StringMap opts) {
        boolean ret = super.setOptions(opts);
        if (this.lexMapper == null) {
            this.lexMapper = new DefaultMapper();
            this.lexMapper.setup(null, this.lexMapOptions.split(","));
        }
        if (this.pathsToMappings.size() != 0) {
            if (this.posMapper == null) {
                this.posMapper = new DefaultMapper();
            }
            for (File path : this.pathsToMappings) {
                this.posMapper.setup(path, new String[0]);
            }
        }
        return ret;
    }
}

