package de.hu_berlin.german.korpling.saltnpepper.pepperModules.nlpModules;

import com.neovisionaries.i18n.LanguageCode;
import de.hu_berlin.german.korpling.saltnpepper.pepper.pepperExceptions.PepperModuleNotReadyException;
import de.hu_berlin.german.korpling.saltnpepper.pepper.pepperModules.MAPPING_RESULT;
import de.hu_berlin.german.korpling.saltnpepper.pepper.pepperModules.PepperMapper;
import de.hu_berlin.german.korpling.saltnpepper.pepper.pepperModules.impl.PepperManipulatorImpl;
import de.hu_berlin.german.korpling.saltnpepper.pepper.pepperModules.impl.PepperMapperImpl;
import de.hu_berlin.german.korpling.saltnpepper.pepperModules.nlpModules.exceptions.TokenizerException;
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.sDocumentStructure.SDocumentGraph;
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.sDocumentStructure.STextualDS;
import de.hu_berlin.german.korpling.saltnpepper.salt.saltCore.SElementId;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.io.FilenameUtils;
import org.osgi.service.component.annotations.Component;

/**
 * Pepper manipulator that tokenizes every textual data source of a document using the
 * tokenizer created by the document's {@link SDocumentGraph}.
 *
 * <p>If {@link TokenizerProperties#getAbbreviationFolder()} returns a folder, the files in it
 * are read once in {@link #isReadyToStart()}. A file is used when its extension is the name of
 * a {@link LanguageCode} constant; each line of such a file becomes one abbreviation for that
 * language. The collected abbreviations are handed to every tokenizer created by the mappers
 * of this module.
 */
@Component(name = "TokenizerComponent", factory = "PepperManipulatorComponentFactory")
public class Tokenizer extends PepperManipulatorImpl {
    /** Abbreviations per language; stays {@code null} until an abbreviation file was loaded. */
    private Map<LanguageCode, HashSet<String>> abbreviationMap = null;

    /** Mapper that runs the tokenizer over all textual data sources of one document. */
    private class TokenizerMapper extends PepperMapperImpl {
        private TokenizerMapper() {
        }

        /**
         * Tokenizes every {@link STextualDS} of the current document's graph. A document without
         * a graph, or a graph without textual data sources, is left untouched.
         *
         * @return always {@link MAPPING_RESULT#FINISHED}
         */
        public MAPPING_RESULT mapSDocument() {
            SDocumentGraph graph = getSDocument().getSDocumentGraph();
            if (graph == null) {
                return MAPPING_RESULT.FINISHED;
            }

            // Fully qualified because the simple name is shadowed by the enclosing module class.
            de.hu_berlin.german.korpling.saltnpepper.salt.saltCommon.sDocumentStructure.tokenizer.Tokenizer tokenizer = graph.createTokenizer();

            Map<LanguageCode, HashSet<String>> abbreviations = Tokenizer.this.abbreviationMap;
            if (abbreviations != null) {
                for (Map.Entry<LanguageCode, HashSet<String>> entry : abbreviations.entrySet()) {
                    tokenizer.addAbbreviation(entry.getKey(), entry.getValue());
                }
            }

            if (graph.getSTextualDSs() != null && graph.getSTextualDSs().size() > 0) {
                // Element type of the list is not visible from this file, hence the explicit cast.
                Iterator<?> texts = graph.getSTextualDSs().iterator();
                while (texts.hasNext()) {
                    tokenizer.tokenize((STextualDS) texts.next());
                }
            }
            return MAPPING_RESULT.FINISHED;
        }
    }

    /** Creates the module, names it "Tokenizer" and installs a fresh {@link TokenizerProperties}. */
    public Tokenizer() {
        this.name = "Tokenizer";
        setProperties(new TokenizerProperties());
    }

    /**
     * Loads the abbreviation files when an abbreviation folder is configured.
     *
     * @return always {@code true}
     * @throws PepperModuleNotReadyException declared by the signature; not thrown by this code
     * @throws TokenizerException if an abbreviation file cannot be opened or read
     */
    public boolean isReadyToStart() throws PepperModuleNotReadyException {
        if (((TokenizerProperties) getProperties()).getAbbreviationFolder() != null) {
            loadAbbFolder();
        }
        return true;
    }

    /**
     * Reads every abbreviation file of the configured folder into {@link #abbreviationMap}.
     * Entries that are not regular files, and files whose extension is not the name of a
     * {@link LanguageCode} constant, are skipped.
     *
     * @throws TokenizerException if an abbreviation file cannot be opened or read
     */
    private void loadAbbFolder() {
        File folder = ((TokenizerProperties) getProperties()).getAbbreviationFolder();
        if (folder == null) {
            return;
        }
        File[] candidates = folder.listFiles();
        if (candidates == null) {
            // listFiles() yields null when the path is not a directory or cannot be listed.
            return;
        }
        for (File candidate : candidates) {
            if (!candidate.isFile()) {
                // A sub-directory cannot be opened as a stream and would only produce a
                // misleading "not found" error.
                continue;
            }
            LanguageCode language = languageOf(candidate);
            if (language == null) {
                continue;
            }
            if (this.abbreviationMap == null) {
                this.abbreviationMap = new ConcurrentHashMap<>();
            }
            this.abbreviationMap.put(language, readAbbreviations(candidate));
        }
    }

    /**
     * Resolves the language an abbreviation file belongs to from its file extension.
     *
     * @param file the abbreviation file
     * @return the matching language code, or {@code null} if the extension names no constant
     */
    private static LanguageCode languageOf(File file) {
        try {
            return LanguageCode.valueOf(FilenameUtils.getExtension(file.getName()));
        } catch (IllegalArgumentException ignored) {
            // Enum valueOf() throws instead of returning null for an unknown name; a file with
            // an unrelated or missing extension is simply not an abbreviation file.
            return null;
        }
    }

    /**
     * Reads all lines of the given file into a set, one abbreviation per line.
     *
     * @param file the abbreviation file to read
     * @return the lines of the file
     * @throws TokenizerException if the file cannot be opened or read
     */
    private static HashSet<String> readAbbreviations(File file) {
        HashSet<String> abbreviations = new HashSet<>();
        // Both resources are declared separately so the file stream is closed even when the
        // reader on top of it cannot be constructed.
        try (FileInputStream stream = new FileInputStream(file.getAbsolutePath());
                BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                abbreviations.add(line);
            }
        } catch (FileNotFoundException e) {
            throw new TokenizerException("Cannot tokenize the given text, because the file for abbreviation '" + file.getAbsolutePath() + "' was not found.");
        } catch (IOException e2) {
            throw new TokenizerException("Cannot tokenize the given text, because can not read file '" + file.getAbsolutePath() + "'.");
        }
        return abbreviations;
    }

    /**
     * Creates the mapper that tokenizes one document.
     *
     * @param sElementId id of the element to map; not used by this implementation
     * @return a new {@link TokenizerMapper}
     */
    public PepperMapper createPepperMapper(SElementId sElementId) {
        return new TokenizerMapper();
    }
}
