/*
 * Decompiled with CFR 0.152.
 */
package io.ballerina.stdlib.ai;

import io.ballerina.runtime.api.Module;
import io.ballerina.runtime.api.creators.TypeCreator;
import io.ballerina.runtime.api.creators.ValueCreator;
import io.ballerina.runtime.api.types.RecordType;
import io.ballerina.runtime.api.utils.StringUtils;
import io.ballerina.runtime.api.values.BMap;
import io.ballerina.runtime.api.values.BString;
import io.ballerina.stdlib.ai.ModuleUtils;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.apache.tika.parser.pdf.PDFParser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * Native helpers that read PDF, DOCX and PPTX files with Apache Tika and expose the
 * extracted text and document metadata as a Ballerina {@code TextDocument} record.
 *
 * <p>The public {@code read*} methods never throw for parsing problems; failures are
 * converted into the value produced by {@code ModuleUtils.createError}.
 */
public class TextDataLoader {
    // Field names of the TextDocument record that is handed back to Ballerina.
    private static final String TYPE_FIELD = "type";
    private static final String METADATA_FIELD = "metadata";
    private static final String CONTENT_FIELD = "content";
    // Keys written into the record's metadata map.
    private static final String MIME_TYPE_FIELD = "mimeType";
    private static final String FILE_NAME_FIELD = "fileName";
    /** MIME type reported when a {@link TextDocumentInfo} carries no MIME type. */
    private static final String DEFAULT_MIME_TYPE = "application/octet-stream";
    private static final String TEXT_DOCUMENT_TYPE = "text";
    private static final String TEXT_DOCUMENT_RECORD = "TextDocument";
    /** Metadata names starting with this prefix (compared in lower case) are not exported. */
    private static final String X_TIKA_PREFIX = "x-tika";
    /** Write limit handed to {@link BodyContentHandler}; see the Tika docs for the meaning of -1. */
    private static final int UNLIMITED_CONTENT_SIZE = -1;
    private static final String MIME_TYPE_PDF = "application/pdf";
    private static final String MIME_TYPE_DOCX = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    private static final String MIME_TYPE_PPTX = "application/vnd.openxmlformats-officedocument.presentationml.presentation";
    // NOTE(review): the constants below are not referenced anywhere in this class; kept as found.
    private static final String MIME_TYPE_DOC = "application/msword";
    private static final String MIME_TYPE_PPT = "application/vnd.ms-powerpoint";
    private static final String EXTENSION_PDF = "pdf";
    private static final String EXTENSION_DOCX = "docx";
    private static final String EXTENSION_PPTX = "pptx";
    private static final String EXTENSION_DOC = "doc";
    private static final String EXTENSION_PPT = "ppt";

    // The parser instance is created inside the lambda so that, as before, it is only
    // constructed after the input file has been opened successfully.
    private static final StreamParser PDF_PARSER =
            (stream, handler, metadata, context) -> new PDFParser().parse(stream, handler, metadata, context);
    private static final StreamParser OOXML_PARSER =
            (stream, handler, metadata, context) -> new OOXMLParser().parse(stream, handler, metadata, context);

    /**
     * Reads a PDF file and converts it into a {@code TextDocument} record.
     *
     * @param filePath path of the PDF file
     * @return the populated record, or the value of {@code ModuleUtils.createError} on failure
     */
    public static Object readPdf(BString filePath) {
        String path = filePath.getValue();
        return loadDocument(() -> parsePDF(path));
    }

    /**
     * Reads a DOCX file and converts it into a {@code TextDocument} record.
     *
     * @param filePath path of the DOCX file
     * @return the populated record, or the value of {@code ModuleUtils.createError} on failure
     */
    public static Object readDocx(BString filePath) {
        String path = filePath.getValue();
        return loadDocument(() -> parseOfficeX(path, FileType.DOCX));
    }

    /**
     * Reads a PPTX file and converts it into a {@code TextDocument} record.
     *
     * @param filePath path of the PPTX file
     * @return the populated record, or the value of {@code ModuleUtils.createError} on failure
     */
    public static Object readPptx(BString filePath) {
        String path = filePath.getValue();
        return loadDocument(() -> parseOfficeX(path, FileType.PPTX));
    }

    /**
     * Runs a parse step and maps its outcome to the value returned to Ballerina.
     * Shared by all {@code read*} entry points so the error mapping lives in one place.
     */
    private static Object loadDocument(DocumentSupplier supplier) {
        try {
            return supplier.get().toBallerinaTextDocument();
        } catch (IOException | TikaException | SAXException e) {
            return ModuleUtils.createError("Error reading document: " + e.getMessage());
        } catch (RuntimeException e) {
            return ModuleUtils.createError("Unexpected error: " + e.getMessage());
        }
    }

    /**
     * Parses a PDF file with Tika's {@link PDFParser}.
     *
     * @param path path of the file to read
     * @return extracted text, filtered metadata and file name, tagged with the PDF MIME type
     * @throws IOException if the file cannot be opened, read or closed
     * @throws TikaException if Tika fails to parse the document
     * @throws SAXException if the content handler reports a failure
     */
    static TextDocumentInfo parsePDF(String path) throws IOException, TikaException, SAXException {
        Metadata metadata = new Metadata();
        String content = extractText(path, metadata, PDF_PARSER);
        return TextDocumentInfo.fromPdf(content, extractMetadata(metadata), getFileName(path));
    }

    /**
     * Parses an Office Open XML file with Tika's {@link OOXMLParser}.
     *
     * @param path path of the file to read
     * @param fileType which OOXML flavour the file is; selects the MIME type of the result
     * @return extracted text, filtered metadata and file name
     * @throws IOException if the file cannot be opened, read or closed
     * @throws TikaException if Tika fails to parse the document
     * @throws SAXException if the content handler reports a failure
     */
    static TextDocumentInfo parseOfficeX(String path, FileType fileType) throws IOException, TikaException, SAXException {
        Metadata metadata = new Metadata();
        String content = extractText(path, metadata, OOXML_PARSER);
        Map<String, String> documentMetadata = extractMetadata(metadata);
        String fileName = getFileName(path);
        // Switch on the enum constants themselves (not ordinal()) so that reordering or
        // extending FileType is caught by the compiler instead of silently mis-dispatching.
        return switch (fileType) {
            case DOCX -> TextDocumentInfo.fromDocx(content, documentMetadata, fileName);
            case PPTX -> TextDocumentInfo.fromPptx(content, documentMetadata, fileName);
        };
    }

    /**
     * Returns the last name element of {@code filePath}.
     *
     * @return the file name, or {@code null} when the path is null, blank, or has no name element
     */
    private static String getFileName(String filePath) {
        if (filePath == null || filePath.isBlank()) {
            return null;
        }
        Path fileNamePath = Paths.get(filePath).getFileName();
        return fileNamePath != null ? fileNamePath.toString() : null;
    }

    /**
     * Extracts only the text of an Office Open XML file.
     *
     * @param path path of the file to read
     * @return the text collected by the content handler
     * @throws RuntimeException wrapping any {@link IOException}, {@link TikaException} or
     *         {@link SAXException} raised while opening, parsing or closing the file
     */
    static String parseOfficeX(String path) {
        try {
            return extractText(path, new Metadata(), OOXML_PARSER);
        } catch (IOException | TikaException | SAXException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Opens {@code path}, runs {@code parser} over it and returns the collected text.
     * The stream is always closed, including when parsing fails.
     *
     * @param path path of the file to read
     * @param metadata receives the metadata discovered by the parser
     * @param parser the Tika parse call to apply to the opened stream
     */
    private static String extractText(String path, Metadata metadata, StreamParser parser)
            throws IOException, TikaException, SAXException {
        try (InputStream inputStream = new FileInputStream(path)) {
            BodyContentHandler handler = new BodyContentHandler(UNLIMITED_CONTENT_SIZE);
            parser.parse(inputStream, handler, metadata, new ParseContext());
            return handler.toString();
        }
    }

    /**
     * Flattens Tika metadata into a plain string map.
     *
     * <p>Names starting with {@code x-tika} (case-insensitive) and names without values are
     * skipped. Multi-valued entries are joined with {@code "; "}.
     *
     * @param metadata metadata populated by a Tika parser
     * @return a new mutable map from metadata name to its (joined) value
     */
    static Map<String, String> extractMetadata(Metadata metadata) {
        Map<String, String> metadataMap = new HashMap<>();
        for (String name : metadata.names()) {
            // A null name cannot be looked up meaningfully; skip it together with Tika-internal keys.
            if (name == null || name.toLowerCase(Locale.ENGLISH).startsWith(X_TIKA_PREFIX)) {
                continue;
            }
            String[] values = metadata.getValues(name);
            if (values == null || values.length == 0) {
                continue;
            }
            metadataMap.put(name, values.length == 1 ? values[0] : String.join("; ", values));
        }
        return metadataMap;
    }

    /** A parse step that yields a {@link TextDocumentInfo} and may fail with Tika's checked exceptions. */
    @FunctionalInterface
    private interface DocumentSupplier {
        TextDocumentInfo get() throws IOException, TikaException, SAXException;
    }

    /** Mirrors the four-argument {@code parse} call made on the Tika parsers used by this class. */
    @FunctionalInterface
    private interface StreamParser {
        void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
                throws IOException, TikaException, SAXException;
    }

    /**
     * Result of parsing one document: its MIME type, file name, flattened metadata and text.
     */
    record TextDocumentInfo(String mimeType, String fileName, Map<String, String> metadata, String content) {
        /** Creates a result tagged with the PDF MIME type. */
        static TextDocumentInfo fromPdf(String content, Map<String, String> metadata, String fileName) {
            return new TextDocumentInfo(MIME_TYPE_PDF, fileName, metadata, content);
        }

        /** Creates a result tagged with the DOCX MIME type. */
        static TextDocumentInfo fromDocx(String content, Map<String, String> metadata, String fileName) {
            return new TextDocumentInfo(MIME_TYPE_DOCX, fileName, metadata, content);
        }

        /** Creates a result tagged with the PPTX MIME type. */
        static TextDocumentInfo fromPptx(String content, Map<String, String> metadata, String fileName) {
            return new TextDocumentInfo(MIME_TYPE_PPTX, fileName, metadata, content);
        }

        /**
         * Builds the Ballerina {@code TextDocument} record for this result.
         *
         * <p>The record's {@code metadata} map first receives {@code mimeType} (falling back to
         * {@code application/octet-stream} when absent) and {@code fileName}, then every entry of
         * {@link #metadata()}; a document entry with one of those two keys therefore replaces it.
         */
        BMap<BString, Object> toBallerinaTextDocument() {
            RecordType resultRecordType =
                    TypeCreator.createRecordType(TEXT_DOCUMENT_RECORD, ModuleUtils.getModule(), 0L, false, 0);
            BMap<BString, Object> textDocument = ValueCreator.createRecordValue(resultRecordType);
            textDocument.put(StringUtils.fromString(TYPE_FIELD), StringUtils.fromString(TEXT_DOCUMENT_TYPE));
            textDocument.put(StringUtils.fromString(CONTENT_FIELD), StringUtils.fromString(content()));

            BMap<BString, Object> metadataMap = ValueCreator.createMapValue();
            String effectiveMimeType = mimeType() != null ? mimeType() : DEFAULT_MIME_TYPE;
            metadataMap.put(StringUtils.fromString(MIME_TYPE_FIELD), StringUtils.fromString(effectiveMimeType));
            metadataMap.put(StringUtils.fromString(FILE_NAME_FIELD), StringUtils.fromString(fileName()));
            for (Map.Entry<String, String> entry : metadata().entrySet()) {
                metadataMap.put(StringUtils.fromString(entry.getKey()), StringUtils.fromString(entry.getValue()));
            }
            textDocument.put(StringUtils.fromString(METADATA_FIELD), metadataMap);
            return textDocument;
        }
    }

    /** Office Open XML document flavours supported by {@link #parseOfficeX(String, FileType)}. */
    enum FileType {
        DOCX,
        PPTX
    }
}

