/*
 * Decompiled with CFR 0.152.
 */
package io.ballerina.stdlib.ai;

import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentSplitter;
import dev.langchain4j.data.document.Metadata;
import dev.langchain4j.data.document.splitter.DocumentByCharacterSplitter;
import dev.langchain4j.data.document.splitter.DocumentByLineSplitter;
import dev.langchain4j.data.document.splitter.DocumentByParagraphSplitter;
import dev.langchain4j.data.document.splitter.DocumentBySentenceSplitter;
import dev.langchain4j.data.document.splitter.DocumentByWordSplitter;
import dev.langchain4j.data.segment.TextSegment;
import io.ballerina.runtime.api.Module;
import io.ballerina.runtime.api.creators.TypeCreator;
import io.ballerina.runtime.api.creators.ValueCreator;
import io.ballerina.runtime.api.types.ArrayType;
import io.ballerina.runtime.api.types.Type;
import io.ballerina.runtime.api.utils.StringUtils;
import io.ballerina.runtime.api.values.BArray;
import io.ballerina.runtime.api.values.BError;
import io.ballerina.runtime.api.values.BMap;
import io.ballerina.runtime.api.values.BString;
import io.ballerina.runtime.api.values.BTypedesc;
import io.ballerina.stdlib.ai.ChunkStrategy;
import io.ballerina.stdlib.ai.HtmlChunker;
import io.ballerina.stdlib.ai.MarkdownChunker;
import io.ballerina.stdlib.ai.ModuleUtils;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

/**
 * Static entry points that split a document record into chunk records.
 *
 * <p>Each public method reads the {@code content} field of the supplied document, splits it with
 * the requested strategy, and returns an array of {@code TextChunk} records. Any
 * {@link RuntimeException} raised while chunking is converted into a {@link BError} and returned
 * instead of being thrown.
 */
public class Chunkers {
    private static final String TEXT_CHUNK_RECORD_TYPE_NAME = "TextChunk";
    private static final String META_DATA_RECORD_TYPE_NAME = "Metadata";
    private static final String CONTENT_FIELD_NAME = "content";
    private static final String METADATA_FIELD_NAME = "metadata";
    private static final String INDEX_FIELD_NAME = "index";
    private static final String ID_FIELD_NAME = "id";
    private static final String PREV_FIELD_NAME = "prev";
    /** Metadata keys whose string values are converted to integers before being stored. */
    private static final Set<String> INTEGER_FIELDS = Set.of(INDEX_FIELD_NAME, ID_FIELD_NAME, PREV_FIELD_NAME);
    /** Suffix stripped from splitter error messages before they are surfaced to the caller. */
    private static final String SUB_SPLITTER_ERROR_SUFFIX =
            ", and there is no subSplitter defined to split it further.";

    /**
     * Splits a plain-text document into chunks.
     *
     * @param document       record holding the text under {@code content} and, optionally, a
     *                       {@code metadata} map that is merged into every chunk's metadata
     * @param chunkSize      forwarded to the splitter as its maximum chunk size
     * @param maxOverlapSize forwarded to the splitter as its overlap size
     * @param chunkStrategy  strategy name resolved through {@code ChunkStrategy.fromString}
     * @param textChunkType  typedesc whose describing type becomes the element type of the result
     * @return a {@link BArray} of chunk records, or a {@link BError} if chunking failed
     */
    public static Object chunkTextDocument(BMap<BString, Object> document, int chunkSize, int maxOverlapSize, BString chunkStrategy, BTypedesc textChunkType) {
        try {
            Document inputDocument = Document.from(Chunkers.readContent(document));
            DocumentSplitter splitter = Chunkers.getDocumentSplitter(chunkStrategy, chunkSize, maxOverlapSize);
            List<TextSegment> textSegments = splitter.split(inputDocument);
            return Chunkers.createTextChunkRecordArray(document, textSegments, textChunkType.getDescribingType());
        }
        catch (RuntimeException e) {
            return Chunkers.handleChunkingErrors(e);
        }
    }

    /**
     * Splits a Markdown document into chunks using {@link MarkdownChunker}.
     *
     * @param document       record holding the Markdown text under {@code content}
     * @param chunkSize      forwarded to {@code MarkdownChunker.chunk}
     * @param maxOverlapSize forwarded to {@code MarkdownChunker.chunk}
     * @param chunkStrategy  one of {@code MARKDOWN_HEADER}, {@code CODE_BLOCK},
     *                       {@code HORIZONTAL_LINE}, {@code PARAGRAPH}, {@code LINE},
     *                       {@code SENTENCE}, {@code WORD}, {@code CHARACTER}
     * @param textChunkType  typedesc whose describing type becomes the element type of the result
     * @return a {@link BArray} of chunk records, or a {@link BError} if chunking failed or the
     *         strategy name is not recognised
     */
    public static Object chunkMarkdownDocument(BMap<BString, Object> document, int chunkSize, int maxOverlapSize, BString chunkStrategy, BTypedesc textChunkType) {
        try {
            String content = Chunkers.readContent(document);
            MarkdownChunker.MarkdownChunkStrategy strategy = switch (chunkStrategy.getValue()) {
                case "MARKDOWN_HEADER" -> MarkdownChunker.MarkdownChunkStrategy.BY_HEADER;
                case "CODE_BLOCK" -> MarkdownChunker.MarkdownChunkStrategy.BY_CODE_BLOCK;
                case "HORIZONTAL_LINE" -> MarkdownChunker.MarkdownChunkStrategy.BY_HORIZONTAL_LINE;
                case "PARAGRAPH" -> MarkdownChunker.MarkdownChunkStrategy.BY_PARAGRAPH;
                case "LINE" -> MarkdownChunker.MarkdownChunkStrategy.BY_LINE;
                case "SENTENCE" -> MarkdownChunker.MarkdownChunkStrategy.BY_SENTENCE;
                case "WORD" -> MarkdownChunker.MarkdownChunkStrategy.BY_WORD;
                case "CHARACTER" -> MarkdownChunker.MarkdownChunkStrategy.BY_CHARACTER;
                default -> throw new IllegalArgumentException("unknown chunking strategy " + chunkStrategy.getValue());
            };
            List<TextSegment> textSegments = MarkdownChunker.chunk(content, strategy, chunkSize, maxOverlapSize);
            return Chunkers.createTextChunkRecordArray(document, textSegments, textChunkType.getDescribingType());
        }
        catch (RuntimeException e) {
            return Chunkers.handleChunkingErrors(e);
        }
    }

    /**
     * Splits an HTML document into chunks using {@link HtmlChunker}.
     *
     * @param document       record holding the HTML text under {@code content}
     * @param chunkSize      forwarded to {@code HtmlChunker.chunk}
     * @param maxOverlapSize forwarded to {@code HtmlChunker.chunk}
     * @param chunkStrategy  one of {@code HTML_HEADER}, {@code HTML_PARAGRAPH}, {@code HTML_LINE},
     *                       {@code SENTENCE}, {@code WORD}, {@code CHARACTER}
     * @param textChunkType  typedesc whose describing type becomes the element type of the result
     * @return a {@link BArray} of chunk records, or a {@link BError} if chunking failed or the
     *         strategy name is not recognised
     */
    public static Object chunkHtmlDocument(BMap<BString, Object> document, int chunkSize, int maxOverlapSize, BString chunkStrategy, BTypedesc textChunkType) {
        try {
            String content = Chunkers.readContent(document);
            HtmlChunker.HtmlChunkStrategy strategy = switch (chunkStrategy.getValue()) {
                case "HTML_HEADER" -> HtmlChunker.HtmlChunkStrategy.HTML_HEADER;
                case "HTML_PARAGRAPH" -> HtmlChunker.HtmlChunkStrategy.HTML_PARAGRAPH;
                case "HTML_LINE" -> HtmlChunker.HtmlChunkStrategy.HTML_LINE;
                case "SENTENCE" -> HtmlChunker.HtmlChunkStrategy.SENTENCE;
                case "WORD" -> HtmlChunker.HtmlChunkStrategy.WORD;
                case "CHARACTER" -> HtmlChunker.HtmlChunkStrategy.CHARACTER;
                default -> throw new IllegalArgumentException("unknown chunking strategy " + chunkStrategy.getValue());
            };
            List<TextSegment> textSegments = HtmlChunker.chunk(content, strategy, chunkSize, maxOverlapSize);
            return Chunkers.createTextChunkRecordArray(document, textSegments, textChunkType.getDescribingType());
        }
        catch (RuntimeException e) {
            return Chunkers.handleChunkingErrors(e);
        }
    }

    /** Reads the {@code content} field of the document record as a Java string. */
    private static String readContent(BMap<BString, Object> document) {
        return document.getStringValue(StringUtils.fromString(CONTENT_FIELD_NAME)).getValue();
    }

    /**
     * Maps a strategy name to the matching LangChain4j splitter.
     *
     * @throws IllegalArgumentException if the resolved strategy has no splitter here; the message
     *         mirrors the one used by the Markdown and HTML entry points so the error handler
     *         always has a message to report
     */
    private static DocumentSplitter getDocumentSplitter(BString chunkStrategy, int maxChunkSize, int overlapSize) {
        return switch (ChunkStrategy.fromString(chunkStrategy.getValue())) {
            case ChunkStrategy.LINE -> new DocumentByLineSplitter(maxChunkSize, overlapSize);
            case ChunkStrategy.CHARACTER -> new DocumentByCharacterSplitter(maxChunkSize, overlapSize);
            case ChunkStrategy.WORD -> new DocumentByWordSplitter(maxChunkSize, overlapSize);
            case ChunkStrategy.SENTENCE -> new DocumentBySentenceSplitter(maxChunkSize, overlapSize);
            case ChunkStrategy.PARAGRAPH -> new DocumentByParagraphSplitter(maxChunkSize, overlapSize);
            default -> throw new IllegalArgumentException("unknown chunking strategy " + chunkStrategy.getValue());
        };
    }

    /** Converts every segment into a chunk record and wraps them in an array of {@code textChunkType}. */
    private static BArray createTextChunkRecordArray(BMap<BString, Object> document, List<TextSegment> textSegments, Type textChunkType) {
        Object[] chunkArray = textSegments.stream()
                .map(textSegment -> Chunkers.createTextChunkRecord(document, textSegment))
                .toArray();
        ArrayType arrayType = TypeCreator.createArrayType(textChunkType);
        return ValueCreator.createArrayValue(chunkArray, arrayType);
    }

    /** Builds one {@code TextChunk} record from a segment's text and its merged metadata. */
    private static BMap<BString, Object> createTextChunkRecord(BMap<BString, Object> document, TextSegment textSegment) {
        // Values are heterogeneous (a String and a record, or null), so the map must be keyed to Object.
        Map<String, Object> textChunkRecordFields = new HashMap<>();
        textChunkRecordFields.put(CONTENT_FIELD_NAME, textSegment.text());
        textChunkRecordFields.put(METADATA_FIELD_NAME, Chunkers.createMetadataRecord(document, textSegment.metadata()));
        return ValueCreator.createRecordValue(ModuleUtils.getModule(), TEXT_CHUNK_RECORD_TYPE_NAME, textChunkRecordFields);
    }

    /**
     * Merges the document's own metadata with the segment metadata into a {@code Metadata} record.
     *
     * <p>The document's metadata map is copied rather than written to, so the caller's document is
     * left untouched and each chunk gets its own map. Segment entries override document entries
     * with the same key.
     *
     * @return the merged record, or {@code null} when there is no metadata at all
     */
    private static BMap<BString, Object> createMetadataRecord(BMap<BString, Object> document, Metadata metadata) {
        BMap<BString, Object> mergedMetadata = ValueCreator.createMapValue();

        // A missing key and a nil value both yield a non-BMap here and are simply skipped.
        Object documentMetadata = document.get(StringUtils.fromString(METADATA_FIELD_NAME));
        if (documentMetadata instanceof BMap<?, ?> existingMetadata) {
            for (Map.Entry<?, ?> existingEntry : existingMetadata.entrySet()) {
                mergedMetadata.put((BString) existingEntry.getKey(), existingEntry.getValue());
            }
        }

        for (Map.Entry<String, Object> entry : metadata.toMap().entrySet()) {
            BString key = StringUtils.fromString(entry.getKey());
            Object value = entry.getValue();
            if (value instanceof String stringValue) {
                if (INTEGER_FIELDS.contains(entry.getKey())) {
                    // NOTE(review): stored as a boxed Integer, as before — confirm this is the
                    // representation the Ballerina Metadata field expects.
                    mergedMetadata.put(key, Integer.parseInt(stringValue));
                } else {
                    mergedMetadata.put(key, StringUtils.fromString(stringValue));
                }
            } else if (!(value instanceof UUID)) {
                // UUID values are dropped; every other value is stored as-is.
                mergedMetadata.put(key, value);
            }
        }

        if (mergedMetadata.isEmpty()) {
            return null;
        }
        return ValueCreator.createRecordValue(ModuleUtils.getModule(), META_DATA_RECORD_TYPE_NAME, mergedMetadata);
    }

    /**
     * Turns a chunking failure into a {@link BError}.
     *
     * <p>The sub-splitter hint is removed from the message. Exceptions without a message fall back
     * to {@code e.toString()} so the error path itself cannot fail on a null message.
     */
    private static BError handleChunkingErrors(RuntimeException e) {
        String rawMessage = e.getMessage();
        String errorMessage = rawMessage == null
                ? e.toString()
                : rawMessage.replace(SUB_SPLITTER_ERROR_SUFFIX, "");
        return ModuleUtils.createError(errorMessage);
    }
}

