/*
 * Decompiled with CFR 0.152.
 */
package io.ballerina.stdlib.ai;

import dev.langchain4j.data.segment.TextSegment;
import io.ballerina.stdlib.ai.HtmlHeaderSplitter;
import io.ballerina.stdlib.ai.HtmlParagraphSplitter;
import io.ballerina.stdlib.ai.RecursiveChunker;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.stream.IntStream;

/**
 * Splits HTML content into {@link TextSegment}s by running a {@link RecursiveChunker}
 * with the splitter list selected by an {@link HtmlChunkStrategy}.
 */
public class HtmlChunker {
    /**
     * Chunks {@code content} using the splitters supplied by {@code strategy}.
     *
     * @param content        the text to split
     * @param strategy       selects the splitter list via {@link HtmlChunkStrategy#getSplitters()}
     * @param maxChunkSize   size limit passed to the chunker; must be greater than 0
     * @param maxOverlapSize overlap limit passed to the chunker; must not exceed {@code maxChunkSize}
     * @return one segment per chunk produced by the chunker, each created with its position in the chunk list
     * @throws IllegalArgumentException if {@code maxChunkSize <= 0} or {@code maxOverlapSize > maxChunkSize}
     */
    static List<TextSegment> chunk(String content, HtmlChunkStrategy strategy, int maxChunkSize, int maxOverlapSize) {
        if (maxChunkSize <= 0) {
            throw new IllegalArgumentException("Chunk size must be greater than 0");
        }
        if (maxOverlapSize > maxChunkSize) {
            throw new IllegalArgumentException("Max overlap size must be less than or equal to chunk size");
        }
        RecursiveChunker chunker = new RecursiveChunker(Set.of());
        List<RecursiveChunker.Chunk> chunks =
                chunker.chunkUsingSplitters(content, strategy.getSplitters(), maxChunkSize, maxOverlapSize);
        return toTextSegments(chunks);
    }

    /**
     * Chunks {@code content} using the {@link HtmlChunkStrategy#HTML_HEADER} strategy.
     *
     * <p>Delegates to {@link #chunk(String, HtmlChunkStrategy, int, int)} so that both entry
     * points apply the same argument validation.
     *
     * @param content        the text to split
     * @param chunkSize      size limit passed to the chunker; must be greater than 0
     * @param maxOverlapSize overlap limit passed to the chunker; must not exceed {@code chunkSize}
     * @return one segment per chunk produced by the chunker
     * @throws IllegalArgumentException if {@code chunkSize <= 0} or {@code maxOverlapSize > chunkSize}
     */
    static List<TextSegment> chunk(String content, int chunkSize, int maxOverlapSize) {
        return chunk(content, HtmlChunkStrategy.HTML_HEADER, chunkSize, maxOverlapSize);
    }

    /** Converts each chunk to a text segment, passing the chunk's list position as its index. */
    private static List<TextSegment> toTextSegments(List<RecursiveChunker.Chunk> chunks) {
        return IntStream.range(0, chunks.size())
                .mapToObj(i -> chunks.get(i).toTextSegment(i))
                .toList();
    }

    /**
     * Chunking strategies. The declaration order matters: {@link #getSplitters()} returns the
     * splitter(s) of the selected constant followed by those of every constant declared after it.
     */
    enum HtmlChunkStrategy {
        HTML_HEADER,
        HTML_PARAGRAPH,
        HTML_LINE,
        SENTENCE,
        WORD,
        CHARACTER;

        /**
         * Builds the splitter list for this strategy.
         *
         * <p>The switch cascades on purpose: starting at this constant's case, every following
         * case also contributes its splitter, so {@code HTML_HEADER} yields all splitters and
         * {@code CHARACTER} yields only the character splitter.
         * NOTE(review): the cascade is preserved from the original code; it is presumably meant
         * to give the chunker progressively finer fallbacks — confirm against RecursiveChunker.
         *
         * @return a new mutable list of splitters in the order header (levels 1-6), paragraph,
         *         {@code <br>} delimiter, sentence, word, character, starting from this strategy
         */
        public List<RecursiveChunker.Splitter> getSplitters() {
            List<RecursiveChunker.Splitter> splitters = new ArrayList<>();
            // Switch on the constant itself rather than ordinal() so reordering or inserting
            // constants cannot silently remap the cases.
            switch (this) {
                case HTML_HEADER:
                    for (int level = 1; level <= 6; level++) {
                        splitters.add(new HtmlHeaderSplitter(level));
                    }
                    // fall through
                case HTML_PARAGRAPH:
                    splitters.add(new HtmlParagraphSplitter());
                    // fall through
                case HTML_LINE:
                    splitters.add(new RecursiveChunker.SimpleDelimiterSplitter("<br>"));
                    // fall through
                case SENTENCE:
                    splitters.add(RecursiveChunker.Splitter.createSentenceSplitter());
                    // fall through
                case WORD:
                    splitters.add(RecursiveChunker.Splitter.createWordSplitter());
                    // fall through
                case CHARACTER:
                    splitters.add(RecursiveChunker.Splitter.createCharacterSplitter());
                    break;
                default:
                    break;
            }
            return splitters;
        }
    }
}

