/*
 * Decompiled with CFR 0.152.
 */
package io.ballerina.stdlib.ai;

import dev.langchain4j.data.segment.TextSegment;
import io.ballerina.stdlib.ai.RecursiveChunker;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.IntStream;

class MarkdownChunker {
    // Chunk "type" metadata values passed to every RecursiveChunker created by this class.
    // "code_block" is the type that CodeBlockSplitter attaches to fenced code blocks.
    // NOTE(review): presumably chunks of these types are never merged with their
    // neighbours — confirm against RecursiveChunker.
    private static final Set<String> NON_MERGEABLE_TYPES = Set.of("code_block");

    // Package-private no-arg constructor; every member visible in this file is static.
    MarkdownChunker() {
    }

    /**
     * Splits markdown {@code content} into text segments using the splitters of the given strategy.
     *
     * <p>The chunks produced by {@link RecursiveChunker} are converted to text segments, each
     * receiving its zero-based position in the result as the argument to {@code toTextSegment}.
     *
     * @param content        the markdown text to split
     * @param strategy       the strategy whose splitters are handed to the recursive chunker
     * @param maxChunkSize   the maximum chunk size; must be greater than 0
     * @param maxOverlapSize the maximum overlap size; must not exceed {@code maxChunkSize}
     * @return an unmodifiable list of text segments in chunk order
     * @throws IllegalArgumentException if {@code maxChunkSize <= 0} or
     *                                  {@code maxOverlapSize > maxChunkSize}
     */
    static List<TextSegment> chunk(String content, MarkdownChunkStrategy strategy, int maxChunkSize, int maxOverlapSize) {
        // Reject invalid sizes before any chunker is created.
        if (maxChunkSize <= 0) {
            throw new IllegalArgumentException("Chunk size must be greater than 0");
        }
        if (maxOverlapSize > maxChunkSize) {
            throw new IllegalArgumentException("Max overlap size must be less than or equal to chunk size");
        }

        RecursiveChunker recursiveChunker = new RecursiveChunker(NON_MERGEABLE_TYPES);
        List<RecursiveChunker.Chunk> pieces =
                recursiveChunker.chunkUsingSplitters(content, strategy.getSplitters(), maxChunkSize, maxOverlapSize);

        // Pair every chunk with its position so the segment can record its index.
        IntStream positions = IntStream.range(0, pieces.size());
        return positions
                .mapToObj(position -> pieces.get(position).toTextSegment(position))
                .toList();
    }

    /**
     * Splits markdown {@code content} using the {@link MarkdownChunkStrategy#BY_HEADER} strategy.
     *
     * <p>Delegates to the four-argument overload so that both entry points apply the same
     * argument validation and the same chunk-to-segment conversion.
     *
     * @param content        the markdown text to split
     * @param chunkSize      the maximum chunk size; must be greater than 0
     * @param maxOverlapSize the maximum overlap size; must not exceed {@code chunkSize}
     * @return an unmodifiable list of text segments in chunk order
     * @throws IllegalArgumentException if {@code chunkSize <= 0} or
     *                                  {@code maxOverlapSize > chunkSize}
     */
    static List<TextSegment> chunk(String content, int chunkSize, int maxOverlapSize) {
        return chunk(content, MarkdownChunkStrategy.BY_HEADER, chunkSize, maxOverlapSize);
    }

    /**
     * Granularity at which markdown content is first split.
     *
     * <p>The constants are listed from coarsest to finest. The splitter list of a strategy
     * starts with that strategy's own splitter(s) and continues with the splitters of every
     * finer strategy, in order.
     */
    static enum MarkdownChunkStrategy {
        BY_HEADER,
        BY_CODE_BLOCK,
        BY_HORIZONTAL_LINE,
        BY_PARAGRAPH,
        BY_LINE,
        BY_SENTENCE,
        BY_WORD,
        BY_CHARACTER;

        /**
         * Builds the ordered splitter list for this strategy.
         *
         * @return a new mutable list holding this strategy's splitters followed by the
         *         splitters of all finer-grained strategies
         */
        public List<RecursiveChunker.Splitter> getSplitters() {
            List<RecursiveChunker.Splitter> splitters = new ArrayList<>();
            // Switch on the constant itself rather than on ordinal() so the mapping does not
            // depend on declaration order. Every case deliberately falls through to the next:
            // a coarse strategy also needs all the finer splitters behind it.
            switch (this) {
                case BY_HEADER:
                    // Header levels 2 through 6, coarsest first.
                    splitters.addAll(List.of(new HeaderSplitter(2), new HeaderSplitter(3), new HeaderSplitter(4), new HeaderSplitter(5), new HeaderSplitter(6)));
                    // fall through
                case BY_CODE_BLOCK:
                    splitters.add(new CodeBlockSplitter());
                    // fall through
                case BY_HORIZONTAL_LINE:
                    // Horizontal rules written with '*', '-' or '_' on a line of their own.
                    splitters.addAll(List.of(new RecursiveChunker.SimpleDelimiterSplitter("\n\\*\\*\\*+\n"), new RecursiveChunker.SimpleDelimiterSplitter("\\n---+\\n"), new RecursiveChunker.SimpleDelimiterSplitter("\n___+\n")));
                    // fall through
                case BY_PARAGRAPH:
                    splitters.add(new RecursiveChunker.SimpleDelimiterSplitter("\n\n"));
                    // fall through
                case BY_LINE:
                    splitters.add(new RecursiveChunker.SimpleDelimiterSplitter("\n"));
                    // fall through
                case BY_SENTENCE:
                    splitters.add(RecursiveChunker.Splitter.createSentenceSplitter());
                    // fall through
                case BY_WORD:
                    splitters.add(RecursiveChunker.Splitter.createWordSplitter());
                    // fall through
                case BY_CHARACTER:
                    splitters.add(RecursiveChunker.Splitter.createCharacterSplitter());
                    break;
                default:
                    break;
            }
            return splitters;
        }
    }

    /**
     * Splitter that separates fenced code blocks from the surrounding text.
     *
     * <p>Text outside a fence is emitted with empty metadata. A fenced block is emitted as one
     * piece, including both fence lines, with metadata {@code "type" -> "code_block"} and
     * {@code "language"} set to the word following the opening fence, or {@code "unknown"} when
     * none is given. A block whose closing fence is never found extends to the end of the content.
     */
    static class CodeBlockSplitter
    implements RecursiveChunker.Splitter {
        // Compiled once for the class: Pattern instances are immutable and can be shared by
        // every splitter and iterator. Only the Matchers are per-iteration state.
        private static final Pattern CODE_BLOCK_START = Pattern.compile("```(\\w+)?\\n");
        private static final Pattern CODE_BLOCK_END = Pattern.compile("```\\n");

        CodeBlockSplitter() {
        }

        /**
         * Lazily splits {@code content} into alternating plain-text and code-block pieces.
         *
         * @param content the text to split
         * @return an iterator over the pieces in document order; {@code next()} throws
         *         {@link NoSuchElementException} once all pieces have been returned
         */
        @Override
        public Iterator<RecursiveChunker.Chunk> split(final String content) {
            return new Iterator<RecursiveChunker.Chunk>() {
                private final Matcher startMatcher = CODE_BLOCK_START.matcher(content);
                private final Matcher endMatcher = CODE_BLOCK_END.matcher(content);
                // Offset of the first character not yet emitted.
                private int lastIndex = 0;
                private String nextPiece = null;
                private Map<String, String> nextPieceMetadata = Map.of();
                // True while a staged piece is waiting to be returned by next().
                private boolean hasNextPiece = false;
                // True once the end of the content has been reached.
                private boolean finished = false;

                /** Records the piece that the following call to next() will return. */
                private void stage(String piece, Map<String, String> metadata) {
                    this.nextPiece = piece;
                    this.nextPieceMetadata = metadata;
                    this.hasNextPiece = true;
                }

                /** Stages the next piece, or marks the iteration as finished. */
                private void prepareNext() {
                    if (this.finished) {
                        return;
                    }
                    this.startMatcher.region(this.lastIndex, content.length());
                    if (!this.startMatcher.find()) {
                        // No further opening fence: whatever remains is plain text.
                        if (this.lastIndex < content.length()) {
                            this.stage(content.substring(this.lastIndex), Map.of());
                            this.lastIndex = content.length();
                        } else {
                            this.hasNextPiece = false;
                        }
                        this.finished = true;
                        return;
                    }

                    int fenceStart = this.startMatcher.start();
                    if (fenceStart > this.lastIndex) {
                        // Emit the text in front of the fence first. The fence itself is
                        // matched again, at lastIndex, on the next call.
                        this.stage(content.substring(this.lastIndex, fenceStart), Map.of());
                        this.lastIndex = fenceStart;
                        return;
                    }

                    String language = this.startMatcher.group(1);
                    if (language == null) {
                        language = "unknown";
                    }
                    Map<String, String> codeBlockMetadata = Map.of("language", language, "type", "code_block");

                    // Look for the closing fence only after the opening fence line.
                    this.endMatcher.region(this.startMatcher.end(), content.length());
                    if (this.endMatcher.find()) {
                        int blockEnd = this.endMatcher.end();
                        this.stage(content.substring(fenceStart, blockEnd), codeBlockMetadata);
                        this.lastIndex = blockEnd;
                    } else {
                        // Unterminated block: it swallows the rest of the content.
                        this.stage(content.substring(fenceStart), codeBlockMetadata);
                        this.lastIndex = content.length();
                        this.finished = true;
                    }
                }

                @Override
                public boolean hasNext() {
                    if (!this.hasNextPiece && !this.finished) {
                        this.prepareNext();
                    }
                    return this.hasNextPiece;
                }

                @Override
                public RecursiveChunker.Chunk next() {
                    if (!this.hasNext()) {
                        throw new NoSuchElementException();
                    }
                    this.hasNextPiece = false;
                    return new RecursiveChunker.Chunk(this.nextPiece, this.nextPieceMetadata);
                }
            };
        }
    }

    /**
     * Splitter that cuts content at markdown headers of one fixed level.
     *
     * <p>A header is a line of exactly {@code level} '#' characters, a space and the title,
     * with a newline before and after it. The header line, including both surrounding newlines,
     * is emitted as its own piece. That piece and all text up to the next header of the same
     * level carry metadata {@code "header" -> title}. Text before the first header has empty
     * metadata.
     *
     * <p>NOTE(review): because the pattern requires a leading newline, a header on the very
     * first line of the content is not recognised as a delimiter.
     */
    static class HeaderSplitter
    implements RecursiveChunker.Splitter {
        private final Pattern headerPattern;

        /**
         * @param level the number of '#' characters that introduce the headers to split on
         */
        HeaderSplitter(int level) {
            // Build the regex by plain concatenation instead of String.format("%d"). Formatter
            // localizes digits for the default locale, and a non-ASCII digit inside the
            // "#{n}" quantifier makes Pattern.compile throw.
            this.headerPattern = Pattern.compile("\n#{" + level + "} (.*)\n");
        }

        /**
         * Lazily splits {@code content} into header pieces and the text between them.
         *
         * @param content the text to split
         * @return an iterator over the pieces in document order; {@code next()} throws
         *         {@link NoSuchElementException} once all pieces have been returned
         */
        @Override
        public Iterator<RecursiveChunker.Chunk> split(final String content) {
            return new Iterator<RecursiveChunker.Chunk>() {
                private final Matcher matcher = headerPattern.matcher(content);
                // Offset of the first character not yet emitted.
                private int lastIndex = 0;
                private String nextPiece = null;
                private Map<String, String> nextPieceMetadata = Map.of();
                // True while a staged piece is waiting to be returned by next().
                private boolean hasNextPiece = false;
                // True once the end of the content has been reached.
                private boolean finished = false;
                // Title of the most recently emitted header; null before the first header.
                private String lastHeader = null;

                /** Metadata for text that belongs to the most recently seen header, if any. */
                private Map<String, String> currentHeaderMetadata() {
                    return this.lastHeader != null ? Map.of("header", this.lastHeader) : Map.of();
                }

                /** Stages the next piece, or marks the iteration as finished. */
                private void prepareNext() {
                    if (this.finished) {
                        return;
                    }
                    this.matcher.region(this.lastIndex, content.length());
                    if (!this.matcher.find()) {
                        // No further header: whatever remains belongs to the last header.
                        if (this.lastIndex < content.length()) {
                            this.nextPiece = content.substring(this.lastIndex);
                            this.nextPieceMetadata = this.currentHeaderMetadata();
                            this.lastIndex = content.length();
                            this.hasNextPiece = true;
                        } else {
                            this.hasNextPiece = false;
                        }
                        this.finished = true;
                        return;
                    }

                    int headerStart = this.matcher.start();
                    if (headerStart > this.lastIndex) {
                        // Emit the text in front of the header first. The header itself is
                        // matched again, at lastIndex, on the next call.
                        this.nextPiece = content.substring(this.lastIndex, headerStart);
                        this.nextPieceMetadata = this.currentHeaderMetadata();
                        this.lastIndex = headerStart;
                        this.hasNextPiece = true;
                        return;
                    }

                    // The header line sits exactly at lastIndex: emit it and remember its title.
                    int headerEnd = this.matcher.end();
                    this.lastHeader = this.matcher.group(1);
                    this.nextPiece = content.substring(headerStart, headerEnd);
                    this.nextPieceMetadata = Map.of("header", this.lastHeader);
                    this.lastIndex = headerEnd;
                    this.hasNextPiece = true;
                }

                @Override
                public boolean hasNext() {
                    if (!this.hasNextPiece && !this.finished) {
                        this.prepareNext();
                    }
                    return this.hasNextPiece;
                }

                @Override
                public RecursiveChunker.Chunk next() {
                    if (!this.hasNext()) {
                        throw new NoSuchElementException();
                    }
                    this.hasNextPiece = false;
                    return new RecursiveChunker.Chunk(this.nextPiece, this.nextPieceMetadata);
                }
            };
        }
    }
}

