/*
 * Decompiled with CFR 0.152.
 */
package io.ballerina.stdlib.ai;

import dev.langchain4j.data.segment.TextSegment;
import io.ballerina.stdlib.ai.HtmlChunker;
import io.ballerina.stdlib.ai.TestUtil;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.FileAttribute;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

public class HtmlChunkerIntegrationTest {
    /** Maximum chunk size (in characters of chunk text) used by the file-driven chunking tests. */
    private static final int CHUNK_SIZE = 500;
    /** Overlap size used by the file-driven chunking test that exercises overlapping chunks. */
    private static final int MAX_OVERLAP_SIZE = 50;
    /** Directory, relative to {@code src/test/resources}, that holds the HTML input fixtures. */
    private static final String INPUT_DIR = "html-chunker-test/input";
    /** Directory, relative to {@code src/test/resources}, that holds the expected-output files. */
    private static final String EXPECTED_DIR = "html-chunker-test/expected";

    /**
     * Runs before every test method and resets the chunk id counter via
     * {@code TestUtil.resetChunkIdCounter()}.
     * NOTE(review): presumably this keeps chunk ids identical from one test to the next so the
     * formatted output is reproducible; confirm against TestUtil.
     *
     * @throws Exception if the reset call fails
     */
    @BeforeMethod
    public void setUp() throws Exception {
        TestUtil.resetChunkIdCounter();
    }

    /**
     * Unifies line endings so comparisons do not depend on the platform that produced the text.
     * Carriage-return/line-feed pairs are collapsed first, then any remaining lone carriage
     * returns are turned into line feeds.
     *
     * @param content text whose line endings should be unified; must not be {@code null}
     * @return {@code content} with every line terminator written as a single line feed
     */
    static String normalizeNewLines(String content) {
        String withoutCrLf = content.replace("\r\n", "\n");
        return withoutCrLf.replace('\r', '\n');
    }

    /**
     * Supplies the file names of the HTML fixtures to the data-driven tests.
     *
     * <p>The decompiler could not recover the original body (it only emitted a method that always
     * threw {@code IllegalStateException}), so this is a reconstruction based on how the consuming
     * tests use the value: each row carries a single file name that the tests resolve against
     * {@code INPUT_DIR} and that ends in {@code .html}.
     *
     * @return one row per {@code .html} file found directly in the input directory, each row
     *         holding just the file name; rows are sorted by name for a stable execution order
     * @throws IOException if the input directory cannot be listed
     */
    @DataProvider(name="htmlFiles")
    public Object[][] htmlFiles() throws IOException {
        Path inputDir = this.getResourcePath(INPUT_DIR);
        List<String> fileNames;
        // Files.list keeps a directory handle open until the stream is closed.
        try (var entries = Files.list(inputDir)) {
            fileNames = entries
                    .map(Path::getFileName)
                    .map(Path::toString)
                    .filter(name -> name.endsWith(".html"))
                    .sorted()
                    .collect(Collectors.toList());
        }
        // Built with a plain loop (rather than toArray(Object[][]::new)) to keep the number of
        // compiler-generated lambdas in this method to one.
        Object[][] rows = new Object[fileNames.size()][];
        for (int i = 0; i < rows.length; i++) {
            rows[i] = new Object[] {fileNames.get(i)};
        }
        return rows;
    }

    /**
     * Chunks one HTML fixture with overlap and checks the result three ways: structural
     * invariants (sequential indices, size limit, every chunk is a substring of the input) and an
     * exact comparison of the formatted output with the stored expected file.
     *
     * @param fileName name of the fixture inside {@code INPUT_DIR}, supplied by the data provider
     * @throws IOException if the fixture or the expected file cannot be read (or written when blessing)
     */
    @Test(dataProvider="htmlFiles")
    public void testHtmlChunking(String fileName) throws IOException {
        String inputContent = normalizeNewLines(this.loadFileContent(INPUT_DIR + "/" + fileName));
        List<TextSegment> chunks = HtmlChunker.chunk(inputContent, CHUNK_SIZE, MAX_OVERLAP_SIZE);

        this.validateTextSegmentIndices(chunks);
        this.validateTextSegmentMaxSize(chunks, CHUNK_SIZE);
        this.validateChunkContent(chunks, inputContent);

        String actualOutput = TestUtil.formatChunksOutput(chunks, CHUNK_SIZE, MAX_OVERLAP_SIZE);
        // Expected files are named <fixture>_<chunkSize>_<overlap>.txt.
        String expectedFileName = fileName.replace(".html", "_" + CHUNK_SIZE + "_" + MAX_OVERLAP_SIZE + ".txt");
        String expectedOutput = normalizeNewLines(this.getExpectedOutput(expectedFileName, actualOutput));
        Assert.assertEquals(actualOutput, expectedOutput, "Chunking output for " + fileName + " does not match expected result");
    }

    /**
     * Chunks one HTML fixture with zero overlap. Besides the structural checks and the
     * expected-file comparison, it asserts that concatenating the chunk texts in order
     * reproduces the (newline-normalized) input exactly.
     *
     * @param fileName name of the fixture inside {@code INPUT_DIR}, supplied by the data provider
     * @throws IOException if the fixture or the expected file cannot be read (or written when blessing)
     */
    @Test(dataProvider="htmlFiles")
    public void testHtmlChunkingWithoutOverlap(String fileName) throws IOException {
        final int noOverlap = 0;
        String inputContent = normalizeNewLines(this.loadFileContent(INPUT_DIR + "/" + fileName));
        // The element type is required here: on a raw List the stream pipeline below does not type-check.
        List<TextSegment> chunks = HtmlChunker.chunk(inputContent, CHUNK_SIZE, noOverlap);

        this.validateTextSegmentIndices(chunks);
        this.validateTextSegmentMaxSize(chunks, CHUNK_SIZE);

        String combinedChunks = chunks.stream().map(TextSegment::text).collect(Collectors.joining());
        Assert.assertEquals(combinedChunks, inputContent, "Chunking without overlap should return the original content for " + fileName);

        String actualOutput = TestUtil.formatChunksOutput(chunks, CHUNK_SIZE, noOverlap);
        // Expected files are named <fixture>_<chunkSize>_<overlap>.txt.
        String expectedFileName = fileName.replace(".html", "_" + CHUNK_SIZE + "_" + noOverlap + ".txt");
        String expectedOutput = normalizeNewLines(this.getExpectedOutput(expectedFileName, actualOutput));
        Assert.assertEquals(actualOutput, expectedOutput, "Chunking output for " + fileName + " does not match expected result");
    }

    /**
     * Chunks a small inline document containing {@code h1}-{@code h3} headings and checks that
     * chunks are produced and that at least one of them carries a {@code header1},
     * {@code header2} or {@code header3} metadata key.
     */
    @Test
    public void testHeaderSplitters() {
        String htmlWithHeaders = "<h1>Header 1</h1>\n<p>Content under header 1.</p>\n\n"
                + "<h2>Header 2</h2>\n<p>Content under header 2.</p>\n\n"
                + "<h3>Header 3</h3>\n<p>Content under header 3.</p>\n";
        // The element type is required here: on a raw List the lambda parameter below would be Object.
        List<TextSegment> chunks = HtmlChunker.chunk(htmlWithHeaders, 200, 20);
        Assert.assertFalse(chunks.isEmpty(), "HTML header chunking should produce chunks");

        boolean hasHeaderMetadata = chunks.stream()
                .map(chunk -> chunk.metadata().toMap())
                .anyMatch(metadata -> metadata.containsKey("header1")
                        || metadata.containsKey("header2")
                        || metadata.containsKey("header3"));
        Assert.assertTrue(hasHeaderMetadata, "HTML header chunking should produce chunks with header metadata");
    }

    /**
     * Reads a test resource into a string.
     *
     * @param relativePath path of the resource relative to the test resources directory
     * @return the complete file content
     * @throws IOException if the file cannot be read
     */
    private String loadFileContent(String relativePath) throws IOException {
        return Files.readString(this.getResourcePath(relativePath));
    }

    /**
     * Resolves a path below {@code src/test/resources} of the current working directory
     * (the {@code user.dir} system property).
     *
     * @param relativePath path relative to the test resources directory
     * @return the resolved path; the file is not required to exist
     */
    private Path getResourcePath(String relativePath) {
        Path workingDir = Paths.get(System.getProperty("user.dir"));
        Path testResources = workingDir.resolve("src/test/resources");
        return testResources.resolve(relativePath);
    }

    /**
     * Asserts that no chunk's text is longer than the given limit.
     *
     * @param chunks  chunks to inspect
     * @param maxSize largest permitted text length, inclusive
     */
    private void validateTextSegmentMaxSize(List<TextSegment> chunks, int maxSize) {
        for (TextSegment segment : chunks) {
            int length = segment.text().length();
            boolean withinLimit = length <= maxSize;
            Assert.assertTrue(withinLimit, "TextSegment exceeds max size of " + maxSize + ": " + length);
        }
    }

    /**
     * Asserts that the text of every chunk occurs verbatim somewhere in the original content.
     *
     * @param chunks          chunks to inspect
     * @param originalContent the content the chunks were produced from
     */
    private void validateChunkContent(List<TextSegment> chunks, String originalContent) {
        for (TextSegment segment : chunks) {
            String segmentText = segment.text();
            boolean foundInOriginal = originalContent.contains(segmentText);
            Assert.assertTrue(foundInOriginal, "Chunk content should be part of the original content: " + segmentText);
        }
    }

    /**
     * Asserts that every chunk carries an {@code index} metadata entry that is an
     * {@code Integer} equal to the chunk's position in the list.
     *
     * @param chunks chunks to inspect, in the order they were produced
     */
    private void validateTextSegmentIndices(List<TextSegment> chunks) {
        for (int i = 0; i < chunks.size(); ++i) {
            Map<String, ?> metadata = chunks.get(i).metadata().toMap();
            Assert.assertTrue(metadata.containsKey("index"), "TextSegment at position " + i + " should have index in metadata");

            Object indexValue = metadata.get("index");
            // Build the type name defensively: a null value must fail the assertion below
            // instead of throwing a NullPointerException while the message is assembled.
            String actualType = indexValue == null ? "null" : indexValue.getClass().getSimpleName();
            Assert.assertTrue(indexValue instanceof Integer, "Index should be an Integer, but was " + actualType);

            int index = (Integer) indexValue;
            Assert.assertEquals(index, i, "TextSegment at position " + i + " should have index " + i + ", but had " + index);
        }
    }

    /**
     * Returns the expected output for a test case.
     *
     * <p>Normally the expected file is read from the expected-output directory. When the
     * {@code BLESS} environment variable equals {@code true} (case-insensitive), the actual
     * output is written to that file instead, creating parent directories as needed, and is
     * returned unchanged.
     *
     * @param expectedFileName name of the expected-output file
     * @param actualOutput     output produced by the current run; only used when blessing
     * @return the stored expected output, or {@code actualOutput} when blessing
     * @throws IOException if the expected file cannot be read or written
     */
    private String getExpectedOutput(String expectedFileName, String actualOutput) throws IOException {
        Path expectedPath = this.getResourcePath("html-chunker-test/expected/" + expectedFileName);
        if (!"true".equalsIgnoreCase(System.getenv("BLESS"))) {
            return Files.readString(expectedPath);
        }
        Files.createDirectories(expectedPath.getParent());
        Files.writeString(expectedPath, actualOutput);
        return actualOutput;
    }

    /**
     * Compiler-generated helper surfaced by the decompiler: allocates the outer array of a
     * two-dimensional {@code Object} array, leaving every row unset.
     * NOTE(review): judging by its name it presumably served as the array generator of a
     * {@code toArray} call in the originally compiled {@code htmlFiles}; confirm against the
     * original sources.
     *
     * @param rowCount number of rows to allocate
     * @return a new array of {@code rowCount} rows, each {@code null}
     */
    private static /* synthetic */ Object[][] lambda$htmlFiles$3(int rowCount) {
        Object[][] rows = new Object[rowCount][];
        return rows;
    }
}

