HEX
Server: Apache/2.4.54 (Win64) OpenSSL/1.1.1p PHP/7.4.30
System: Windows NT website-api 10.0 build 20348 (Windows Server 2016) AMD64
User: SYSTEM (0)
PHP: 7.4.30
Disabled: NONE
Upload Files
File: C:/github_repos/casibase_customer_0022/split/markdown_test.go
// Copyright 2025 The Casibase Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !skipCi
// +build !skipCi

package split

import (
	"fmt"
	"reflect"
	"testing"
)

// TestSplit checks that the "Markdown" split provider breaks a document that
// mixes headings, numbered points, a pipe table, a borderless table, and an
// HTML table into the expected list of sections.
//
// The expected slice below lists the heading/paragraph sections first and the
// three tables afterwards, each table kept as a single section.
//
// Failures are reported through t so that a broken provider fails only this
// test instead of panicking and aborting the rest of the test binary.
func TestSplit(t *testing.T) {
	p, err := GetSplitProvider("Markdown")
	if err != nil {
		t.Fatalf("GetSplitProvider(%q) returned an error: %v", "Markdown", err)
	}

	text := `# Section 1

Here is a standard Markdown table:

| Header1 | Header2 | Header3 |
|---------|---------|---------|
| A1      | B1      | C1      |
| A2      | B2      | C2      |

# Section 2

This is the content of the second section.

1. **The first point**

   This is the first sentence of the content below the first point.

2. **The second point**

   This is the first sentence of the content below the second point.

A borderless Markdown table:
Some text before

Data1 | Data2
:-----|:-----
More data1 | More data2

There is also an HTML table:
<table>
    <tr>
        <td>Cell 1</td>
        <td>Cell 2</td>
    </tr>
</table>
`

	textSections, err := p.SplitText(text)
	if err != nil {
		t.Fatalf("SplitText returned an error: %v", err)
	}
	targetSections := []string{
		"# Section 1\n\nHere is a standard Markdown table:",
		"# Section 2\n\nThis is the content of the second section.",
		"1. **The first point**\n\nThis is the first sentence of the content below the first point.",
		"2. **The second point**\n\nThis is the first sentence of the content below the second point.\nA borderless Markdown table:\nSome text before\nThere is also an HTML table:",
		"| Header1 | Header2 | Header3 |\n|---------|---------|---------|\n| A1      | B1      | C1      |\n| A2      | B2      | C2      |",
		"Data1 | Data2\n:-----|:-----\nMore data1 | More data2",
		"<table>\n    <tr>\n        <td>Cell 1</td>\n        <td>Cell 2</td>\n    </tr>\n</table>",
	}

	if !reflect.DeepEqual(textSections, targetSections) {
		// Render both values in Go syntax so whitespace and newline
		// differences between sections are visible in the failure output.
		diff := fmt.Sprintf("got:  %#v\nwant: %#v", textSections, targetSections)
		t.Fatalf("markdown test failed: did not get the expected result\n%s", diff)
	}
}

// TestExtractMarkdownTree feeds a document with three heading levels to
// ExtractMarkdownTree and compares the returned map against a fixed table.
//
// Each expected key is the chain of headings leading to a section, joined with
// " > ", and each expected value is that section's body text with paragraphs
// separated by a single newline.
func TestExtractMarkdownTree(t *testing.T) {
	want := map[string]string{
		"# main title":                                      "This is the content of the main title.",
		"# main title > ## sub title 1":                     "This is the content of the sub title 1.",
		"# main title > ## sub title 1 > ### sub title 1.1": "This is the content of the sub title 1.1.",
		"# main title > ## sub title 2":                     "This is the content of the sub title 2.\nSecond paragraph.",
		"# another main title":                              "This is the content of the another main title.",
		"# another main title > ## another sub title":       "This is the content of the another sub title.",
	}

	text := `# main title

This is the content of the main title.

## sub title 1

This is the content of the sub title 1.

### sub title 1.1

This is the content of the sub title 1.1.

## sub title 2

This is the content of the sub title 2.

Second paragraph.

# another main title

This is the content of the another main title.

## another sub title

This is the content of the another sub title.
`

	got := ExtractMarkdownTree(text)

	// A size mismatch means missing or unexpected headings; stop here rather
	// than emit a cascade of per-key errors.
	if len(got) != len(want) {
		t.Fatalf("Expected %d headings, got %d", len(want), len(got))
	}

	for heading, wantBody := range want {
		gotBody, found := got[heading]
		if !found {
			t.Errorf("Expected key '%s' not found in result", heading)
			continue
		}
		if gotBody != wantBody {
			t.Errorf("For key '%s':\nExpected:\n%s\nGot:\n%s", heading, wantBody, gotBody)
		}
	}
}