HEX
Server: Apache/2.4.54 (Win64) OpenSSL/1.1.1p PHP/7.4.30
System: Windows NT website-api 10.0 build 20348 (Windows Server 2016) AMD64
User: SYSTEM (0)
PHP: 7.4.30
Disabled: NONE
Upload Files
File: C:/github_repos/casibase_customer_0022/object/video_word.go
// Copyright 2023 The Casibase Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package object

import (
	"fmt"
	"unicode"

	"github.com/casibase/casibase/i18n"
	"github.com/casibase/casibase/util"
	"github.com/wangbin/jiebago"
)

var seg *jiebago.Segmenter = nil

func isPunctuation(r rune) bool {
	return !unicode.IsLetter(r) && !unicode.IsNumber(r)
}

func isNumber(s string) bool {
	for _, r := range s {
		if !unicode.IsNumber(r) {
			return false
		}
	}
	return true
}

func (v *Video) PopulateWordCountMap(lang string) error {
	if len(v.Segments) == 0 {
		return nil
	}

	dictPath := "data/dict.txt"
	if !util.FileExist(dictPath) {
		return fmt.Errorf(i18n.Translate(lang, "object:Cannot generate word cloud, the dict file: [%s] does not exist"), dictPath)
	}

	if seg == nil {
		seg = &jiebago.Segmenter{}
		err := seg.LoadDictionary(dictPath)
		if err != nil {
			return err
		}
	}

	v.WordCountMap = map[string]int{}
	for _, segment := range v.Segments {
		words := seg.Cut(segment.Text, true)
		for word := range words {
			if len(word) > 3 && !isPunctuation([]rune(word)[0]) && !isNumber(word) {
				v.WordCountMap[word]++
			}
		}
	}

	return nil
}