HEX
Server: Apache/2.4.54 (Win64) OpenSSL/1.1.1p PHP/7.4.30
System: Windows NT website-api 10.0 build 20348 (Windows Server 2016) AMD64
User: SYSTEM (0)
PHP: 7.4.30
Disabled: NONE
Upload Files
File: C:/github_repos/casibase/object/video_import.go
// Copyright 2023 The Casibase Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package object

import (
	"bufio"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"

	"github.com/beego/beego/logs"
)

// getImportedVideos parses every ".txt" file located directly inside path
// (sub-directories are not descended into) with parseVideoFile and returns the
// resulting videos in the order os.ReadDir yields the entries, i.e. sorted by
// filename.
//
// Directory entries are skipped even when their name ends in ".txt", since
// they cannot be read as a text file. The first file that fails to parse
// aborts the scan: no videos are returned, and the error names the offending
// file while wrapping the underlying cause.
func getImportedVideos(path string) ([]*Video, error) {
	entries, err := os.ReadDir(path)
	if err != nil {
		return nil, err
	}

	// Kept as a non-nil empty slice so an empty directory yields the same
	// value the callers have always received.
	videos := []*Video{}
	for _, entry := range entries {
		if entry.IsDir() || filepath.Ext(entry.Name()) != ".txt" {
			continue
		}

		filePath := filepath.Join(path, entry.Name())
		video, err := parseVideoFile(filePath)
		if err != nil {
			return nil, fmt.Errorf("parsing video file %q: %w", filePath, err)
		}

		videos = append(videos, video)
	}

	return videos, nil
}

// getParentFolderName returns the base name of the directory that contains
// filePath: filepath.Dir strips the final path element, and filepath.Base
// then keeps only the last element of what remains.
func getParentFolderName(filePath string) string {
	return filepath.Base(filepath.Dir(filePath))
}

// parseVideoFile reads the text file at filePath and builds a Video from it.
//
// The file is consumed in three phases:
//  1. The first line is handed to parseTimeString, which yields CreatedTime
//     and VideoLength.
//  2. Lines are skipped until one equals "关键词:"; the line after that marker
//     is handed to parseKeywords.
//  3. The remaining lines are grouped into blocks separated by empty lines,
//     and each block is handed to parseTextRecord, which appends to
//     video.Segments.
//
// The file's base name without its extension becomes Name and DisplayName,
// and the name of its parent directory becomes Tag. After parsing, every
// segment's EndTime is set to the StartTime of the following segment; the
// last segment ends at VideoLength converted by timeToSeconds.
//
// An error is returned when the file cannot be opened, is empty, cannot be
// read completely (including a line longer than the scanner's token limit),
// or when any of the parsing helpers fails.
func parseVideoFile(filePath string) (*Video, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	fileId := strings.TrimSuffix(filepath.Base(filePath), filepath.Ext(filePath))
	tag := getParentFolderName(filePath)
	video := &Video{
		Owner:        "tsing",
		Name:         fileId,
		DisplayName:  fileId,
		Tag:          tag,
		Type:         ".mp4",
		EditMode:     "Text Recognition",
		Labels:       []*Label{},
		Segments:     []*Label{},
		WordCountMap: map[string]int{},
		Remarks:      []*Remark{},
		Remarks2:     []*Remark{},
		DataUrls:     []string{},
		Keywords:     []string{},
	}

	scanner := bufio.NewScanner(file)

	// Phase 1: read and parse the first line (time information).
	if !scanner.Scan() {
		// Scan also returns false on a read failure; report that instead of
		// mislabelling it as an empty file.
		if err := scanner.Err(); err != nil {
			return nil, fmt.Errorf("reading first line: %w", err)
		}
		return nil, fmt.Errorf("file is empty")
	}
	createdTime, videoLength, err := parseTimeString(scanner.Text())
	if err != nil {
		return nil, err
	}
	video.CreatedTime = createdTime
	video.VideoLength = videoLength

	// Phase 2: skip ahead to the keyword marker and parse the line after it.
	for scanner.Scan() {
		if scanner.Text() != "关键词:" {
			continue
		}
		// The result of this Scan is deliberately not tested, matching the
		// established flow; a read failure is caught by the Err check below.
		scanner.Scan()
		video.Keywords = parseKeywords(scanner.Text())
		break
	}
	// Scan stops silently on I/O errors and over-long lines, so the error
	// must be checked explicitly or a truncated file would import as valid.
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("scanning for keywords: %w", err)
	}

	// Phase 3: parse the text records, one blank-line-separated block at a time.
	var recordBuilder strings.Builder
	for scanner.Scan() {
		line := scanner.Text()
		if line != "" {
			recordBuilder.WriteString(line + "\n")
			continue
		}

		// An empty line closes the current block.
		err = parseTextRecord(recordBuilder.String(), video)
		if err != nil {
			return nil, err
		}
		recordBuilder.Reset()
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("scanning text records: %w", err)
	}

	// The last block is not followed by an empty line, so flush it here.
	err = parseTextRecord(recordBuilder.String(), video)
	if err != nil {
		return nil, err
	}

	// Each segment lasts until the next one starts; the final segment lasts
	// until the end of the video. VideoLength is only converted when there is
	// at least one segment, as before.
	if count := len(video.Segments); count > 0 {
		for i := 0; i < count-1; i++ {
			video.Segments[i].EndTime = video.Segments[i+1].StartTime
		}

		endTime, err := timeToSeconds(video.VideoLength)
		if err != nil {
			return nil, err
		}
		video.Segments[count-1].EndTime = endTime
	}

	return video, nil
}

// parseTextRecord parses one blank-line-delimited block of text and appends
// the resulting Label to video.Segments.
//
// record is a sequence of "\n"-terminated lines. Empty lines and the header
// line "文字记录:" are dropped, and the surviving lines are trimmed of
// surrounding spaces. The first surviving line is split by SplitLastN into a
// speaker part and a time part (presumably at the last space; confirm against
// SplitLastN); the speaker is passed through mapSpeaker and the time through
// timeToSeconds. All further lines are joined with single spaces to form the
// label text. The label's Id is the number of segments already on the video.
//
// A record with no content lines (an empty string, or one that holds only the
// header) is ignored and nil is returned. An error is returned when the first
// line does not split into exactly two parts or when the time cannot be
// converted.
func parseTextRecord(record string, video *Video) error {
	tokens := strings.Split(record, "\n")
	if len(tokens) < 2 {
		return nil // ignore an empty record
	}

	lines := []string{}
	for _, token := range tokens {
		if token != "" && token != "文字记录:" {
			token = strings.Trim(token, " ")
			lines = append(lines, token)
		}
	}

	// A block that held nothing but the "文字记录:" header leaves no lines to
	// parse; without this guard the lines[0] access below panics.
	if len(lines) == 0 {
		return nil
	}

	// The first line carries the speaker and the timestamp.
	parts := SplitLastN(lines[0], " ", 2)
	if len(parts) != 2 {
		return fmt.Errorf("invalid record format")
	}

	speaker, timeStr := parts[0], parts[1]

	speaker = mapSpeaker(speaker)

	timeStr = strings.Trim(timeStr, " ")
	startTime, err := timeToSeconds(timeStr)
	if err != nil {
		return err
	}

	// Every remaining line is spoken content.
	text := strings.Join(lines[1:], " ")

	label := &Label{
		Id:        strconv.Itoa(len(video.Segments)),
		StartTime: startTime,
		Text:      text,
		Speaker:   speaker,
	}

	video.Segments = append(video.Segments, label)
	return nil
}

// importVideos loads every video that getImportedVideos finds under path and
// stores each one with AddVideo, logging its index and contents just before
// the insert. It stops at the first failure and returns that error; videos
// added before the failure are not undone by this function.
func importVideos(path string) error {
	imported, err := getImportedVideos(path)
	if err != nil {
		return err
	}

	for idx := range imported {
		item := imported[idx]
		logs.Info("[%d] Add video: %v\n", idx, item)
		if _, err = AddVideo(item); err != nil {
			return err
		}
	}

	return nil
}