initial commit

This commit is contained in:
2025-06-06 16:35:29 +02:00
commit 58b7437ef8
10 changed files with 540 additions and 0 deletions

52
analyze_file.go Normal file
View File

@ -0,0 +1,52 @@
package parsers
import (
"context"
"os"
"git.tordarus.net/nyaanime/model"
"gopkg.in/vansante/go-ffprobe.v2"
)
// AnalyzeFile probes the media file at path with ffprobe and reports the
// height of its primary video stream as well as its audio and subtitle
// languages.
//
// The primary video stream is the first one flagged as default. When no video
// stream carries that flag, the first video stream is used instead so that the
// resolution is still reported.
//
// Audio and subtitle streams contribute their own language tag when they have
// one. An untagged stream only contributes if it is flagged as default, in
// which case the language of the primary video stream stands in for it. If that
// language is unknown as well, the stream is skipped rather than recorded as an
// empty language code.
//
// All language codes are normalized with ParseLanguage. An error is returned
// if the file cannot be opened or if probing fails.
//
// TODO cache
func AnalyzeFile(path string) (*model.ParsedFile, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	data, err := ffprobe.ProbeReader(context.Background(), file)
	if err != nil {
		return nil, err
	}

	props := &model.ParsedFile{File: path}

	videoLang := ""
	if videos := data.StreamType(ffprobe.StreamVideo); len(videos) > 0 {
		// Prefer the stream flagged as default, fall back to the first one.
		primary := videos[0]
		for _, s := range videos {
			if s.Disposition.Default > 0 {
				primary = s
				break
			}
		}
		props.Resolution = model.Resolution(primary.Height)
		videoLang = ParseLanguage(primary.Tags.Language)
	}

	for _, s := range data.StreamType(ffprobe.StreamAudio) {
		if lang, ok := streamLanguage(s.Tags.Language, s.Disposition.Default > 0, videoLang); ok {
			props.Languages = append(props.Languages, lang)
		}
	}

	for _, s := range data.StreamType(ffprobe.StreamSubtitle) {
		if lang, ok := streamLanguage(s.Tags.Language, s.Disposition.Default > 0, videoLang); ok {
			props.Subtitles = append(props.Subtitles, lang)
		}
	}

	return props, nil
}

// streamLanguage decides which language code a single audio or subtitle stream
// contributes. A non-empty tag always wins; an untagged default stream inherits
// fallback if that is non-empty. ok is false when the stream contributes nothing.
func streamLanguage(tag string, isDefault bool, fallback string) (lang string, ok bool) {
	switch {
	case tag != "":
		return ParseLanguage(tag), true
	case isDefault && fallback != "":
		return fallback, true
	default:
		return "", false
	}
}

11
errors.go Normal file
View File

@ -0,0 +1,11 @@
package parsers
import "git.tordarus.net/tordarus/adverr/v2"
// Error templates used by RegexTorrentParser and RegexFileParser, which panic
// with them when a parser is constructed from an inconsistent combination of
// regex and options.
//
// Every template except ErrTorrentParserInvalidGroupReference is instantiated
// with the offending regex as its only argument, so each of those messages
// carries a '%s' placeholder for it.
var (
	// ErrTorrentParserInsufficientData: the options do not reference a title and an episode group.
	ErrTorrentParserInsufficientData = adverr.NewErrTmpl("ErrTorrentParserInsufficientData", "regex '%s' must at least provide title and episode")

	// ErrTorrentParserInsufficientLanguageData: neither a language group nor default languages are configured.
	ErrTorrentParserInsufficientLanguageData = adverr.NewErrTmpl("ErrTorrentParserInsufficientLanguageData", "regex '%s': no language reference in regex and no default language set")

	// ErrTorrentParserInsufficientSubtitleData: neither a subtitle group nor default subtitles are configured.
	ErrTorrentParserInsufficientSubtitleData = adverr.NewErrTmpl("ErrTorrentParserInsufficientSubtitleData", "regex '%s': no subtitle reference in regex and no default subtitle set")

	// ErrTorrentParserInsufficientResolutionData: neither a resolution group nor a default resolution is configured.
	ErrTorrentParserInsufficientResolutionData = adverr.NewErrTmpl("ErrTorrentParserInsufficientResolutionData", "regex '%s': no resolution reference in regex and no default resolution set")

	// ErrTorrentParserInvalidGroupReference: the options reference a group index the regex does not have.
	// Arguments: the referenced group index, the number of groups in the regex.
	ErrTorrentParserInvalidGroupReference = adverr.NewErrTmpl("ErrTorrentParserInvalidGroupReference", "options references group %d but regex only has %d groups")
)

11
go.mod Normal file
View File

@ -0,0 +1,11 @@
module git.tordarus.net/tordarus/parsers
go 1.18
require (
git.tordarus.net/nyaanime/model v0.0.1
git.tordarus.net/tordarus/adverr/v2 v2.0.2
gopkg.in/vansante/go-ffprobe.v2 v2.2.1
)
require git.tordarus.net/tordarus/anilist v1.5.2 // indirect

8
go.sum Normal file
View File

@ -0,0 +1,8 @@
git.tordarus.net/nyaanime/model v0.0.1 h1:/I+87Z6eEw/o2adltKnCk4FZai2mPekjYlzEjY1ppyQ=
git.tordarus.net/nyaanime/model v0.0.1/go.mod h1:oHV82UMNy4XgPHkI6tZiwabdi6myqHXgjMi9sNZ+rG4=
git.tordarus.net/tordarus/adverr/v2 v2.0.2 h1:7nvNjMMjtGPq0EY6duMiv+seJ7MacNvKSBmckHl6Erg=
git.tordarus.net/tordarus/adverr/v2 v2.0.2/go.mod h1:gCC46KsWosZJh7MVNDEU99hKQoxEWZgHITDHtmFwwiQ=
git.tordarus.net/tordarus/anilist v1.5.2 h1:SxlovS+e3lgL2SowQQwj8dQrIZzRFPomcGCw3V+My0Q=
git.tordarus.net/tordarus/anilist v1.5.2/go.mod h1:Mrhx/9+8HJVj5ebQ5fJuXqL220tEJhgQIqFK2WKPXgA=
gopkg.in/vansante/go-ffprobe.v2 v2.2.1 h1:sFV08OT1eZ1yroLCZVClIVd9YySgCh9eGjBWO0oRayI=
gopkg.in/vansante/go-ffprobe.v2 v2.2.1/go.mod h1:qF0AlAjk7Nqzqf3y333Ly+KxN3cKF2JqA3JT5ZheUGE=

148
lang_codes.go Normal file
View File

@ -0,0 +1,148 @@
package parsers
import "strings"
// langSynonyms converts all irregular lang codes to ISO 639-1.
//
// Keys are lowercase. They cover the tags used in Erai-Raws release names
// (three-letter codes and country-style two-letter codes) and the three-letter
// ISO 639-2 codes found in container metadata, in both their bibliographic (B)
// and terminologic (T) form where the two differ.
var langSynonyms = map[string]string{
	// english
	"eng": "en", // Erai-Raws | ffprobe
	"us":  "en", // Erai-Raws
	// portuguese
	"por-br": "pt", // Erai-Raws
	"por":    "pt", // Erai-Raws | ffprobe
	"br":     "pt", // Erai-Raws
	// spanish
	"spa-la": "es", // Erai-Raws
	"spa":    "es", // Erai-Raws | ffprobe
	"mx":     "es", // Erai-Raws
	// arabic
	"ara": "ar", // Erai-Raws | ffprobe
	"sa":  "ar", // Erai-Raws
	// french
	"fre": "fr", // Erai-Raws | ffprobe
	"fra": "fr", // ISO 639-2/T
	// german
	"ger": "de", // Erai-Raws | ffprobe
	"deu": "de", // ISO 639-2/T
	// italian
	"ita": "it", // Erai-Raws | ffprobe
	// finnish
	"fin": "fi", // ffprobe
	// russian
	"rus": "ru", // Erai-Raws
	// japanese
	"jpn": "ja", // Erai-Raws | ffprobe
	"jp":  "ja", // Erai-Raws
	// polish
	"pol": "pl", // Erai-Raws | ffprobe
	// dutch
	"dut": "nl", // Erai-Raws | ffprobe
	"nld": "nl", // ISO 639-2/T
	// norwegian
	"nob": "no", // Erai-Raws | ffprobe
	"nor": "no", // ISO 639-2
	// turkish
	"tur": "tr", // Erai-Raws | ffprobe
	// swedish
	"swe": "sv", // Erai-Raws | ffprobe
	"se":  "sv", // Erai-Raws
	// greek
	"gre": "el", // Erai-Raws | ffprobe
	"ell": "el", // ISO 639-2/T
	"gr":  "el", // Erai-Raws
	// hebrew
	"heb": "he", // Erai-Raws | ffprobe
	"il":  "he", // Erai-Raws
	// romanian
	"rum": "ro", // Erai-Raws
	"ron": "ro", // ISO 639-2/T
	"rom": "ro", // ffprobe
	// indonesian
	"ind": "id", // Erai-Raws
	// thai
	"tha": "th", // Erai-Raws | ffprobe
	// korean
	"kor": "ko", // Erai-Raws | ffprobe
	"kr":  "ko", // Erai-Raws
	// danish
	"dan": "da", // Erai-Raws | ffprobe
	"dk":  "da", // Erai-Raws
	// chinese (simplified & traditional)
	"chi": "zh", // Erai-Raws | ffprobe
	"zho": "zh", // ISO 639-2/T
	"cn":  "zh", // Erai-Raws
	// bulgarian
	"bul": "bg", // Erai-Raws | ffprobe
	// vietnamese
	"vie": "vi", // Erai-Raws
	"vn":  "vi", // Erai-Raws
	// hindi
	"hin": "hi", // Erai-Raws
	"in":  "hi", // Erai-Raws
	// tamil
	"tam": "ta", // ISO 639-2
	"lk":  "ta", // Erai-Raws
	// telugu ("tel" is the ISO 639-2 code for Telugu, not Tamil)
	"tel": "te", // Erai-Raws
	// ukrainian
	"ukr": "uk", // Erai-Raws
	"ua":  "uk", // Erai-Raws
	// hungarian
	"hun": "hu", // Erai-Raws
	// czech
	"ces": "cs", // Erai-Raws
	"cze": "cs", // ISO 639-2/B
	"cz":  "cs", // Erai-Raws
	// croatian
	"hrv": "hr", // Erai-Raws
	// malaysian
	"may": "ms", // Erai-Raws
	"msa": "ms", // ISO 639-2/T
	"my":  "ms", // Erai-Raws
	// slovakian
	"slk": "sk", // Erai-Raws
	"slo": "sk", // ISO 639-2/B
	// filipino
	"fil": "tl", // Erai-Raws
	"ph":  "tl", // Erai-Raws
}
// ParseLanguage maps an irregular language code onto its ISO 639-1 form.
// The lookup in langSynonyms is case-insensitive. A code without a known
// synonym is returned lowercased and otherwise unchanged.
func ParseLanguage(str string) string {
	lowered := strings.ToLower(str)
	code, known := langSynonyms[lowered]
	if !known {
		return lowered
	}
	return code
}
// ParseLanguages normalizes a list of irregular language codes to ISO 639-1
// by applying ParseLanguage to every element. The result has the same length
// and order as langCodes and is never nil.
func ParseLanguages(langCodes []string) []string {
	normalized := make([]string, len(langCodes))
	for i := range langCodes {
		normalized[i] = ParseLanguage(langCodes[i])
	}
	return normalized
}

56
parse_file.go Normal file
View File

@ -0,0 +1,56 @@
package parsers
import (
"path/filepath"
"regexp"
"git.tordarus.net/nyaanime/model"
)
// FileParseOptions holds the subgroup index in which information can be found in a given regex.
//
// Both indices are 1-based capture group numbers. RegexFileParser panics if
// either of them is 0 or larger than the number of groups in the regex.
type FileParseOptions struct {
// Name is the capture group holding the anime title.
Name int
// Episode is the capture group holding the episode number; its text must be a decimal integer.
Episode int
}
// RegexFileParser builds a model.FileParserFunc that recognizes files by
// matching regex against the base name of their path.
//
// options names the capture groups holding the anime title and the episode
// number. The configuration is validated eagerly: the function panics if regex
// does not compile, if Name or Episode is 0, or if either index is negative or
// exceeds the number of capture groups in regex.
//
// The returned function reports ok == false when the file name does not match,
// when the episode group is not a decimal integer, or when AnalyzeFile fails
// for the file. Otherwise it returns the result of AnalyzeFile completed with
// the title, episode, parser and path.
func RegexFileParser(regex string, options FileParseOptions) model.FileParserFunc {
	pattern := regexp.MustCompile(regex)

	// handle faulty regexes
	if options.Name == 0 || options.Episode == 0 {
		panic(ErrTorrentParserInsufficientData.New(regex))
	}

	// handle faulty group references; a negative index would otherwise only
	// surface as an index-out-of-range panic while parsing a file
	groups := pattern.NumSubexp()
	for _, g := range []int{options.Name, options.Episode} {
		if g < 0 || g > groups {
			panic(ErrTorrentParserInvalidGroupReference.New(g, groups))
		}
	}

	return func(parser *model.Parser, path string) (*model.ParsedFile, bool) {
		matches := pattern.FindStringSubmatch(filepath.Base(path))
		if matches == nil {
			return nil, false
		}

		episode, ok := atoi(matches[options.Episode])
		if !ok {
			return nil, false
		}

		parsedFile, err := AnalyzeFile(path)
		if err != nil {
			return nil, false
		}

		parsedFile.OriginalAnimeTitle = matches[options.Name]
		parsedFile.Episode = episode
		parsedFile.Parser = parser
		parsedFile.File = path
		return parsedFile, true
	}
}

11
parse_language.go Normal file
View File

@ -0,0 +1,11 @@
package parsers
import "regexp"
// LanguageParserFunc splits a string into the individual language codes it contains.
// RegexTorrentParser calls it with the text captured by the language or subtitle
// group of a torrent title and normalizes the returned codes with ParseLanguages.
type LanguageParserFunc func(str string) []string
// SquareBracketsLanguageParserRegex matches one square-bracketed, non-empty
// token such as "[US]", as short as possible.
var SquareBracketsLanguageParserRegex = regexp.MustCompile(`\[.+?\]`)

// SquareBracketsLanguageParser extracts every square-bracketed token from str
// and returns the tokens without their surrounding brackets, in order of
// appearance. For example "[US][BR]" yields "US" and "BR".
func SquareBracketsLanguageParser(str string) []string {
	bracketed := SquareBracketsLanguageParserRegex.FindAllString(str, -1)
	return TrimPrefixSuffix(bracketed, "[", "]")
}

93
parse_torrent.go Normal file
View File

@ -0,0 +1,93 @@
package parsers
import (
"regexp"
"git.tordarus.net/nyaanime/model"
)
// TorrentParseOptions holds the subgroup index in which information can be found in a given regex
// as well as some parser specific functions.
//
// A group reference of 0 means "not present in the regex". Name and Episode
// must always be set; for Languages, Subtitles and Resolution the matching
// default value is used instead when the reference is 0. RegexTorrentParser
// panics when neither a reference nor a default is provided.
type TorrentParseOptions struct {
// regex group references
// Name is the capture group holding the anime title.
Name int
// Episode is the capture group holding the episode number; its text must be a decimal integer.
Episode int
// Languages is the capture group whose text is handed to LanguageParser.
Languages int
// Subtitles is the capture group whose text is handed to SubtitleParser.
Subtitles int
// Resolution is the capture group whose text is parsed with model.ParseResolution.
Resolution int
// language parsers
// LanguageParser is called with the text of the Languages group; only used when Languages != 0.
LanguageParser LanguageParserFunc
// SubtitleParser is called with the text of the Subtitles group; only used when Subtitles != 0.
SubtitleParser LanguageParserFunc
// default values used when group reference is 0
// DefaultLanguages must be non-nil when Languages is 0.
DefaultLanguages []string
// DefaultSubtitles must be non-nil when Subtitles is 0; an empty non-nil slice means "no subtitles".
DefaultSubtitles []string
// DefaultResolution must be non-zero when Resolution is 0.
DefaultResolution model.Resolution
}
// RegexTorrentParser builds a model.TorrentParserFunc that recognizes torrents
// by matching regex against their title.
//
// options names the capture groups holding title, episode, languages,
// subtitles and resolution, the functions splitting the language groups into
// codes, and the defaults used for groups the regex does not provide.
//
// The configuration is validated eagerly. The function panics if
//   - regex does not compile,
//   - Name or Episode is 0,
//   - Languages, Subtitles or Resolution is 0 and the matching default is unset,
//   - Languages or Subtitles is set but the matching parser function is nil,
//   - any group reference is negative or exceeds the number of groups in regex.
//
// The returned function reports ok == false when the title does not match,
// when the episode group is not a decimal integer, or when the resolution
// group cannot be parsed. Language and subtitle codes are normalized with
// ParseLanguages before they are returned.
func RegexTorrentParser(regex string, options TorrentParseOptions) model.TorrentParserFunc {
	pattern := regexp.MustCompile(regex)

	// handle faulty regexes
	switch {
	case options.Name == 0 || options.Episode == 0:
		panic(ErrTorrentParserInsufficientData.New(regex))
	case options.Languages == 0 && options.DefaultLanguages == nil:
		panic(ErrTorrentParserInsufficientLanguageData.New(regex))
	case options.Subtitles == 0 && options.DefaultSubtitles == nil:
		panic(ErrTorrentParserInsufficientSubtitleData.New(regex))
	case options.Resolution == 0 && options.DefaultResolution == 0:
		panic(ErrTorrentParserInsufficientResolutionData.New(regex))
	}

	// handle missing parser functions up front; otherwise they would only be
	// noticed as a nil function call once a matching torrent is parsed
	if options.Languages != 0 && options.LanguageParser == nil {
		panic("parsers: TorrentParseOptions.Languages references a regex group but LanguageParser is nil")
	}
	if options.Subtitles != 0 && options.SubtitleParser == nil {
		panic("parsers: TorrentParseOptions.Subtitles references a regex group but SubtitleParser is nil")
	}

	// handle faulty group references; a negative index would otherwise only
	// surface as an index-out-of-range panic while parsing a torrent
	groups := pattern.NumSubexp()
	for _, g := range []int{options.Name, options.Episode, options.Languages, options.Subtitles, options.Resolution} {
		if g < 0 || g > groups {
			panic(ErrTorrentParserInvalidGroupReference.New(g, groups))
		}
	}

	return func(parser *model.Parser, torrent *model.Torrent) (*model.ParsedTorrent, bool) {
		matches := pattern.FindStringSubmatch(torrent.Title)
		if matches == nil {
			return nil, false
		}

		episode, ok := atoi(matches[options.Episode])
		if !ok {
			return nil, false
		}

		resolution := options.DefaultResolution
		if options.Resolution != 0 {
			parsed, err := model.ParseResolution(matches[options.Resolution])
			if err != nil {
				return nil, false
			}
			resolution = parsed
		}

		languages := options.DefaultLanguages
		if options.Languages != 0 {
			languages = options.LanguageParser(matches[options.Languages])
		}

		subtitles := options.DefaultSubtitles
		if options.Subtitles != 0 {
			subtitles = options.SubtitleParser(matches[options.Subtitles])
		}

		return &model.ParsedTorrent{
			OriginalAnimeTitle: matches[options.Name],
			Episode:            episode,
			Resolution:         resolution,
			Parser:             parser,
			Languages:          ParseLanguages(languages),
			Subtitles:          ParseLanguages(subtitles),
			Torrent:            torrent,
		}, true
	}
}

128
parsers.go Normal file
View File

@ -0,0 +1,128 @@
package parsers
import "git.tordarus.net/nyaanime/model"
/*
how to get all torrent names on a nyaa page:
let s = "";
document.querySelectorAll("tr > td:nth-child(2) > a:not(.comments)").forEach(element => {
s += element.textContent + "\n";
})
console.log(s);
*/
// Parsers lists the release groups this package can parse. Each entry pairs a
// parser for torrent titles with a parser for the names of downloaded files.
// All regexes are compiled and validated when the package is initialized, so a
// faulty entry panics at program start.
var Parsers = []model.Parser{
	{
		Identity: "Erai-Raws",
		TorrentParser: RegexTorrentParser(
			`^\[Erai-raws\] (.*) - (.*?) (?:END )?(?:\[v\d+\])?\[(.*?)p\](?:\[HEVC\])?(?:\[Multiple Subtitle\])?(?:\s(\[.*?\]+)?|\[[A-Z0-9]{8}\]\.mkv)$`,
			TorrentParseOptions{
				Name:       1,
				Episode:    2,
				Resolution: 3,
				Subtitles:  4,

				SubtitleParser: SquareBracketsLanguageParser,

				DefaultLanguages: []string{"ja"},
			},
		),
		FileParser: RegexFileParser(
			`^\[Erai-raws\] (.*?) - (\d+?) .*?\.mkv$`,
			FileParseOptions{
				Name:    1,
				Episode: 2,
			},
		),
	},
	{
		Identity: "SubsPlease",
		TorrentParser: RegexTorrentParser(
			// the dot before "mkv" is escaped so that it only matches a literal file extension
			`^\[SubsPlease\] (.*) - (\d+?) \((.*?)\) \[.*?\]\.mkv$`,
			TorrentParseOptions{
				Name:       1,
				Episode:    2,
				Resolution: 3,

				DefaultLanguages: []string{"ja"},
				DefaultSubtitles: []string{"en"},
			},
		),
		FileParser: RegexFileParser(
			`^\[SubsPlease\] (.*?) - (\d+?) .*?\.mkv$`,
			FileParseOptions{
				Name:    1,
				Episode: 2,
			},
		),
	},
	{
		Identity: "PuyaSubs!",
		TorrentParser: RegexTorrentParser(
			// NOTE(review): the title tag reads ESP-ENG but only "en" is declared as subtitle language; confirm this is intended
			`^\[PuyaSubs!\] (.*) - (\d+?) \[ESP-ENG\]\[(.*?)\]\[.*?\]\.mkv$`,
			TorrentParseOptions{
				Name:       1,
				Episode:    2,
				Resolution: 3,

				DefaultLanguages: []string{"ja"},
				DefaultSubtitles: []string{"en"},
			},
		),
		FileParser: RegexFileParser(
			`^\[PuyaSubs!\] (.*?) - (\d+?) .*?\.mkv$`,
			FileParseOptions{
				Name:    1,
				Episode: 2,
			},
		),
		// tag 0th audio stream as japanese language and copy all other streams unchanged into output file
		FileEncoding: "-map 0 -c:v copy -c:a copy -c:s copy -metadata:s:a:0 language=jpn",
	},
	{
		Identity: "NanakoRaws-JP",
		TorrentParser: RegexTorrentParser(
			`^\[NanakoRaws\] (.*?) - (\d+?)(?:v\d+)?(?: END)? \((.*?)p\)(?: \(.*?\))?\.mkv \(include JPsub.*?\)$`,
			TorrentParseOptions{
				Name:       1,
				Episode:    2,
				Resolution: 3,

				DefaultLanguages: []string{"ja"},
				DefaultSubtitles: []string{"ja"},
			},
		),
		FileParser: RegexFileParser(
			`^\[NanakoRaws\] (.*?) - (\d+?)(?:v\d+)?(?: END)? \((.*?)p\)(?: \(.*?\))?\.mkv$`,
			FileParseOptions{
				Name:    1,
				Episode: 2,
			},
		),
		// tag 0th audio stream and subtitle stream as japanese language and copy all other streams unchanged into output file
		FileEncoding: "-map 0 -c:v copy -c:a copy -c:s copy -metadata:s:a:0 language=jpn -metadata:s:s:0 language=jpn",
	},
	{
		Identity: "Ohys-Raws",
		TorrentParser: RegexTorrentParser(
			`^\[Ohys-Raws\] (.*?) - (\d+?) \(.*? \d+x(\d+?) .*?\)(?: v2)?\.mp4$`,
			TorrentParseOptions{
				Name:       1,
				Episode:    2,
				Resolution: 3,

				DefaultLanguages: []string{"ja"},
				// empty but non-nil: no subtitles, while still passing the "default set" validation
				DefaultSubtitles: []string{},
			},
		),
		FileParser: RegexFileParser(
			`^\[Ohys-Raws\] (.*) - (.*?) (?:END )?\(.*?\)(?: v2)?\.mp4$`,
			FileParseOptions{
				Name:    1,
				Episode: 2,
			},
		),
	},
}

22
utils.go Normal file
View File

@ -0,0 +1,22 @@
package parsers
import (
"strconv"
"strings"
)
// atoi converts the decimal string s to an int. The second result reports
// whether the conversion succeeded; on failure the returned value is 0.
func atoi(s string) (int, bool) {
	if n, err := strconv.Atoi(s); err == nil {
		return n, true
	}
	return 0, false
}
// TrimPrefixSuffix returns a new slice in which every element of arr has one
// leading prefix and, after that, one trailing suffix removed where present.
// arr itself is left untouched and the result is never nil.
func TrimPrefixSuffix(arr []string, prefix, suffix string) []string {
	trimmed := make([]string, len(arr))
	for i, s := range arr {
		s = strings.TrimPrefix(s, prefix)
		trimmed[i] = strings.TrimSuffix(s, suffix)
	}
	return trimmed
}