initial commit
This commit is contained in:
52
analyze_file.go
Normal file
52
analyze_file.go
Normal file
@ -0,0 +1,52 @@
|
||||
package parsers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
|
||||
"git.tordarus.net/nyaanime/model"
|
||||
"gopkg.in/vansante/go-ffprobe.v2"
|
||||
)
|
||||
|
||||
// TODO cache
|
||||
func AnalyzeFile(path string) (*model.ParsedFile, error) {
|
||||
props := &model.ParsedFile{File: path}
|
||||
|
||||
file, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
data, err := ffprobe.ProbeReader(context.Background(), file)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
defaultVideoLang := ""
|
||||
for _, s := range data.StreamType(ffprobe.StreamVideo) {
|
||||
if s.Disposition.Default > 0 {
|
||||
props.Resolution = model.Resolution(s.Height)
|
||||
defaultVideoLang = ParseLanguage(s.Tags.Language)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
for _, s := range data.StreamType(ffprobe.StreamAudio) {
|
||||
if s.Tags.Language != "" {
|
||||
props.Languages = append(props.Languages, ParseLanguage(s.Tags.Language))
|
||||
} else if s.Disposition.Default > 0 {
|
||||
props.Languages = append(props.Languages, defaultVideoLang)
|
||||
}
|
||||
}
|
||||
|
||||
for _, s := range data.StreamType(ffprobe.StreamSubtitle) {
|
||||
if s.Tags.Language != "" {
|
||||
props.Subtitles = append(props.Subtitles, ParseLanguage(s.Tags.Language))
|
||||
} else if s.Disposition.Default > 0 {
|
||||
props.Subtitles = append(props.Subtitles, defaultVideoLang)
|
||||
}
|
||||
}
|
||||
|
||||
return props, nil
|
||||
}
|
11
errors.go
Normal file
11
errors.go
Normal file
@ -0,0 +1,11 @@
|
||||
package parsers
|
||||
|
||||
import "git.tordarus.net/tordarus/adverr/v2"
|
||||
|
||||
var (
|
||||
ErrTorrentParserInsufficientData = adverr.NewErrTmpl("ErrTorrentParserInsufficientData", "regex '%s' must at least provide title and episode")
|
||||
ErrTorrentParserInsufficientLanguageData = adverr.NewErrTmpl("ErrTorrentParserInsufficientLanguageData", "no language reference in regex and no default language set")
|
||||
ErrTorrentParserInsufficientSubtitleData = adverr.NewErrTmpl("ErrTorrentParserInsufficientSubtitleData", "no subtitle reference in regex and no default subtitle set")
|
||||
ErrTorrentParserInsufficientResolutionData = adverr.NewErrTmpl("ErrTorrentParserInsufficientResolutionData", "no resolution reference in regex and no default resolution set")
|
||||
ErrTorrentParserInvalidGroupReference = adverr.NewErrTmpl("ErrTorrentParserInvalidGroupReference", "options references group %d but regex only has %d groups")
|
||||
)
|
11
go.mod
Normal file
11
go.mod
Normal file
@ -0,0 +1,11 @@
|
||||
module git.tordarus.net/tordarus/parsers
|
||||
|
||||
go 1.18
|
||||
|
||||
require (
|
||||
git.tordarus.net/nyaanime/model v0.0.1
|
||||
git.tordarus.net/tordarus/adverr/v2 v2.0.2
|
||||
gopkg.in/vansante/go-ffprobe.v2 v2.2.1
|
||||
)
|
||||
|
||||
require git.tordarus.net/tordarus/anilist v1.5.2 // indirect
|
8
go.sum
Normal file
8
go.sum
Normal file
@ -0,0 +1,8 @@
|
||||
git.tordarus.net/nyaanime/model v0.0.1 h1:/I+87Z6eEw/o2adltKnCk4FZai2mPekjYlzEjY1ppyQ=
|
||||
git.tordarus.net/nyaanime/model v0.0.1/go.mod h1:oHV82UMNy4XgPHkI6tZiwabdi6myqHXgjMi9sNZ+rG4=
|
||||
git.tordarus.net/tordarus/adverr/v2 v2.0.2 h1:7nvNjMMjtGPq0EY6duMiv+seJ7MacNvKSBmckHl6Erg=
|
||||
git.tordarus.net/tordarus/adverr/v2 v2.0.2/go.mod h1:gCC46KsWosZJh7MVNDEU99hKQoxEWZgHITDHtmFwwiQ=
|
||||
git.tordarus.net/tordarus/anilist v1.5.2 h1:SxlovS+e3lgL2SowQQwj8dQrIZzRFPomcGCw3V+My0Q=
|
||||
git.tordarus.net/tordarus/anilist v1.5.2/go.mod h1:Mrhx/9+8HJVj5ebQ5fJuXqL220tEJhgQIqFK2WKPXgA=
|
||||
gopkg.in/vansante/go-ffprobe.v2 v2.2.1 h1:sFV08OT1eZ1yroLCZVClIVd9YySgCh9eGjBWO0oRayI=
|
||||
gopkg.in/vansante/go-ffprobe.v2 v2.2.1/go.mod h1:qF0AlAjk7Nqzqf3y333Ly+KxN3cKF2JqA3JT5ZheUGE=
|
148
lang_codes.go
Normal file
148
lang_codes.go
Normal file
@ -0,0 +1,148 @@
|
||||
package parsers
|
||||
|
||||
import "strings"
|
||||
|
||||
// langSynonyms maps irregular language codes to their ISO 639-1 equivalent.
//
// Keys are lower-case; ParseLanguage lower-cases its input before the lookup.
// Besides the codes observed in Erai-Raws releases and ffprobe output, the
// alternate ISO 639-2 spelling (bibliographic vs. terminologic) of each
// language is listed as well, so both variants normalise to the same code.
var langSynonyms = map[string]string{
	// english
	"eng": "en", // Erai-Raws | ffprobe
	"us":  "en", // Erai-Raws

	// portuguese
	"por-br": "pt", // Erai-Raws
	"por":    "pt", // Erai-Raws | ffprobe
	"br":     "pt", // Erai-Raws

	// spanish
	"spa-la": "es", // Erai-Raws
	"spa":    "es", // Erai-Raws | ffprobe
	"mx":     "es", // Erai-Raws

	// arabic
	"ara": "ar", // Erai-Raws | ffprobe
	"sa":  "ar", // Erai-Raws

	// french
	"fre": "fr", // Erai-Raws | ffprobe
	"fra": "fr", // ISO 639-2/T

	// german
	"ger": "de", // Erai-Raws | ffprobe
	"deu": "de", // ISO 639-2/T

	// italian
	"ita": "it", // Erai-Raws | ffprobe

	// finnish
	"fin": "fi", // ffprobe

	// russian
	"rus": "ru", // Erai-Raws

	// japanese
	"jpn": "ja", // Erai-Raws | ffprobe
	"jp":  "ja", // Erai-Raws

	// polish
	"pol": "pl", // Erai-Raws | ffprobe

	// dutch
	"dut": "nl", // Erai-Raws | ffprobe
	"nld": "nl", // ISO 639-2/T

	// norwegian
	"nob": "no", // Erai-Raws | ffprobe
	"nor": "no", // ISO 639-2

	// turkish
	"tur": "tr", // Erai-Raws | ffprobe

	// swedish
	"swe": "sv", // Erai-Raws | ffprobe
	"se":  "sv", // Erai-Raws

	// greek
	"gre": "el", // Erai-Raws | ffprobe
	"ell": "el", // ISO 639-2/T
	"gr":  "el", // Erai-Raws

	// hebrew
	"heb": "he", // Erai-Raws | ffprobe
	"il":  "he", // Erai-Raws

	// romanian
	"rum": "ro", // Erai-Raws
	"rom": "ro", // ffprobe
	"ron": "ro", // ISO 639-2/T

	// indonesian
	"ind": "id", // Erai-Raws

	// thai
	"tha": "th", // Erai-Raws | ffprobe

	// korean
	"kor": "ko", // Erai-Raws | ffprobe
	"kr":  "ko", // Erai-Raws

	// danish
	"dan": "da", // Erai-Raws | ffprobe
	"dk":  "da", // Erai-Raws

	// chinese (simplified & traditional)
	"chi": "zh", // Erai-Raws | ffprobe
	"zho": "zh", // ISO 639-2/T
	"cn":  "zh", // Erai-Raws

	// bulgarian
	"bul": "bg", // Erai-Raws | ffprobe

	// vietnamese
	"vie": "vi", // Erai-Raws
	"vn":  "vi", // Erai-Raws

	// hindi
	"hin": "hi", // Erai-Raws
	"in":  "hi", // Erai-Raws

	// tamil
	// NOTE(review): "tel" is the ISO 639-2 code for Telugu (ISO 639-1 "te"),
	// not Tamil. The mapping is kept unchanged; confirm what Erai-Raws uses
	// this tag for.
	"tel": "ta", // Erai-Raws
	"tam": "ta", // ISO 639-2
	"lk":  "ta", // Erai-Raws

	// ukrainian
	"ukr": "uk", // Erai-Raws
	"ua":  "uk", // Erai-Raws

	// hungarian
	"hun": "hu", // Erai-Raws

	// czech
	"ces": "cs", // Erai-Raws
	"cze": "cs", // ISO 639-2/B
	"cz":  "cs", // Erai-Raws

	// croatian
	"hrv": "hr", // Erai-Raws

	// malaysian
	"may": "ms", // Erai-Raws
	"msa": "ms", // ISO 639-2/T
	"my":  "ms", // Erai-Raws

	// slovakian
	"slk": "sk", // Erai-Raws
	"slo": "sk", // ISO 639-2/B

	// filipino
	"fil": "tl", // Erai-Raws
	"ph":  "tl", // Erai-Raws
}
|
||||
|
||||
// ParseLanguage converts irregular language codes to ISO 639-1
|
||||
func ParseLanguage(str string) string {
|
||||
if code, ok := langSynonyms[strings.ToLower(str)]; ok {
|
||||
return code
|
||||
}
|
||||
|
||||
return strings.ToLower(str)
|
||||
}
|
||||
|
||||
// ParseLanguages converts multiple irregular language codes to ISO 639-1.
|
||||
// It simply calls ParseLanguage for each language code
|
||||
func ParseLanguages(langCodes []string) []string {
|
||||
codes := make([]string, 0, len(langCodes))
|
||||
for _, irregularLangCode := range langCodes {
|
||||
codes = append(codes, ParseLanguage(irregularLangCode))
|
||||
}
|
||||
return codes
|
||||
}
|
56
parse_file.go
Normal file
56
parse_file.go
Normal file
@ -0,0 +1,56 @@
|
||||
package parsers
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
|
||||
"git.tordarus.net/nyaanime/model"
|
||||
)
|
||||
|
||||
// FileParseOptions tells RegexFileParser which capture group of its regex
// holds which piece of information. Group indices start at 1; RegexFileParser
// panics if either field is left at 0.
type FileParseOptions struct {
	// Name is the index of the capture group containing the anime title.
	Name int

	// Episode is the index of the capture group containing the episode number.
	Episode int
}
|
||||
|
||||
func RegexFileParser(regex string, options FileParseOptions) model.FileParserFunc {
|
||||
pattern := regexp.MustCompile(regex)
|
||||
|
||||
// handle faulty regexes
|
||||
if options.Name == 0 || options.Episode == 0 {
|
||||
panic(ErrTorrentParserInsufficientData.New(regex))
|
||||
}
|
||||
|
||||
// handle faulty group references
|
||||
for _, g := range []int{options.Name, options.Episode} {
|
||||
if g > pattern.NumSubexp() {
|
||||
panic(ErrTorrentParserInvalidGroupReference.New(g, pattern.NumSubexp()))
|
||||
}
|
||||
}
|
||||
|
||||
return func(parser *model.Parser, path string) (file *model.ParsedFile, ok bool) {
|
||||
filename := filepath.Base(path)
|
||||
matches := pattern.FindStringSubmatch(filename)
|
||||
|
||||
if matches == nil {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
episode, ok := atoi(matches[options.Episode])
|
||||
if !ok {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
parsedFile, err := AnalyzeFile(path)
|
||||
if err != nil {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
parsedFile.OriginalAnimeTitle = matches[options.Name]
|
||||
parsedFile.Episode = episode
|
||||
parsedFile.Parser = parser
|
||||
parsedFile.File = path
|
||||
|
||||
return parsedFile, true
|
||||
}
|
||||
}
|
11
parse_language.go
Normal file
11
parse_language.go
Normal file
@ -0,0 +1,11 @@
|
||||
package parsers
|
||||
|
||||
import "regexp"
|
||||
|
||||
// LanguageParserFunc extracts a list of language codes from str.
type LanguageParserFunc func(str string) []string
|
||||
|
||||
// SquareBracketsLanguageParserRegex matches the shortest possible non-empty
// text enclosed in square brackets, including the brackets themselves.
var SquareBracketsLanguageParserRegex = regexp.MustCompile(`\[.+?\]`)
|
||||
|
||||
func SquareBracketsLanguageParser(str string) []string {
|
||||
return TrimPrefixSuffix(SquareBracketsLanguageParserRegex.FindAllString(str, -1), "[", "]")
|
||||
}
|
93
parse_torrent.go
Normal file
93
parse_torrent.go
Normal file
@ -0,0 +1,93 @@
|
||||
package parsers
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
"git.tordarus.net/nyaanime/model"
|
||||
)
|
||||
|
||||
// TorrentParseOptions holds the subgroup index in which information can be found in a given regex
|
||||
// as well as some parser specific functions
|
||||
type TorrentParseOptions struct {
|
||||
// regex group references
|
||||
Name int
|
||||
Episode int
|
||||
Languages int
|
||||
Subtitles int
|
||||
Resolution int
|
||||
|
||||
// language parsers
|
||||
LanguageParser LanguageParserFunc
|
||||
SubtitleParser LanguageParserFunc
|
||||
|
||||
// default values used when group reference is 0
|
||||
DefaultLanguages []string
|
||||
DefaultSubtitles []string
|
||||
DefaultResolution model.Resolution
|
||||
}
|
||||
|
||||
func RegexTorrentParser(regex string, options TorrentParseOptions) model.TorrentParserFunc {
|
||||
pattern := regexp.MustCompile(regex)
|
||||
|
||||
// handle faulty regexes
|
||||
if options.Name == 0 || options.Episode == 0 {
|
||||
panic(ErrTorrentParserInsufficientData.New(regex))
|
||||
} else if options.Languages == 0 && options.DefaultLanguages == nil {
|
||||
panic(ErrTorrentParserInsufficientLanguageData.New(regex))
|
||||
} else if options.Subtitles == 0 && options.DefaultSubtitles == nil {
|
||||
panic(ErrTorrentParserInsufficientSubtitleData.New(regex))
|
||||
} else if options.Resolution == 0 && options.DefaultResolution == 0 {
|
||||
panic(ErrTorrentParserInsufficientResolutionData.New(regex))
|
||||
}
|
||||
|
||||
// handle faulty group references
|
||||
for _, g := range []int{options.Name, options.Episode, options.Languages, options.Subtitles, options.Resolution} {
|
||||
if g > pattern.NumSubexp() {
|
||||
panic(ErrTorrentParserInvalidGroupReference.New(g, pattern.NumSubexp()))
|
||||
}
|
||||
}
|
||||
|
||||
return func(parser *model.Parser, torrent *model.Torrent) (ParsedTorrent *model.ParsedTorrent, ok bool) {
|
||||
var err error
|
||||
|
||||
matches := pattern.FindStringSubmatch(torrent.Title)
|
||||
|
||||
if matches == nil {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
episode, ok := atoi(matches[options.Episode])
|
||||
if !ok {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
resolution := options.DefaultResolution
|
||||
if options.Resolution != 0 {
|
||||
resolution, err = model.ParseResolution(matches[options.Resolution])
|
||||
if err != nil {
|
||||
return nil, false
|
||||
}
|
||||
}
|
||||
|
||||
languages := options.DefaultLanguages
|
||||
if options.Languages != 0 {
|
||||
languages = options.LanguageParser(matches[options.Languages])
|
||||
}
|
||||
|
||||
subtitles := options.DefaultSubtitles
|
||||
if options.Subtitles != 0 {
|
||||
subtitles = options.SubtitleParser(matches[options.Subtitles])
|
||||
}
|
||||
|
||||
return &model.ParsedTorrent{
|
||||
OriginalAnimeTitle: matches[options.Name],
|
||||
Episode: episode,
|
||||
Resolution: resolution,
|
||||
Parser: parser,
|
||||
Languages: ParseLanguages(languages),
|
||||
Subtitles: ParseLanguages(subtitles),
|
||||
|
||||
Torrent: torrent,
|
||||
}, true
|
||||
}
|
||||
}
|
128
parsers.go
Normal file
128
parsers.go
Normal file
@ -0,0 +1,128 @@
|
||||
package parsers
|
||||
|
||||
import "git.tordarus.net/nyaanime/model"
|
||||
|
||||
/*
|
||||
how to get all torrent names on a nyaa page:
|
||||
|
||||
let s = "";
|
||||
document.querySelectorAll("tr > td:nth-child(2) > a:not(.comments)").forEach(element => {
|
||||
s += element.textContent + "\n";
|
||||
})
|
||||
console.log(s);
|
||||
|
||||
*/
|
||||
|
||||
var Parsers = []model.Parser{
|
||||
{
|
||||
Identity: "Erai-Raws",
|
||||
TorrentParser: RegexTorrentParser(
|
||||
`^\[Erai-raws\] (.*) - (.*?) (?:END )?(?:\[v\d+\])?\[(.*?)p\](?:\[HEVC\])?(?:\[Multiple Subtitle\])?(?:\s(\[.*?\]+)?|\[[A-Z0-9]{8}\]\.mkv)$`,
|
||||
TorrentParseOptions{
|
||||
Name: 1,
|
||||
Episode: 2,
|
||||
Resolution: 3,
|
||||
Subtitles: 4,
|
||||
SubtitleParser: SquareBracketsLanguageParser,
|
||||
DefaultLanguages: []string{"ja"},
|
||||
},
|
||||
),
|
||||
FileParser: RegexFileParser(
|
||||
`^\[Erai-raws\] (.*?) - (\d+?) .*?\.mkv$`,
|
||||
FileParseOptions{
|
||||
Name: 1,
|
||||
Episode: 2,
|
||||
},
|
||||
),
|
||||
},
|
||||
|
||||
{
|
||||
Identity: "SubsPlease",
|
||||
TorrentParser: RegexTorrentParser(
|
||||
`^\[SubsPlease\] (.*) - (\d+?) \((.*?)\) \[.*?\].mkv$`,
|
||||
TorrentParseOptions{
|
||||
Name: 1,
|
||||
Episode: 2,
|
||||
Resolution: 3,
|
||||
DefaultLanguages: []string{"ja"},
|
||||
DefaultSubtitles: []string{"en"},
|
||||
},
|
||||
),
|
||||
FileParser: RegexFileParser(
|
||||
`^\[SubsPlease\] (.*?) - (\d+?) .*?\.mkv$`,
|
||||
FileParseOptions{
|
||||
Name: 1,
|
||||
Episode: 2,
|
||||
},
|
||||
),
|
||||
},
|
||||
|
||||
{
|
||||
Identity: "PuyaSubs!",
|
||||
TorrentParser: RegexTorrentParser(
|
||||
`^\[PuyaSubs!\] (.*) - (\d+?) \[ESP-ENG\]\[(.*?)\]\[.*?\]\.mkv$`,
|
||||
TorrentParseOptions{
|
||||
Name: 1,
|
||||
Episode: 2,
|
||||
Resolution: 3,
|
||||
DefaultLanguages: []string{"ja"},
|
||||
DefaultSubtitles: []string{"en"},
|
||||
},
|
||||
),
|
||||
FileParser: RegexFileParser(
|
||||
`^\[PuyaSubs!\] (.*?) - (\d+?) .*?\.mkv$`,
|
||||
FileParseOptions{
|
||||
Name: 1,
|
||||
Episode: 2,
|
||||
},
|
||||
),
|
||||
|
||||
// tag 0th audio stream as japanese language and copy all other streams unchanged into output file
|
||||
FileEncoding: "-map 0 -c:v copy -c:a copy -c:s copy -metadata:s:a:0 language=jpn",
|
||||
},
|
||||
|
||||
{
|
||||
Identity: "NanakoRaws-JP",
|
||||
TorrentParser: RegexTorrentParser(
|
||||
`^\[NanakoRaws\] (.*?) - (\d+?)(?:v\d+)?(?: END)? \((.*?)p\)(?: \(.*?\))?\.mkv \(include JPsub.*?\)$`,
|
||||
TorrentParseOptions{
|
||||
Name: 1,
|
||||
Episode: 2,
|
||||
Resolution: 3,
|
||||
DefaultLanguages: []string{"ja"},
|
||||
DefaultSubtitles: []string{"ja"},
|
||||
},
|
||||
),
|
||||
FileParser: RegexFileParser(
|
||||
`^\[NanakoRaws\] (.*?) - (\d+?)(?:v\d+)?(?: END)? \((.*?)p\)(?: \(.*?\))?\.mkv$`,
|
||||
FileParseOptions{
|
||||
Name: 1,
|
||||
Episode: 2,
|
||||
},
|
||||
),
|
||||
|
||||
// tag 0th audio stream and subtitle stream as japanese language and copy all other streams unchanged into output file
|
||||
FileEncoding: "-map 0 -c:v copy -c:a copy -c:s copy -metadata:s:a:0 language=jpn -metadata:s:s:0 language=jpn",
|
||||
},
|
||||
|
||||
{
|
||||
Identity: "Ohys-Raws",
|
||||
TorrentParser: RegexTorrentParser(
|
||||
`^\[Ohys-Raws\] (.*?) - (\d+?) \(.*? \d+x(\d+?) .*?\)(?: v2)?\.mp4$`,
|
||||
TorrentParseOptions{
|
||||
Name: 1,
|
||||
Episode: 2,
|
||||
Resolution: 3,
|
||||
DefaultLanguages: []string{"ja"},
|
||||
DefaultSubtitles: []string{},
|
||||
},
|
||||
),
|
||||
FileParser: RegexFileParser(
|
||||
`^\[Ohys-Raws\] (.*) - (.*?) (?:END )?\(.*?\)(?: v2)?\.mp4$`,
|
||||
FileParseOptions{
|
||||
Name: 1,
|
||||
Episode: 2,
|
||||
},
|
||||
),
|
||||
},
|
||||
}
|
22
utils.go
Normal file
22
utils.go
Normal file
@ -0,0 +1,22 @@
|
||||
package parsers
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// atoi converts s to an int using strconv.Atoi. Instead of an error it
// returns a flag: (value, true) on success and (0, false) if s is not a valid
// integer.
func atoi(s string) (int, bool) {
	if n, err := strconv.Atoi(s); err == nil {
		return n, true
	}
	return 0, false
}
|
||||
|
||||
// TrimPrefixSuffix returns a new slice in which every element of arr has had
// one leading prefix and one trailing suffix removed, where present. The
// prefix is removed before the suffix is checked. arr itself is not modified
// and the result is never nil.
func TrimPrefixSuffix(arr []string, prefix, suffix string) []string {
	trimmed := make([]string, len(arr))
	for i := range arr {
		withoutPrefix := strings.TrimPrefix(arr[i], prefix)
		trimmed[i] = strings.TrimSuffix(withoutPrefix, suffix)
	}
	return trimmed
}
|
Reference in New Issue
Block a user