commit 58b7437ef81e817aa48b1b622054f6315b247dbf
Author: Tordarus <tordarus@protonmail.com>
Date:   Fri Jun 6 16:35:29 2025 +0200

    initial commit

diff --git a/analyze_file.go b/analyze_file.go
new file mode 100644
index 0000000..639f781
--- /dev/null
+++ b/analyze_file.go
@@ -0,0 +1,52 @@
+package parsers
+
+import (
+	"context"
+	"os"
+
+	"git.tordarus.net/nyaanime/model"
+	"gopkg.in/vansante/go-ffprobe.v2"
+)
+
+// TODO cache
+func AnalyzeFile(path string) (*model.ParsedFile, error) {
+	props := &model.ParsedFile{File: path}
+
+	file, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer file.Close()
+
+	data, err := ffprobe.ProbeReader(context.Background(), file)
+	if err != nil {
+		return nil, err
+	}
+
+	defaultVideoLang := ""
+	for _, s := range data.StreamType(ffprobe.StreamVideo) {
+		if s.Disposition.Default > 0 {
+			props.Resolution = model.Resolution(s.Height)
+			defaultVideoLang = ParseLanguage(s.Tags.Language)
+			break
+		}
+	}
+
+	for _, s := range data.StreamType(ffprobe.StreamAudio) {
+		if s.Tags.Language != "" {
+			props.Languages = append(props.Languages, ParseLanguage(s.Tags.Language))
+		} else if s.Disposition.Default > 0 {
+			props.Languages = append(props.Languages, defaultVideoLang)
+		}
+	}
+
+	for _, s := range data.StreamType(ffprobe.StreamSubtitle) {
+		if s.Tags.Language != "" {
+			props.Subtitles = append(props.Subtitles, ParseLanguage(s.Tags.Language))
+		} else if s.Disposition.Default > 0 {
+			props.Subtitles = append(props.Subtitles, defaultVideoLang)
+		}
+	}
+
+	return props, nil
+}
diff --git a/errors.go b/errors.go
new file mode 100644
index 0000000..8af1f88
--- /dev/null
+++ b/errors.go
@@ -0,0 +1,11 @@
+package parsers
+
+import "git.tordarus.net/tordarus/adverr/v2"
+
+// Error templates raised by the regex parser constructors when a parser
+// definition is faulty. Each template that is instantiated with the
+// offending regex carries a matching '%s' verb; the group reference template
+// takes the referenced group and the number of groups the regex has.
+// NOTE(review): assumes adverr fills templates like fmt.Sprintf — confirm.
+var (
+	ErrTorrentParserInsufficientData           = adverr.NewErrTmpl("ErrTorrentParserInsufficientData", "regex '%s' must at least provide title and episode")
+	ErrTorrentParserInsufficientLanguageData   = adverr.NewErrTmpl("ErrTorrentParserInsufficientLanguageData", "no language reference in regex '%s' and no default language set")
+	ErrTorrentParserInsufficientSubtitleData   = adverr.NewErrTmpl("ErrTorrentParserInsufficientSubtitleData", "no subtitle reference in regex '%s' and no default subtitle set")
+	ErrTorrentParserInsufficientResolutionData = adverr.NewErrTmpl("ErrTorrentParserInsufficientResolutionData", "no resolution reference in regex '%s' and no default resolution set")
+	ErrTorrentParserInvalidGroupReference      = adverr.NewErrTmpl("ErrTorrentParserInvalidGroupReference", "options references group %d but regex only has %d groups")
+)
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..e9497f6
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,11 @@
+module git.tordarus.net/tordarus/parsers
+
+go 1.18
+
+require (
+	git.tordarus.net/nyaanime/model v0.0.1
+	git.tordarus.net/tordarus/adverr/v2 v2.0.2
+	gopkg.in/vansante/go-ffprobe.v2 v2.2.1
+)
+
+require git.tordarus.net/tordarus/anilist v1.5.2 // indirect
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..205a139
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,8 @@
+git.tordarus.net/nyaanime/model v0.0.1 h1:/I+87Z6eEw/o2adltKnCk4FZai2mPekjYlzEjY1ppyQ=
+git.tordarus.net/nyaanime/model v0.0.1/go.mod h1:oHV82UMNy4XgPHkI6tZiwabdi6myqHXgjMi9sNZ+rG4=
+git.tordarus.net/tordarus/adverr/v2 v2.0.2 h1:7nvNjMMjtGPq0EY6duMiv+seJ7MacNvKSBmckHl6Erg=
+git.tordarus.net/tordarus/adverr/v2 v2.0.2/go.mod h1:gCC46KsWosZJh7MVNDEU99hKQoxEWZgHITDHtmFwwiQ=
+git.tordarus.net/tordarus/anilist v1.5.2 h1:SxlovS+e3lgL2SowQQwj8dQrIZzRFPomcGCw3V+My0Q=
+git.tordarus.net/tordarus/anilist v1.5.2/go.mod h1:Mrhx/9+8HJVj5ebQ5fJuXqL220tEJhgQIqFK2WKPXgA=
+gopkg.in/vansante/go-ffprobe.v2 v2.2.1 h1:sFV08OT1eZ1yroLCZVClIVd9YySgCh9eGjBWO0oRayI=
+gopkg.in/vansante/go-ffprobe.v2 v2.2.1/go.mod h1:qF0AlAjk7Nqzqf3y333Ly+KxN3cKF2JqA3JT5ZheUGE=
diff --git a/lang_codes.go b/lang_codes.go
new file mode 100644
index 0000000..7b6bb7a
--- /dev/null
+++ b/lang_codes.go
@@ -0,0 +1,148 @@
+package parsers
+
+import "strings"
+
+// langSynonyms maps irregular lang codes (lower-case keys, see ParseLanguage) to ISO 639-1; trailing comments presumably name where each code was seen
+var langSynonyms = map[string]string{
+	// english
+	"eng": "en", // Erai-Raws | ffprobe
+	"us":  "en", // Erai-Raws
+
+	// portuguese
+	"por-br": "pt", // Erai-Raws
+	"por":    "pt", // Erai-Raws | ffprobe
+	"br":     "pt", // Erai-Raws
+
+	// spanish
+	"spa-la": "es", // Erai-Raws
+	"spa":    "es", // Erai-Raws | ffprobe
+	"mx":     "es", // Erai-Raws
+
+	// arabic
+	"ara": "ar", // Erai-Raws | ffprobe
+	"sa":  "ar", // Erai-Raws
+
+	// french
+	"fre": "fr", // Erai-Raws | ffprobe
+
+	// german
+	"ger": "de", // Erai-Raws | ffprobe
+
+	// italian
+	"ita": "it", // Erai-Raws | ffprobe
+
+	// finnish
+	"fin": "fi", // ffprobe
+
+	// russian
+	"rus": "ru", // Erai-Raws
+
+	// japanese
+	"jpn": "ja", // Erai-Raws | ffprobe
+	"jp":  "ja", // Erai-Raws
+
+	// polish
+	"pol": "pl", // Erai-Raws | ffprobe
+
+	// dutch
+	"dut": "nl", // Erai-Raws | ffprobe
+
+	// norwegian
+	"nob": "no", // Erai-Raws | ffprobe
+
+	// turkish
+	"tur": "tr", // Erai-Raws | ffprobe
+
+	// swedish
+	"swe": "sv", // Erai-Raws | ffprobe
+	"se":  "sv", // Erai-Raws
+
+	// greek
+	"gre": "el", // Erai-Raws | ffprobe
+	"gr":  "el", // Erai-Raws
+
+	// hebrew
+	"heb": "he", // Erai-Raws | ffprobe
+	"il":  "he", // Erai-Raws
+
+	// romanian
+	"rum": "ro", // Erai-Raws
+	"rom": "ro", // ffprobe; NOTE(review): ISO 639-2 "rom" is Romany, Romanian is "rum"/"ron" — confirm
+
+	// indonesian
+	"ind": "id", // Erai-Raws
+
+	// thai
+	"tha": "th", // Erai-Raws | ffprobe
+
+	// korean
+	"kor": "ko", // Erai-Raws | ffprobe
+	"kr":  "ko", // Erai-Raws
+
+	// danish
+	"dan": "da", // Erai-Raws | ffprobe
+	"dk":  "da", // Erai-Raws
+
+	// chinese (simplified & traditional)
+	"chi": "zh", // Erai-Raws | ffprobe
+	"cn":  "zh", // Erai-Raws
+
+	// bulgarian
+	"bul": "bg", // Erai-Raws | ffprobe
+
+	// vietnamese
+	"vie": "vi", // Erai-Raws
+	"vn":  "vi", // Erai-Raws
+
+	// hindi
+	"hin": "hi", // Erai-Raws
+	"in":  "hi", // Erai-Raws
+
+	// tamil
+	"tel": "ta", // Erai-Raws; NOTE(review): ISO 639-2 "tel" is Telugu, Tamil is "tam" — confirm
+	"lk":  "ta", // Erai-Raws
+
+	// ukrainian
+	"ukr": "uk", // Erai-Raws
+	"ua":  "uk", // Erai-Raws
+
+	// hungarian
+	"hun": "hu", // Erai-Raws
+
+	// czech
+	"ces": "cs", // Erai-Raws
+	"cz":  "cs", // Erai-Raws
+
+	// croatian
+	"hrv": "hr", // Erai-Raws
+
+	// malay
+	"may": "ms", // Erai-Raws
+	"my":  "ms", // Erai-Raws
+
+	// slovak
+	"slk": "sk", // Erai-Raws
+
+	// filipino
+	"fil": "tl", // Erai-Raws
+	"ph":  "tl", // Erai-Raws
+}
+
+// ParseLanguage normalizes a single language code. The input is lower-cased
+// first; if the result is a known synonym in langSynonyms its ISO 639-1 code
+// is returned, otherwise the lower-cased input is returned unchanged.
+func ParseLanguage(str string) string {
+	normalized := strings.ToLower(str)
+	if iso, known := langSynonyms[normalized]; known {
+		return iso
+	}
+	return normalized
+}
+
+// ParseLanguages normalizes every entry of langCodes with ParseLanguage and
+// returns the results in the same order. The returned slice is always
+// non-nil, even for nil or empty input.
+func ParseLanguages(langCodes []string) []string {
+	normalized := make([]string, len(langCodes))
+	for i := range langCodes {
+		normalized[i] = ParseLanguage(langCodes[i])
+	}
+	return normalized
+}
diff --git a/parse_file.go b/parse_file.go
new file mode 100644
index 0000000..afc08f1
--- /dev/null
+++ b/parse_file.go
@@ -0,0 +1,56 @@
+package parsers
+
+import (
+	"path/filepath"
+	"regexp"
+
+	"git.tordarus.net/nyaanime/model"
+)
+
+// FileParseOptions tells RegexFileParser which capture groups of its regex
+// hold the anime name and the episode number. Both are indexes into the
+// submatch list (group 1 is the first capture group); 0 means "not set" and
+// is rejected by RegexFileParser.
+type FileParseOptions struct {
+	Name, Episode int
+}
+
+// RegexFileParser builds a model.FileParserFunc that recognizes files by
+// matching their base name against regex.
+//
+// options names the capture groups holding the anime name and the episode
+// number. The definition is validated eagerly: the function panics when
+// regex does not compile, when a group index is 0 (unset), or when an index
+// lies outside the groups regex actually has (negative or too large).
+//
+// The returned parser reports ok == false when the base name does not match,
+// when the episode group is not an integer, or when AnalyzeFile fails.
+func RegexFileParser(regex string, options FileParseOptions) model.FileParserFunc {
+	pattern := regexp.MustCompile(regex)
+
+	// handle faulty regexes
+	if options.Name == 0 || options.Episode == 0 {
+		panic(ErrTorrentParserInsufficientData.New(regex))
+	}
+
+	// handle faulty group references; negative indexes are rejected here as
+	// well because indexing the submatches with them would panic on the
+	// first matching file name
+	groups := pattern.NumSubexp()
+	for _, g := range []int{options.Name, options.Episode} {
+		if g < 0 || g > groups {
+			panic(ErrTorrentParserInvalidGroupReference.New(g, groups))
+		}
+	}
+
+	return func(parser *model.Parser, path string) (file *model.ParsedFile, ok bool) {
+		// only the file name takes part in matching, never the directory
+		matches := pattern.FindStringSubmatch(filepath.Base(path))
+		if matches == nil {
+			return nil, false
+		}
+
+		episode, ok := atoi(matches[options.Episode])
+		if !ok {
+			return nil, false
+		}
+
+		// probe the file only after the cheap name checks have passed
+		parsedFile, err := AnalyzeFile(path)
+		if err != nil {
+			return nil, false
+		}
+
+		parsedFile.OriginalAnimeTitle = matches[options.Name]
+		parsedFile.Episode = episode
+		parsedFile.Parser = parser
+		parsedFile.File = path
+
+		return parsedFile, true
+	}
+}
diff --git a/parse_language.go b/parse_language.go
new file mode 100644
index 0000000..1ecdbcb
--- /dev/null
+++ b/parse_language.go
@@ -0,0 +1,11 @@
+package parsers
+
+import "regexp"
+
+// LanguageParserFunc extracts the individual raw language codes from the
+// text captured by a regex group.
+type LanguageParserFunc func(str string) []string
+
+// SquareBracketsLanguageParserRegex matches a single "[...]" tag with
+// non-empty content, as short as possible.
+var SquareBracketsLanguageParserRegex = regexp.MustCompile(`\[.+?\]`)
+
+// SquareBracketsLanguageParser returns the content of every "[...]" tag in
+// str, in order of appearance and without the enclosing brackets.
+func SquareBracketsLanguageParser(str string) []string {
+	tags := SquareBracketsLanguageParserRegex.FindAllString(str, -1)
+	return TrimPrefixSuffix(tags, "[", "]")
+}
diff --git a/parse_torrent.go b/parse_torrent.go
new file mode 100644
index 0000000..49f1e65
--- /dev/null
+++ b/parse_torrent.go
@@ -0,0 +1,93 @@
+package parsers
+
+import (
+	"regexp"
+
+	"git.tordarus.net/nyaanime/model"
+)
+
+// TorrentParseOptions configures RegexTorrentParser: which capture group of
+// the regex carries which piece of information, how language lists are
+// split into codes, and which defaults apply when a group is not referenced.
+type TorrentParseOptions struct {
+	// Regex group references. 0 means the regex has no group for that value;
+	// Name and Episode are mandatory.
+	Name, Episode, Languages, Subtitles, Resolution int
+
+	// Language parsers applied to the text of the Languages and Subtitles
+	// groups respectively.
+	LanguageParser, SubtitleParser LanguageParserFunc
+
+	// Default values used when the corresponding group reference is 0.
+	DefaultLanguages  []string
+	DefaultSubtitles  []string
+	DefaultResolution model.Resolution
+}
+
+func RegexTorrentParser(regex string, options TorrentParseOptions) model.TorrentParserFunc {
+	pattern := regexp.MustCompile(regex)
+
+	// handle faulty regexes
+	if options.Name == 0 || options.Episode == 0 {
+		panic(ErrTorrentParserInsufficientData.New(regex))
+	} else if options.Languages == 0 && options.DefaultLanguages == nil {
+		panic(ErrTorrentParserInsufficientLanguageData.New(regex))
+	} else if options.Subtitles == 0 && options.DefaultSubtitles == nil {
+		panic(ErrTorrentParserInsufficientSubtitleData.New(regex))
+	} else if options.Resolution == 0 && options.DefaultResolution == 0 {
+		panic(ErrTorrentParserInsufficientResolutionData.New(regex))
+	}
+
+	// handle faulty group references
+	for _, g := range []int{options.Name, options.Episode, options.Languages, options.Subtitles, options.Resolution} {
+		if g > pattern.NumSubexp() {
+			panic(ErrTorrentParserInvalidGroupReference.New(g, pattern.NumSubexp()))
+		}
+	}
+
+	return func(parser *model.Parser, torrent *model.Torrent) (ParsedTorrent *model.ParsedTorrent, ok bool) {
+		var err error
+
+		matches := pattern.FindStringSubmatch(torrent.Title)
+
+		if matches == nil {
+			return nil, false
+		}
+
+		episode, ok := atoi(matches[options.Episode])
+		if !ok {
+			return nil, false
+		}
+
+		resolution := options.DefaultResolution
+		if options.Resolution != 0 {
+			resolution, err = model.ParseResolution(matches[options.Resolution])
+			if err != nil {
+				return nil, false
+			}
+		}
+
+		languages := options.DefaultLanguages
+		if options.Languages != 0 {
+			languages = options.LanguageParser(matches[options.Languages])
+		}
+
+		subtitles := options.DefaultSubtitles
+		if options.Subtitles != 0 {
+			subtitles = options.SubtitleParser(matches[options.Subtitles])
+		}
+
+		return &model.ParsedTorrent{
+			OriginalAnimeTitle: matches[options.Name],
+			Episode:            episode,
+			Resolution:         resolution,
+			Parser:             parser,
+			Languages:          ParseLanguages(languages),
+			Subtitles:          ParseLanguages(subtitles),
+
+			Torrent: torrent,
+		}, true
+	}
+}
diff --git a/parsers.go b/parsers.go
new file mode 100644
index 0000000..39ddd62
--- /dev/null
+++ b/parsers.go
@@ -0,0 +1,128 @@
+package parsers
+
+import "git.tordarus.net/nyaanime/model"
+
+/*
+how to get all torrent names on a nyaa page:
+
+let s = "";
+document.querySelectorAll("tr > td:nth-child(2) > a:not(.comments)").forEach(element => {
+  s += element.textContent + "\n";
+})
+console.log(s);
+
+*/
+
+// Parsers lists every supported release group together with the regexes
+// used to recognize its torrent titles and its downloaded file names.
+//
+// The entries are built when the package is initialized. RegexTorrentParser
+// and RegexFileParser panic on a faulty definition, so a broken entry shows
+// up as soon as the package is loaded.
+var Parsers = []model.Parser{
+	{
+		Identity: "Erai-Raws",
+		TorrentParser: RegexTorrentParser(
+			`^\[Erai-raws\] (.*) - (.*?) (?:END )?(?:\[v\d+\])?\[(.*?)p\](?:\[HEVC\])?(?:\[Multiple Subtitle\])?(?:\s(\[.*?\]+)?|\[[A-Z0-9]{8}\]\.mkv)$`,
+			TorrentParseOptions{
+				Name:             1,
+				Episode:          2,
+				Resolution:       3,
+				Subtitles:        4,
+				SubtitleParser:   SquareBracketsLanguageParser,
+				DefaultLanguages: []string{"ja"},
+			},
+		),
+		FileParser: RegexFileParser(
+			`^\[Erai-raws\] (.*?) - (\d+?) .*?\.mkv$`,
+			FileParseOptions{
+				Name:    1,
+				Episode: 2,
+			},
+		),
+	},
+
+	{
+		Identity: "SubsPlease",
+		TorrentParser: RegexTorrentParser(
+			// the dot of the extension is escaped so only a literal ".mkv" matches
+			`^\[SubsPlease\] (.*) - (\d+?) \((.*?)\) \[.*?\]\.mkv$`,
+			TorrentParseOptions{
+				Name:             1,
+				Episode:          2,
+				Resolution:       3,
+				DefaultLanguages: []string{"ja"},
+				DefaultSubtitles: []string{"en"},
+			},
+		),
+		FileParser: RegexFileParser(
+			`^\[SubsPlease\] (.*?) - (\d+?) .*?\.mkv$`,
+			FileParseOptions{
+				Name:    1,
+				Episode: 2,
+			},
+		),
+	},
+
+	{
+		Identity: "PuyaSubs!",
+		TorrentParser: RegexTorrentParser(
+			`^\[PuyaSubs!\] (.*) - (\d+?) \[ESP-ENG\]\[(.*?)\]\[.*?\]\.mkv$`,
+			TorrentParseOptions{
+				Name:             1,
+				Episode:          2,
+				Resolution:       3,
+				DefaultLanguages: []string{"ja"},
+				DefaultSubtitles: []string{"en"},
+			},
+		),
+		FileParser: RegexFileParser(
+			`^\[PuyaSubs!\] (.*?) - (\d+?) .*?\.mkv$`,
+			FileParseOptions{
+				Name:    1,
+				Episode: 2,
+			},
+		),
+
+		// tag 0th audio stream as japanese language and copy all other streams unchanged into output file
+		FileEncoding: "-map 0 -c:v copy -c:a copy -c:s copy -metadata:s:a:0 language=jpn",
+	},
+
+	{
+		Identity: "NanakoRaws-JP",
+		TorrentParser: RegexTorrentParser(
+			`^\[NanakoRaws\] (.*?) - (\d+?)(?:v\d+)?(?: END)? \((.*?)p\)(?: \(.*?\))?\.mkv \(include JPsub.*?\)$`,
+			TorrentParseOptions{
+				Name:             1,
+				Episode:          2,
+				Resolution:       3,
+				DefaultLanguages: []string{"ja"},
+				DefaultSubtitles: []string{"ja"},
+			},
+		),
+		FileParser: RegexFileParser(
+			`^\[NanakoRaws\] (.*?) - (\d+?)(?:v\d+)?(?: END)? \((.*?)p\)(?: \(.*?\))?\.mkv$`,
+			FileParseOptions{
+				Name:    1,
+				Episode: 2,
+			},
+		),
+
+		// tag 0th audio stream and subtitle stream as japanese language and copy all other streams unchanged into output file
+		FileEncoding: "-map 0 -c:v copy -c:a copy -c:s copy -metadata:s:a:0 language=jpn -metadata:s:s:0 language=jpn",
+	},
+
+	{
+		Identity: "Ohys-Raws",
+		TorrentParser: RegexTorrentParser(
+			`^\[Ohys-Raws\] (.*?) - (\d+?) \(.*? \d+x(\d+?) .*?\)(?: v2)?\.mp4$`,
+			TorrentParseOptions{
+				Name:             1,
+				Episode:          2,
+				Resolution:       3,
+				DefaultLanguages: []string{"ja"},
+				// empty but non-nil: counts as an explicit default of "no subtitles"
+				DefaultSubtitles: []string{},
+			},
+		),
+		FileParser: RegexFileParser(
+			`^\[Ohys-Raws\] (.*) - (.*?) (?:END )?\(.*?\)(?: v2)?\.mp4$`,
+			FileParseOptions{
+				Name:    1,
+				Episode: 2,
+			},
+		),
+	},
+}
diff --git a/utils.go b/utils.go
new file mode 100644
index 0000000..e60493b
--- /dev/null
+++ b/utils.go
@@ -0,0 +1,22 @@
+package parsers
+
+import (
+	"strconv"
+	"strings"
+)
+
+// atoi parses s as a base-10 integer. It returns the value and true on
+// success, or 0 and false when strconv.Atoi rejects s.
+func atoi(s string) (int, bool) {
+	if n, err := strconv.Atoi(s); err == nil {
+		return n, true
+	}
+	return 0, false
+}
+
+// TrimPrefixSuffix returns a new slice in which prefix and suffix have been
+// removed (at most once each) from every element of arr. Elements lacking
+// the prefix or suffix are kept as they are for that side. The result is
+// always non-nil and has the same length as arr.
+func TrimPrefixSuffix(arr []string, prefix, suffix string) []string {
+	trimmed := make([]string, len(arr))
+	for i := range arr {
+		withoutPrefix := strings.TrimPrefix(arr[i], prefix)
+		trimmed[i] = strings.TrimSuffix(withoutPrefix, suffix)
+	}
+	return trimmed
+}