Archived
1
This repository has been archived on 2022-11-04. You can view files and clone it, but cannot push or open issues or pull requests.
wind8_fetcher/api/fetch_request/parser.go

404 lines
15 KiB
Go
Raw Permalink Normal View History

2017-10-27 07:45:21 +04:00
package fetchrequest
import (
	// stdlib
	"bytes"
	"io/ioutil"
	"net/http"
	"regexp"
	"strconv"
	"strings"
	"time"

	// 3rd-party
	"golang.org/x/net/html"
)
// getChronicle skips the next six tokens and then concatenates the string
// form of every following token until a closing </div> tag is reached.
// The closing tag itself is consumed but not included in the result.
//
// If the document ends before a closing </div> is seen, whatever has been
// collected so far is returned instead of looping forever.
func getChronicle(profilePageTokenizer *html.Tokenizer) string {
	// NOTE(review): the six skipped tokens presumably belong to the wrapper
	// markup preceding the chronicle text — confirm against the page layout.
	for i := 0; i < 6; i++ {
		profilePageTokenizer.Next()
	}

	var chronicle bytes.Buffer
	for {
		tt := profilePageTokenizer.Next()
		// Once the tokenizer reports an error (including EOF) every further
		// Next() call reports it again, so this is the only safe exit for
		// truncated or malformed documents.
		if tt == html.ErrorToken {
			break
		}
		txt := profilePageTokenizer.Token()
		if tt == html.EndTagToken && txt.Data == "div" {
			break
		}
		chronicle.WriteString(txt.String())
	}
	return chronicle.String()
}
// getSimpleString advances the tokenizer by four tokens and stores the data
// of the token it lands on under data[item], unless that key already holds a
// non-empty value. The (same) map is returned for convenience.
func getSimpleString(profilePageTokenizer *html.Tokenizer, item string, data map[string]string) map[string]string {
	for skipped := 0; skipped < 4; skipped++ {
		profilePageTokenizer.Next()
	}
	value := profilePageTokenizer.Token().Data

	// First occurrence wins: never overwrite a value recorded earlier.
	if data[item] != "" {
		return data
	}
	data[item] = value
	return data
}
// skillLevelDigits matches runs of decimal digits; it is compiled once
// instead of on every list item that getSkill inspects.
var skillLevelDigits = regexp.MustCompile("[0-9]+")

// getSkill inspects the tokens following a <li> start tag. The data of the
// first token is taken as a candidate skill name; the data of the third
// token is accepted as the level caption only if it contains "уровня".
//
// On a match two entries are added to data: "<n>" holding the skill name and
// "<n>_level" holding all digits of the caption, where n is len(data)/2
// before insertion (i.e. the zero-based index of the skill). When the caption
// does not match, data is returned unchanged.
func getSkill(profilePageTokenizer *html.Tokenizer, data map[string]string) map[string]string {
	profilePageTokenizer.Next()
	possiblySkillName := profilePageTokenizer.Token().Data

	// The token in between is skipped without being read; only the one after
	// it is examined.
	profilePageTokenizer.Next()
	profilePageTokenizer.Next()
	txt := profilePageTokenizer.Token()

	if strings.Contains(txt.Data, "уровня") {
		// Every skill occupies two keys, so half the map size is the index
		// of the skill being added.
		label := strconv.Itoa(len(data) / 2)
		data[label] = possiblySkillName
		data[label+"_level"] = strings.Join(skillLevelDigits.FindAllString(txt.Data, -1), "")
	}
	return data
}
// getPantheon advances the tokenizer by three tokens, extracts every digit
// from the data of the token it lands on and stores the concatenated digits
// under pantheons[name]. Any previous value for name is overwritten. The
// (same) map is returned.
func getPantheon(profilePageTokenizer *html.Tokenizer, name string, pantheons map[string]string) map[string]string {
	for step := 0; step < 3; step++ {
		profilePageTokenizer.Next()
	}
	cell := profilePageTokenizer.Token()

	digitRuns := regexp.MustCompile("[0-9]+").FindAllString(cell.Data, -1)
	pantheons[name] = strings.Join(digitRuns, "")
	return pantheons
}
// getWeapon reads one equipment row. It advances four tokens and takes the
// data of the resulting token as the item name. If data[weaponPrefix] is
// still empty, the name is stored there, the tokenizer is advanced four more
// tokens and that token's data is stored under weaponPrefix+"_stat".
//
// When the slot was already filled the second group of tokens is NOT
// consumed and data is returned untouched.
func getWeapon(profilePageTokenizer *html.Tokenizer, weaponPrefix string, data map[string]string) map[string]string {
	for i := 0; i < 4; i++ {
		profilePageTokenizer.Next()
	}
	itemName := profilePageTokenizer.Token().Data

	if data[weaponPrefix] != "" {
		return data
	}
	data[weaponPrefix] = itemName

	for i := 0; i < 4; i++ {
		profilePageTokenizer.Next()
	}
	data[weaponPrefix+"_stat"] = profilePageTokenizer.Token().Data
	return data
}
func parseHTML(data []byte) (map[string]string, map[string]string, map[string]string, bool) {
c.Log.Info("Profile parsing started...")
profileHTML := bytes.NewReader(data)
numbersRegexp := regexp.MustCompile("[0-9]+")
profilePageTokenizer := html.NewTokenizer(profileHTML)
profile := make(map[string]string)
skills := make(map[string]string)
pantheons := make(map[string]string)
for {
tt := profilePageTokenizer.Next()
switch {
case tt == html.ErrorToken:
if profile["god_name"] == "Первая полоса" {
return profile, skills, pantheons, false
} else {
return profile, skills, pantheons, true
}
case tt == html.StartTagToken:
t := profilePageTokenizer.Token()
switch {
case t.Data == "h2":
profilePageTokenizer.Next()
txt := profilePageTokenizer.Token()
if profile["god_name"] == "" {
profile["god_name"] = strings.Join(strings.Fields(txt.Data)[:], " ")
}
case t.Data == "h3":
profilePageTokenizer.Next()
txt := profilePageTokenizer.Token()
if profile["hero_name"] == "" {
profile["hero_name"] = strings.Join(strings.Fields(txt.Data)[:], " ")
}
case t.Data == "p":
for _, attr := range t.Attr {
if attr.Key == "class" {
switch {
case attr.Val == "level":
profilePageTokenizer.Next()
txt := profilePageTokenizer.Token()
if profile["level"] == "" {
profile["level"] = strings.Split(txt.Data, "-")[0]
}
profilePageTokenizer.Next()
profilePageTokenizer.Next()
txt = profilePageTokenizer.Token()
if strings.Contains(txt.Data, "торговец") {
if profile["shop_level"] == "" {
profile["shop_level"] = strings.Join(numbersRegexp.FindAllString(txt.Data, -1), "")
}
}
case attr.Val == "motto":
profilePageTokenizer.Next()
txt := profilePageTokenizer.Token()
if profile["motto"] == "" {
profile["motto"] = strings.Join(strings.Fields(txt.Data)[:], " ")
}
}
}
}
case t.Data == "td":
for _, attr := range t.Attr {
if attr.Key == "class" {
switch {
case attr.Val == "label":
profilePageTokenizer.Next()
lbl := profilePageTokenizer.Token()
switch {
case lbl.Data == "Возраст":
profile = getSimpleString(profilePageTokenizer, "age", profile)
case lbl.Data == "Характер":
profile = getSimpleString(profilePageTokenizer, "personality", profile)
case lbl.Data == "Гильдия":
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
txt := profilePageTokenizer.Token()
if profile["guild"] == "" {
profile["guild"] = strings.Join(strings.Fields(txt.Data)[:], " ")
}
case lbl.Data == "Смертей":
profile = getSimpleString(profilePageTokenizer, "deaths", profile)
case lbl.Data == "Побед / Поражений":
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
txt := profilePageTokenizer.Token()
datas := strings.Join(strings.Fields(txt.Data)[:], " ")
if profile["arena_wins"] == "" {
profile["arena_wins"] = strings.Split(string(datas), " / ")[0]
profile["arena_loss"] = strings.Split(string(datas), " / ")[1]
}
case lbl.Data == "Храм достроен":
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
txt := profilePageTokenizer.Token()
if profile["bricks"] == "" {
profile["church_done"] = txt.Data
profile["bricks"] = "1000"
}
case lbl.Data == "Кирпичей для храма":
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
txt := profilePageTokenizer.Token()
if profile["bricks"] == "" {
profile["church_done"] = ""
profile["bricks"] = strings.Join(numbersRegexp.FindAllString(txt.Data, -1), "")
}
case lbl.Data == "Дерева для ковчега":
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
txt := profilePageTokenizer.Token()
if profile["woods"] == "" {
profile["boat_done"] = ""
profile["woods"] = strings.Join(numbersRegexp.FindAllString(txt.Data, -1), "")
}
case lbl.Data == "Ковчег достроен":
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
txt := profilePageTokenizer.Token()
if profile["woods"] == "" {
profile["boat_done"] = strings.Split(txt.Data, " ")[0]
profile["woods"] = strings.Join(numbersRegexp.FindAllString(strings.Split(txt.Data, " ")[1], -1), "")
}
case lbl.Data == "Твари по паре":
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
txt := profilePageTokenizer.Token()
if profile["beasts_male"] == "" {
profile["beasts_done"] = ""
profile["beasts_male"] = strings.Join(numbersRegexp.FindAllString(strings.Fields(txt.Data)[0], -1), "")
profile["beasts_female"] = strings.Join(numbersRegexp.FindAllString(strings.Fields(txt.Data)[1], -1), "")
profile["beasts_pairs"] = strings.Join(numbersRegexp.FindAllString(strings.Split(strings.Split(txt.Data, " ")[1], "ж")[0], -1), "")
}
case lbl.Data == "Твари собраны":
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
txt := profilePageTokenizer.Token()
if profile["beasts_male"] == "" {
profile["beasts_done"] = txt.Data
profile["beasts_male"] = "1000"
profile["beasts_female"] = "1000"
profile["beasts_pairs"] = "1000"
}
case lbl.Data == "Сбережения":
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
txt := profilePageTokenizer.Token()
if profile["pension"] == "" {
profile["pension"] = strings.Join(numbersRegexp.FindAllString(strings.Fields(txt.Data)[0], -1), "")
}
case lbl.Data == "Лавка":
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
txt := profilePageTokenizer.Token()
if profile["pension"] == "" {
profile["pension"] = "30000"
profile["shop"] = txt.Data
}
case lbl.Data == "Питомец":
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
profilePageTokenizer.Next()
txt := profilePageTokenizer.Token()
if profile["pet_name"] == "" {
profile["pet_type"] = txt.Data
profilePageTokenizer.Next()
profilePageTokenizer.Next()
txt = profilePageTokenizer.Token()
profile["pet_name"] = strings.Fields(txt.Data)[0]
if strings.Contains(txt.Data, "уровня") {
profile["pet_level"] = strings.Join(numbersRegexp.FindAllString(strings.Fields(txt.Data)[1], -1), "")
}
}
case lbl.Data == "Оружие":
profile = getWeapon(profilePageTokenizer, "weapon", profile)
case lbl.Data == "Щит":
profile = getWeapon(profilePageTokenizer, "coat", profile)
case lbl.Data == "Голова":
profile = getWeapon(profilePageTokenizer, "head", profile)
case lbl.Data == "Тело":
profile = getWeapon(profilePageTokenizer, "body", profile)
case lbl.Data == "Руки":
profile = getWeapon(profilePageTokenizer, "hands", profile)
case lbl.Data == "Ноги":
profile = getWeapon(profilePageTokenizer, "legs", profile)
case lbl.Data == "Талисман":
profile = getWeapon(profilePageTokenizer, "talisman", profile)
}
case attr.Val == "name":
profilePageTokenizer.Next()
nm := profilePageTokenizer.Token()
switch {
case nm.Data == "Благодарности":
pantheons = getPantheon(profilePageTokenizer, "gratitude", pantheons)
case nm.Data == "Мощи":
pantheons = getPantheon(profilePageTokenizer, "might", pantheons)
case nm.Data == "Храмовничества":
pantheons = getPantheon(profilePageTokenizer, "templehood", pantheons)
case nm.Data == "Гладиаторства":
pantheons = getPantheon(profilePageTokenizer, "gladiatorship", pantheons)
case nm.Data == "Сказаний":
pantheons = getPantheon(profilePageTokenizer, "storytelling", pantheons)
case nm.Data == "Поддержки":
pantheons = getPantheon(profilePageTokenizer, "donation", pantheons)
case nm.Data == "Мастерства":
pantheons = getPantheon(profilePageTokenizer, "mastery", pantheons)
case nm.Data == "Строительства":
pantheons = getPantheon(profilePageTokenizer, "construction", pantheons)
case nm.Data == "Звероводства":
pantheons = getPantheon(profilePageTokenizer, "taming", pantheons)
case nm.Data == "Живучести":
pantheons = getPantheon(profilePageTokenizer, "survival", pantheons)
case nm.Data == "Зажиточности":
pantheons = getPantheon(profilePageTokenizer, "savings", pantheons)
case nm.Data == "Величия":
pantheons = getPantheon(profilePageTokenizer, "glory", pantheons)
case nm.Data == "Созидания":
pantheons = getPantheon(profilePageTokenizer, "creation", pantheons)
case nm.Data == "Разрушения":
pantheons = getPantheon(profilePageTokenizer, "destruction", pantheons)
case nm.Data == "Плотничества":
pantheons = getPantheon(profilePageTokenizer, "wood", pantheons)
case nm.Data == "Отлова":
pantheons = getPantheon(profilePageTokenizer, "pairs", pantheons)
case nm.Data == "Соперничества":
pantheons = getPantheon(profilePageTokenizer, "duelers", pantheons)
case nm.Data == "Солидарности":
pantheons = getPantheon(profilePageTokenizer, "unity", pantheons)
case nm.Data == "Влиятельности":
pantheons = getPantheon(profilePageTokenizer, "popularity", pantheons)
case nm.Data == "Воинственности":
pantheons = getPantheon(profilePageTokenizer, "duelery", pantheons)
}
}
}
}
case t.Data == "div":
for _, attr := range t.Attr {
switch {
case attr.Key == "class":
if attr.Val == "guild_status" {
profilePageTokenizer.Next()
txt := profilePageTokenizer.Token()
if profile["guild_status"] == "" {
profile["guild_status"] = strings.Split(strings.Split(txt.Data, "(")[1], ")")[0]
}
}
case attr.Key == "id":
if attr.Val == "post_content" {
profile["chronicle"] = getChronicle(profilePageTokenizer)
}
}
}
case t.Data == "li":
skills = getSkill(profilePageTokenizer, skills)
}
}
}
}
// startParsing downloads the profile page of godName from godville.net and
// feeds it to parseHTML.
//
// It returns the profile, skills and pantheons maps followed by a status
// string: "success" when the page was fetched and parseHTML accepted it,
// "error" otherwise (request failure, non-200 response, read failure or a
// page rejected by parseHTML). On transport-level failures the maps are
// empty but never nil.
func startParsing(godName string) (map[string]string, map[string]string, map[string]string, string) {
	profile := make(map[string]string)
	skills := make(map[string]string)
	pantheons := make(map[string]string)

	site := "https://godville.net"
	// Bound the whole request so a stalled server cannot block the caller
	// indefinitely.
	client := http.Client{Timeout: 30 * time.Second}

	resp, err := client.Get(site + "/gods/" + godName)
	if err != nil {
		c.Log.Errorln(err)
		return profile, skills, pantheons, "error"
	}
	defer resp.Body.Close()

	// An error page must not be parsed as if it were a profile.
	if resp.StatusCode != http.StatusOK {
		c.Log.Errorln("unexpected response status: " + resp.Status)
		return profile, skills, pantheons, "error"
	}

	data, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		c.Log.Errorln(err)
		return profile, skills, pantheons, "error"
	}

	profile, skills, pantheons, ok := parseHTML(data)
	if !ok {
		return profile, skills, pantheons, "error"
	}
	return profile, skills, pantheons, "success"
}