-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpoetry_test.go
74 lines (65 loc) · 1.7 KB
/
poetry_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
package random
import (
"log"
"testing"
)
// cPoetry is the scrape target for a single poem entry. Each field carries an
// `exp` struct tag holding the XPath expression used to populate it.
// NOTE(review): the only visible use is in the commented-out estGetUrlPoetry
// in this file, where a cPoetry{} is handed to ForEachObjectByTag on the "//li"
// nodes, so the "./" paths are presumably relative to one list item — confirm
// against the extractor package.
type cPoetry struct {
// Title: text of the anchor matched by a[1] under the div with class 'pL-tit'.
Title string `exp:"./div[@class='pL-tit']//a[1]/text()"`
// Dynasty: text of the anchor matched by a[2] under the same div. The
// commented-out scraper trims the characters "【】 " from it after extraction.
Dynasty string `exp:"./div[@class='pL-tit']//a[2]/text()"`
// Writer: text of the anchor matched by a[3] under the same div.
Writer string `exp:"./div[@class='pL-tit']//a[3]/text()"`
// Content: text of the div(s) with class 'sj' inside the div with class 'cont'.
// The commented-out scraper joins the entries with "\n"; presumably one entry
// per verse line — TODO confirm.
Content []string `exp:"./div[@class='cont']//div[@class='sj']/text()"`
}
// estPoetry is a manual smoke run: it creates a generator with New, selects
// DataPoetryChina via Use, then fetches 1000 poems through Extend().Poetry()
// and logs each poem's title, writer, dynasty and content, in that order.
//
// The name lacks the "Test" prefix, so `go test` does not execute it.
// NOTE(review): presumably disabled on purpose — rename to TestPoetry to run it.
// It makes no assertions and never uses t.
func estPoetry(t *testing.T) {
	const draws = 1000

	r := New()
	Use(DataPoetryChina)

	for n := 0; n < draws; n++ {
		poem := r.Extend().Poetry()
		// One log line per field, same order as the fields are listed here.
		for _, field := range []interface{}{poem.Title, poem.Writer, poem.Dynasty, poem.Content} {
			log.Println(field)
		}
	}
}
// func estGetUrlPoetry(t *testing.T) {
// u := "https://www.y5000.com/poetry/list-0-0-1.html"
// tp := requests.NewSession().Get(u)
// p := tp.PathParam("list-(\\d+)-0-(\\d+)\\.")
// var ps []*cPoetry
// for c := int64(1); c < 13; c++ {
// for i := int64(1); ; i++ {
// p.IntArraySet(0, c)
// p.IntArraySet(1, i)
// resp, err := tp.Execute()
// if err != nil {
// panic(err)
// }
// etor := extractor.ExtractHtml(resp.Content())
// xps, err := etor.XPaths("//ul[@class='pL-ul']")
// if err != nil {
// panic(err)
// }
// lis, errs := xps.ForEach("//li")
// if len(errs) != 0 {
// panic(errs)
// }
// if lis == nil {
// log.Println("page:", i)
// break
// }
// for _, poetry := range lis.ForEachObjectByTag(cPoetry{}) {
// i := poetry.(*cPoetry)
// i.Dynasty = strings.Trim(i.Dynasty, "【】 ")
// ps = append(ps, i)
// }
// }
// }
// for _, cp := range ps {
// p := &Poetry{}
// p.Title = cp.Title
// p.Dynasty = cp.Dynasty
// p.Writer = cp.Writer
// p.Content = strings.Join(cp.Content, "\n")
// poetrys = append(poetrys, p)
// }
// CompressData("poetry.gob.zst", poetrys)
// }