-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.go
137 lines (109 loc) · 2.73 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
package main
import (
"errors"
"flag"
"fmt"
"io"
"log"
"mime"
"net/http"
"net/url"
"os"
"path/filepath"
"code.sajari.com/docconv"
"github.com/gnames/gnfinder"
"github.com/gnames/gnfinder/config"
"github.com/gnames/gnfinder/ent/nlp"
"github.com/gnames/gnfinder/io/dict"
"github.com/gnames/gnfmt"
"github.com/gofiber/fiber/v2"
gonanoid "github.com/matoous/go-nanoid/v2"
"github.com/tidwall/gjson"
)
// getFilePath returns a path inside the OS temp directory at which the body of
// response can be stored.
//
// The base name is a random nanoid. When the response's Content-Type header
// maps to at least one known file extension (per mime.ExtensionsByType), the
// first such extension is appended to the name; otherwise the name has no
// extension. In both cases the result lives under os.TempDir().
func getFilePath(response *http.Response) string {
	// NOTE(review): the generation error is discarded, as before; presumably
	// it can only occur when the system random source is unavailable — confirm.
	filePrefix, _ := gonanoid.New()

	name := filePrefix
	mimeType := response.Header.Get("Content-Type")
	if extensions, err := mime.ExtensionsByType(mimeType); err == nil && len(extensions) > 0 {
		name += extensions[0]
	}

	// Always anchor the file in the temp directory; previously the
	// no-extension case returned a bare name relative to the working directory.
	return filepath.Join(os.TempDir(), name)
}
// downloadFile fetches URL with an HTTP GET and writes the response body to a
// new file whose path is chosen by getFilePath.
//
// On success it returns the path of the written file. It returns an empty path
// and an error when the request fails, when the status code is not 200, or
// when the file cannot be created, written or closed. A file that was only
// partially written is removed before returning.
func downloadFile(URL string) (string, error) {
	response, err := http.Get(URL)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	// Reject error responses before choosing a path or touching the disk.
	if response.StatusCode != http.StatusOK {
		return "", errors.New("received non 200 response code")
	}

	tmpFile := getFilePath(response)
	file, err := os.Create(tmpFile)
	if err != nil {
		return "", err
	}

	// Stream the body to disk; on failure do not leave a truncated file behind.
	if _, err := io.Copy(file, response.Body); err != nil {
		file.Close()
		os.Remove(tmpFile)
		return "", err
	}

	// Close explicitly so a failure to flush the data is reported to the caller.
	if err := file.Close(); err != nil {
		os.Remove(tmpFile)
		return "", err
	}
	return tmpFile, nil
}
// tryParseDocument converts the document at filePath to text with docconv and
// returns the names found in that text as produced by parseText. A conversion
// failure is returned to the caller instead of terminating the process.
func tryParseDocument(filePath string) (string, error) {
	doc, err := docconv.ConvertPath(filePath)
	if err != nil {
		return "", err
	}
	return parseText(doc.Body), nil
}

// parseDocument converts the document at filePath to text and returns the
// names found in it as produced by parseText.
//
// It calls log.Fatal, ending the process, when the document cannot be
// converted; long-running callers should use tryParseDocument instead.
func parseDocument(filePath string) string {
	out, err := tryParseDocument(filePath)
	if err != nil {
		log.Fatal(err)
	}
	return out
}

// parseText runs gnfinder over txt and returns the result formatted as
// gnfmt.PrettyJSON. A new finder, including a freshly loaded dictionary, is
// built on every call.
func parseText(txt string) string {
	finder := gnfinder.New(config.New(), dict.LoadDictionary(), nlp.BayesWeights())
	found := finder.Find("", txt)
	return found.Format(gnfmt.PrettyJSON)
}

// server starts a Fiber HTTP server listening on serverPort and blocks until
// it stops. It exposes a single route, GET /parse:
//
//   - If the request carries text, either as the "text" field of a JSON body
//     or as the "text" query parameter, that text is parsed directly.
//   - Otherwise the "file" query parameter must be an absolute http or https
//     URL; the document is downloaded, parsed, and the temporary copy removed.
//
// Responses: 200 with JSON on success, 400 for a missing or invalid "file"
// URL, 502 when the download fails, 422 when the document cannot be converted.
// A failure to start listening ends the process via log.Fatal.
//
// NOTE(review): the handler fetches any http(s) URL the caller supplies;
// consider restricting targets if the server is reachable by untrusted clients.
func server(serverPort string) {
	app := fiber.New()

	app.Get("/parse", func(c *fiber.Ctx) error {
		// Inline text wins over a file URL: JSON body first, then query string.
		bodyText := gjson.Get(string(c.Body()), "text").String()
		if bodyText == "" {
			bodyText = c.Query("text")
		}
		if bodyText != "" {
			return c.Type("json").SendString(parseText(bodyText))
		}

		// ParseRequestURI alone also accepts absolute paths and other schemes,
		// which http.Get cannot fetch, so require http or https explicitly.
		fullFilePath := c.Query("file")
		target, err := url.ParseRequestURI(fullFilePath)
		if err != nil || (target.Scheme != "http" && target.Scheme != "https") {
			return c.SendStatus(fiber.StatusBadRequest)
		}

		localPath, err := downloadFile(fullFilePath)
		if err != nil {
			return c.Status(fiber.StatusBadGateway).SendString(err.Error())
		}
		// The download is only needed for this request.
		defer os.Remove(localPath)

		// Use the error-returning variant so a bad document fails this request
		// rather than shutting down the whole server.
		result, err := tryParseDocument(localPath)
		if err != nil {
			return c.Status(fiber.StatusUnprocessableEntity).SendString(err.Error())
		}
		return c.Type("json").SendString(result)
	})

	if err := app.Listen(":" + serverPort); err != nil {
		log.Fatal(err)
	}
}
// isRemoteURL reports whether s is an absolute http or https URL with a host.
// url.ParseRequestURI on its own also accepts absolute filesystem paths such
// as "/home/user/doc.pdf", so a successful parse is not sufficient.
func isRemoteURL(s string) bool {
	u, err := url.ParseRequestURI(s)
	if err != nil {
		return false
	}
	return (u.Scheme == "http" || u.Scheme == "https") && u.Host != ""
}

// main parses the command-line flags and runs in one of two modes:
//
//   - With no -file flag (the default value "_"), it starts the HTTP server on
//     the port given by -port (default "3006").
//   - With -file set, it parses that document and prints the result to stdout.
//     An http(s) URL is downloaded to a temporary file first; anything else is
//     treated as a local path.
func main() {
	// Parse CLI arguments.
	filePath := flag.String("file", "_", "File Path")
	serverPort := flag.String("port", "3006", "Server Port")
	flag.Parse()

	if *filePath == "_" {
		server(*serverPort)
		// Do not fall through and try to parse the "_" sentinel as a file.
		return
	}

	if !isRemoteURL(*filePath) {
		// Local file.
		fmt.Println(parseDocument(*filePath))
		return
	}

	// Remote document: download, parse, then discard the temporary copy.
	localPath, err := downloadFile(*filePath)
	if err != nil {
		log.Fatal(err)
	}
	result := parseDocument(localPath)
	os.Remove(localPath)
	fmt.Println(result)
}