From 9b0caebc1b4c1a2740bc41d08ec30395480b6681 Mon Sep 17 00:00:00 2001 From: Harsh Zalavadiya Date: Thu, 26 Aug 2021 16:20:24 +0000 Subject: [PATCH] refactor: :recycle: added http api --- README.md | 15 ++++++++++++++- go.mod | 7 +++++++ go.sum | 24 ++++++++++++++++++++++++ main.go | 47 ++++++++++++++++++++++++++++++++++++++++------- 4 files changed, 85 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 1c370ff..d2b4d05 100644 --- a/README.md +++ b/README.md @@ -11,14 +11,27 @@ pre-built binaries by GitHub Actions can be downloaded from [releases](https://g ### Usage +#### Web + +```sh +# the command below starts gnfinder-plus on port 3006 +# a different port can be set with the -port flag, e.g. -port=8080 +./gnfinder-plus + +# in another window +curl -F file=@687.pdf http://localhost:3006/parse ``` + +#### CLI + +```sh wget https://indiabiodiversity.org/biodiv/content/documents/document-0162468a-7ce7-499e-ac6d-ead2dc273c35/687.pdf ./gnfinder-plus -file=687.pdf ``` ### Note -in some cases `pdftotext` bin might be missing please install according to your platform +in some cases the `pdftotext` binary might be missing; please install it according to your OS ```sh sudo apt install poppler-utils # debian diff --git a/go.mod b/go.mod index f91edd3..e471158 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( code.sajari.com/docconv v1.2.0 github.com/gnames/gnfinder v0.15.0 github.com/gnames/gnfmt v0.2.0 + github.com/gofiber/fiber/v2 v2.18.0 ) require ( @@ -13,6 +14,7 @@ require ( github.com/PuerkitoBio/goquery v1.5.1 // indirect github.com/abadojack/whatlanggo v1.0.1 // indirect github.com/advancedlogic/GoOse v0.0.0-20191112112754-e742535969c1 // indirect + github.com/andybalholm/brotli v1.0.2 // indirect github.com/andybalholm/cascadia v1.2.0 // indirect github.com/araddon/dateparse v0.0.0-20200409225146-d820a6159ab1 // indirect github.com/fatih/set v0.2.1 // indirect @@ -24,6 +26,7 @@ require ( github.com/golang/protobuf v1.4.2 // indirect github.com/jaytaylor/html2text
v0.0.0-20200412013138-3577fbdbcff7 // indirect github.com/json-iterator/go v1.1.11 // indirect + github.com/klauspost/compress v1.13.4 // indirect github.com/levigross/exp-html v0.0.0-20120902181939-8df60c69a8f5 // indirect github.com/mattn/go-runewidth v0.0.9 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect @@ -34,7 +37,11 @@ require ( github.com/richardlehane/mscfb v1.0.3 // indirect github.com/richardlehane/msoleps v1.0.1 // indirect github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect + github.com/valyala/bytebufferpool v1.0.0 // indirect + github.com/valyala/fasthttp v1.29.0 // indirect + github.com/valyala/tcplisten v1.0.0 // indirect golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d // indirect + golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf // indirect golang.org/x/text v0.3.7 // indirect google.golang.org/protobuf v1.23.0 // indirect ) diff --git a/go.sum b/go.sum index dc8ceec..32a5e7b 100644 --- a/go.sum +++ b/go.sum @@ -9,6 +9,8 @@ github.com/abadojack/whatlanggo v1.0.1 h1:19N6YogDnf71CTHm3Mp2qhYfkRdyvbgwWdd2EP github.com/abadojack/whatlanggo v1.0.1/go.mod h1:66WiQbSbJBIlOZMsvbKe5m6pzQovxCH9B/K8tQB2uoc= github.com/advancedlogic/GoOse v0.0.0-20191112112754-e742535969c1 h1:d0Ct1dZwgwMO0Llf81Eu+Lyj6kwqXdqHP/WsSkEria0= github.com/advancedlogic/GoOse v0.0.0-20191112112754-e742535969c1/go.mod h1:f3HCSN1fBWjcpGtXyM119MJgeQl838v6so/PQOqvE1w= +github.com/andybalholm/brotli v1.0.2 h1:JKnhI/XQ75uFBTiuzXpzFrUriDPiZjlOSzh6wXogP0E= +github.com/andybalholm/brotli v1.0.2/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE= @@ -38,6 +40,8 @@ github.com/gnames/gnlib v0.2.2/go.mod h1:RZs+/sQHGlyMsHK7pgbd0Zt3QlYQXFPURc+yKuO 
github.com/go-resty/resty/v2 v2.0.0/go.mod h1:dZGr0i9PLlaaTD4H/hoZIDjQ+r6xq8mgbRzHZf7f2J8= github.com/go-resty/resty/v2 v2.3.0 h1:JOOeAvjSlapTT92p8xiS19Zxev1neGikoHsXJeOq8So= github.com/go-resty/resty/v2 v2.3.0/go.mod h1:UpN9CgLZNsv4e9XG50UU8xdI0F43UQ4HmxLBDwaroHU= +github.com/gofiber/fiber/v2 v2.18.0 h1:xCWYSVoTNibHpzfciPwUSZGiTyTpTXYchCwynuJU09s= +github.com/gofiber/fiber/v2 v2.18.0/go.mod h1:/LdZHMUXZvTTo7gU4+b1hclqCAdoQphNQ9bi9gutPyI= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= @@ -45,6 +49,7 @@ github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:W github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0= github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4= @@ -56,6 +61,8 @@ github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7/go.mod h1:CVKl github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.11 h1:uVUAXhF2To8cbw/3xN3pxj6kk7TYKs98NIrTqPlMWAQ= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/klauspost/compress v1.13.4 h1:0zhec2I8zGnjWcKyLl6i3gPqKANCCn5e9xmviEEeX6s= +github.com/klauspost/compress v1.13.4/go.mod 
h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= @@ -101,18 +108,35 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/tj/assert v0.0.3 h1:Df/BlaZ20mq6kuai7f5z2TvPFiwC3xaWJSDQNiIS3Rk= +github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasthttp v1.29.0 h1:F5GKpytwFk5OhCuRh6H+d4vZAcEeNAwPTdwQnm6IERY= +github.com/valyala/fasthttp v1.29.0/go.mod h1:2rsYD01CKFrjjsvFxx75KlEUNpWNBY9JWD3K/7o2Cus= +github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= +github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod 
h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210510120150-4163338589ed/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d h1:LO7XpTYMwTqxjLcGWPijK3vRXg1aWdlNOVOHRq45d7c= golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf h1:2ucpDCmfkl8Bd/FsLtiD653Wf96cW37s+iGx93zsu4k= +golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/main.go b/main.go index 30aa493..214f9ab 100644 --- a/main.go +++ b/main.go @@ -4,6 +4,7 @@ import ( "flag" "fmt" "log" + "os" "code.sajari.com/docconv" "github.com/gnames/gnfinder" 
@@ -11,15 +12,12 @@ import ( "github.com/gnames/gnfinder/ent/nlp" "github.com/gnames/gnfinder/io/dict" "github.com/gnames/gnfmt" + "github.com/gofiber/fiber/v2" ) -func main() { - // Parse CLI Arguements - filePath := flag.String("file", "_", "File Path") - flag.Parse() - +func parse(filePath string) string { // Attempt to read file - txt, err := docconv.ConvertPath(*filePath) + txt, err := docconv.ConvertPath(filePath) if err != nil { log.Fatal(err) } @@ -28,5 +26,40 @@ func main() { cfg := config.New() gnf := gnfinder.New(cfg, dict.LoadDictionary(), nlp.BayesWeights()) output := gnf.Find("", txt.Body) - fmt.Println(output.Format(gnfmt.PrettyJSON)) + return output.Format(gnfmt.PrettyJSON) +} + +func server(serverPort string) { + app := fiber.New() + + app.Post("/parse", func(c *fiber.Ctx) error { + + file, err := c.FormFile("file") + fullFilePath := os.TempDir() + string(os.PathSeparator) + file.Filename + + if err == nil { + c.SaveFile(file, fullFilePath) + parsedResponse := parse(fullFilePath) + os.Remove(fullFilePath) // housekeeping + + return c.Type("json").SendString(parsedResponse) + } + + return c.SendStatus(fiber.StatusBadRequest) + }) + + app.Listen(":" + serverPort) +} + +func main() { + // Parse CLI Arguements + filePath := flag.String("file", "_", "File Path") + serverPort := flag.String("port", "3006", "Server Port") + flag.Parse() + + if *filePath == "_" { + server(*serverPort) + } + + fmt.Println(parse(*filePath)) }