Skip to content

Commit

Permalink
Add logging and yaml configuration loading
Browse files Browse the repository at this point in the history
  • Loading branch information
YashMeh committed May 22, 2021
1 parent baf6731 commit e8147c0
Show file tree
Hide file tree
Showing 11 changed files with 414 additions and 40 deletions.
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
*.pdf
*.png
*.csv
*.env
*.yaml
*.env
8 changes: 8 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
services:
elasticSearch: http://localhost:9200
apacheTika: http://localhost:9998

# Directory containing the documents to index
appConfig:
filePath: ./assets/

37 changes: 37 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package config

import (
log "github.com/sirupsen/logrus"
"github.com/spf13/viper"
)

// Config wraps the viper instance that holds the application's loaded
// configuration values.
type Config struct {
// config is set by NewConfig; Get treats a nil value as fatal.
config *viper.Viper
}

// NewConfig returns a Config whose underlying viper instance has been
// loaded by readConfig.
func NewConfig() *Config {
	return &Config{config: readConfig()}
}

// Get returns the underlying viper instance. When the Config holds no
// viper instance, a fatal "config not initialized" message is logged.
func (c *Config) Get() *viper.Viper {
	if v := c.config; v != nil {
		return v
	}
	log.Fatal("config not initialized")
	return nil
}

// readConfig creates a new viper instance and loads the YAML file named
// "config" from the current directory into it.
//
// A read or parse failure is logged with Fatalf, so callers always receive
// a usable instance. Only the config file is consulted here; environment
// variables are not bound.
func readConfig() *viper.Viper {
	log.Info("reading config file")

	v := viper.New()
	v.SetConfigName("config")
	v.SetConfigType("yaml")
	v.AddConfigPath(".")

	if err := v.ReadInConfig(); err != nil {
		log.Fatalf("error reading config file: %v", err)
	}

	return v
}
4 changes: 2 additions & 2 deletions elasticApi/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ package elasticApi
type Document struct {
Body string
ContentType string
// MetaData string
FileName string
MetaData string
FileName string
}
5 changes: 4 additions & 1 deletion elasticApi/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package elasticApi

import (
"github.com/elastic/go-elasticsearch/v7"
"github.com/yashmeh/doc-rank/config"
)

type ElasticServer interface {
Expand All @@ -12,7 +13,9 @@ type elasticServer struct {
Client *elasticsearch.Client
}

func NewElasticClient(url string) (ElasticServer, error) {
func NewElasticClient(c *config.Config) (ElasticServer, error) {
config := c.Get()
url := config.GetString("services.elasticSearch")
cfg := elasticsearch.Config{
Addresses: []string{
url,
Expand Down
7 changes: 7 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@ module github.com/yashmeh/doc-rank
go 1.15

require (
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6 // indirect
github.com/elastic/go-elasticsearch/v7 v7.12.0 // indirect
github.com/google/go-tika v0.1.21 // indirect
github.com/konsorten/go-windows-terminal-sequences v1.0.1 // indirect
github.com/sirupsen/logrus v1.8.1 // indirect
github.com/spf13/viper v1.7.1 // indirect
github.com/stretchr/objx v0.1.1 // indirect
github.com/ugorji/go v1.1.4 // indirect
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77 // indirect
)
294 changes: 294 additions & 0 deletions go.sum

Large diffs are not rendered by default.

13 changes: 7 additions & 6 deletions main.go
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
package main

import (
"fmt"

log "github.com/sirupsen/logrus"
"github.com/yashmeh/doc-rank/config"
"github.com/yashmeh/doc-rank/elasticApi"
"github.com/yashmeh/doc-rank/service"
"github.com/yashmeh/doc-rank/tikaApi"
)

func main() {
aE, err := elasticApi.NewElasticClient("http://localhost:9200")
c := config.NewConfig()
aE, err := elasticApi.NewElasticClient(c)
if err != nil {
fmt.Println("[ERROR] Connecting to elasticsearch")
log.Error("[ERROR] Connecting to elasticsearch")
}
aT := tikaApi.NewTikaClient("http://localhost:9998")
aT := tikaApi.NewTikaClient(c)

sI := service.NewIndexService(aE, aT)
sI.IndexDoc("./assets/")
sI.IndexDoc(c)

// select {
// case j := <-service.Flag:
Expand Down
66 changes: 43 additions & 23 deletions service/indexService.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package service

import (
"context"
"fmt"
"os"
"strconv"
"strings"
Expand All @@ -11,6 +10,8 @@ import (
"github.com/elastic/go-elasticsearch/v7"
"github.com/elastic/go-elasticsearch/v7/esapi"
"github.com/google/go-tika/tika"
log "github.com/sirupsen/logrus"
"github.com/yashmeh/doc-rank/config"
"github.com/yashmeh/doc-rank/elasticApi"
"github.com/yashmeh/doc-rank/tikaApi"
"github.com/yashmeh/doc-rank/utils"
Expand All @@ -22,7 +23,7 @@ var (
)

type IndexService interface {
IndexDoc(dir string) error
IndexDoc(c *config.Config) error
}

type indexService struct {
Expand All @@ -36,32 +37,42 @@ func NewIndexService(elasticC elasticApi.ElasticServer, tikaC tikaApi.TikaServer

func ReadData(s *indexService, fileDir string, fileName string, ch chan<- *elasticApi.Document) {
//Open the file
f, err := os.Open(fileDir)
f1, err := os.Open(fileDir)
if err != nil {
fmt.Println("[ERROR] Opening file")
log.Error("[ERROR] Opening file")
}
defer f.Close()
f2, err := os.Open(fileDir)
if err != nil {
log.Error("[ERROR] Opening file")
}
f3, err := os.Open(fileDir)
if err != nil {
log.Error("[ERROR] Opening file")
}
defer f1.Close()
defer f2.Close()
defer f3.Close()
c := context.Background()
docBody, err := s.TClient.Parse(c, f)
docBody, err := s.TClient.Parse(c, f1)
if err != nil {
log.Error("[ERROR] Reading body")
}
docContent, err := s.TClient.Detect(c, f2)
if err != nil {
log.Error("[ERROR] Reading MIMETYPE")
}
docMeta, err := s.TClient.Meta(c, f3)
if err != nil {
fmt.Println("[ERROR] Reading body")
log.Error("[ERROR] Reading Meta-data")
}
// docContent, err := s.TClient.Detect(c, f)
// if err != nil {
// fmt.Println("[ERROR] Reading MIMETYPE")
// }
// docMeta, err := s.TClient.Meta(c, f)
// if err != nil {
// fmt.Println("[ERROR] Reading Meta-data")
// }
tikaDocument := &elasticApi.Document{
Body: docBody,
ContentType: "", //docContent,
// MetaData: "", //docMeta,
FileName: fileName,
ContentType: docContent,
MetaData: docMeta,
FileName: fileName,
}
defer func() {
fmt.Printf("Tika Processed: %s \n", fileName)
log.Infof("Tika Processed: %s \n", fileName)
}()
//Send the document on the channel
ch <- tikaDocument
Expand All @@ -70,11 +81,12 @@ func ReadData(s *indexService, fileDir string, fileName string, ch chan<- *elast
func IndexData(s *indexService, ch <-chan *elasticApi.Document, index int) {
defer wg.Done()
var fileName string
var statusCode int
for tikaDocument := range ch {
fileName = tikaDocument.FileName
docString, err := utils.JsonStruct(tikaDocument)
if err != nil {
fmt.Println("[ERROR] Converting to JSON string")
log.Error("[ERROR] Converting to JSON string")
}
// Instantiate a request object
req := esapi.IndexRequest{
Expand All @@ -86,20 +98,28 @@ func IndexData(s *indexService, ch <-chan *elasticApi.Document, index int) {

// Return an API response object from request
res, err := req.Do(context.Background(), s.EClient)
statusCode = res.StatusCode

if err != nil {
fmt.Println("[ERROR] Sending the request to elasticsearch")
log.Error("[ERROR] Sending the request to elasticsearch")
}
defer res.Body.Close()
break

}
defer func() {
fmt.Printf("Elastic Indexed: %s \n", fileName)
if statusCode == 200 {
log.Infof("Elastic Indexed: %s \n", fileName)
} else {
log.Errorf("[ERROR] Elastic Indexed: %s \n", fileName)
}
}()
}

// IndexDoc loads all the documents into Elasticsearch.
func (s *indexService) IndexDoc(dir string) error {
func (s *indexService) IndexDoc(c *config.Config) error {
config := c.Get()
dir := config.GetString("appConfig.filePath")
//Get all the files for the directory
files, err := utils.IOReadDir(dir)
if err != nil {
Expand Down
9 changes: 7 additions & 2 deletions tikaApi/server.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package tikaApi

import "github.com/google/go-tika/tika"
import (
"github.com/google/go-tika/tika"
"github.com/yashmeh/doc-rank/config"
)

type TikaServer interface {
Get() *tika.Client
Expand All @@ -10,7 +13,9 @@ type tikaServer struct {
Client *tika.Client
}

func NewTikaClient(url string) TikaServer {
func NewTikaClient(c *config.Config) TikaServer {
config := c.Get()
url := config.GetString("services.apacheTika")
client := tika.NewClient(nil, url)
return &tikaServer{Client: client}

Expand Down
8 changes: 4 additions & 4 deletions utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ package utils

import (
"encoding/json"
"fmt"
"io/ioutil"

log "github.com/sirupsen/logrus"
"github.com/yashmeh/doc-rank/elasticApi"
)

Expand All @@ -15,14 +15,14 @@ func JsonStruct(doc *elasticApi.Document) (string, error) {
docStruct := &elasticApi.Document{
Body: doc.Body,
ContentType: doc.ContentType,
// MetaData: doc.MetaData,
FileName: doc.FileName,
MetaData: doc.MetaData,
FileName: doc.FileName,
}

// Marshal the struct to JSON and check for errors
b, err := json.Marshal(docStruct)
if err != nil {
fmt.Println("json.Marshal ERROR:", err)
log.Error("json.Marshal ERROR:", err)
return "", err
}
return string(b), nil
Expand Down

0 comments on commit e8147c0

Please sign in to comment.