Skip to content

Commit

Permalink
api: remove un-used code, add go doc
Browse files Browse the repository at this point in the history
Refactoring, consisting of
 - remove unused method `isAuxiliaryLanguage` and `FileCountList`
   in order to reduce public API surfaces (go/java)
 - add GoDoc to public APIs

TEST PLAN:
 - make test

Signed-off-by: Alexander Bezzubov <[email protected]>
  • Loading branch information
bzz committed Feb 4, 2019
1 parent f28fc12 commit ebec601
Show file tree
Hide file tree
Showing 9 changed files with 44 additions and 148 deletions.
2 changes: 1 addition & 1 deletion cmd/enry/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ func printFileAnalysis(file string, limit int64, isJSON bool) error {
// functions below can work on a sample
fileType := getFileType(file, data)
language := enry.GetLanguage(file, data)
mimeType := enry.GetMimeType(file, language)
mimeType := enry.GetMIMEType(file, language)

if isJSON {
return json.NewEncoder(os.Stdout).Encode(map[string]interface{}{
Expand Down
1 change: 1 addition & 0 deletions common.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ var DefaultStrategies = []Strategy{
GetLanguagesByClassifier,
}

// DefaultClassifier is a naive Bayes classifier based on Linguist samples.
var DefaultClassifier Classifier = &classifier{
languagesLogProbabilities: data.LanguagesLogProbabilities,
tokensLogProbabilities: data.TokensLogProbabilities,
Expand Down
14 changes: 14 additions & 0 deletions enry.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
package enry // import "gopkg.in/src-d/enry.v1"

//go:generate make code-generate

/*
Package enry implements multiple strategies for programming language identification.
Identification is made based on file name and file content using a series
of strategies to narrow down possible options.
Each strategy is available as a separate API call, as well as a main entry point
GetLanguage(filename string, content []byte) (language string)
It is a port of the https://github.com/github/linguist from Ruby.
Upstream Linguist YAML files are used to generate data structures for the data
package.
*/
2 changes: 1 addition & 1 deletion java/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ $(RESOURCES_DIR): os-shared-lib
cp -R $(RESOURCES_SRC) $(RESOURCES_DIR)

$(JNAERATOR_JAR): $(RESOURCES_DIR)
mkdir $(JNAERATOR_DIR) && \
mkdir -p $(JNAERATOR_DIR) && \
wget $(JNAERATOR_JAR_URL) -O $(JNAERATOR_JAR)

os-shared-lib:
Expand Down
10 changes: 0 additions & 10 deletions java/src/main/java/tech/sourced/enry/Enry.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,6 @@ public class Enry {

private static final EnryLibrary nativeLib = EnryLibrary.INSTANCE;

/**
* Returns whether the given language is auxiliary or not.
*
* The language name is converted with toGoString, passed to the native
* library's IsAuxiliaryLanguage, and the native result is converted back
* with toJavaBool. The method is declared synchronized.
*
* @param language name of the language, e.g. PHP, HTML, ...
* @return if it's an auxiliary language
*/
public static synchronized boolean isAuxiliaryLanguage(String language) {
return toJavaBool(nativeLib.IsAuxiliaryLanguage(toGoString(language)));
}

/**
* Returns the language of the given file based on the filename and its
* contents.
Expand Down
6 changes: 0 additions & 6 deletions java/src/test/java/tech/sourced/enry/EnryTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,6 @@

public class EnryTest {

/**
* Checks Enry.isAuxiliaryLanguage with one language expected to be
* auxiliary ("HTML") and one expected not to be ("Go").
*/
@Test
public void isAuxiliaryLanguage() {
assertTrue(Enry.isAuxiliaryLanguage("HTML"));
assertFalse(Enry.isAuxiliaryLanguage("Go"));
}

@Test
public void getLanguage() {
String code = "<?php $foo = bar();";
Expand Down
7 changes: 1 addition & 6 deletions shared/enry.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,7 @@ func GetLanguagesByVimModeline(filename string, content []byte, candidates []str

//export GetMimeType
func GetMimeType(path string, language string) string {
return enry.GetMimeType(path, language)
}

//export IsAuxiliaryLanguage
func IsAuxiliaryLanguage(lang string) bool {
return enry.IsAuxiliaryLanguage(lang)
return enry.GetMIMEType(path, language)
}

//export IsBinary
Expand Down
81 changes: 25 additions & 56 deletions utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,53 +8,20 @@ import (
"gopkg.in/src-d/enry.v1/data"
)

// Lookup tables keyed by language name. Every entry maps to true; the
// lookups in IsAuxiliaryLanguage and IsConfiguration only test whether the
// key is present.
var (
// auxiliaryLanguages is the set of language names that
// IsAuxiliaryLanguage reports as auxiliary.
auxiliaryLanguages = map[string]bool{
"Other": true, "XML": true, "YAML": true, "TOML": true, "INI": true,
"JSON": true, "TeX": true, "Public Key": true, "AsciiDoc": true,
"AGS Script": true, "VimL": true, "Diff": true, "CMake": true, "fish": true,
"Awk": true, "Graphviz (DOT)": true, "Markdown": true, "desktop": true,
"XSLT": true, "SQL": true, "RMarkdown": true, "IRC log": true,
"reStructuredText": true, "Twig": true, "CSS": true, "Batchfile": true,
"Text": true, "HTML+ERB": true, "HTML": true, "Gettext Catalog": true,
"Smarty": true, "Raw token data": true,
}

// configurationLanguages is the set of language names that
// IsConfiguration treats as configuration languages.
configurationLanguages = map[string]bool{
"XML": true, "JSON": true, "TOML": true, "YAML": true, "INI": true, "SQL": true,
}
)
const binSniffLen = 8000

// IsAuxiliaryLanguage returns whether or not lang is an auxiliary language.
func IsAuxiliaryLanguage(lang string) bool {
_, ok := auxiliaryLanguages[lang]
return ok
var configurationLanguages = map[string]bool{
"XML": true, "JSON": true, "TOML": true, "YAML": true, "INI": true, "SQL": true,
}

// IsConfiguration tells whether the language resolved from path by
// GetLanguageByExtension is listed in configurationLanguages.
func IsConfiguration(path string) bool {
	// Only the language name is needed; the second result is ignored.
	lang, _ := GetLanguageByExtension(path)
	if _, found := configurationLanguages[lang]; found {
		return true
	}
	return false
}

// IsDotFile reports whether the last element of path starts with a dot.
// The path is cleaned first, and the special names "." and ".." are never
// considered dot files.
func IsDotFile(path string) bool {
	name := filepath.Base(filepath.Clean(path))
	if name == "." || name == ".." {
		return false
	}
	return strings.HasPrefix(name, ".")
}

// IsVendor reports whether path is matched by data.VendorMatchers.
func IsVendor(path string) bool {
	matched := data.VendorMatchers.Match(path)
	return matched
}

// IsDocumentation reports whether path is matched by
// data.DocumentationMatchers.
func IsDocumentation(path string) bool {
	matched := data.DocumentationMatchers.Match(path)
	return matched
}

// IsImage tells if a given file is an image (PNG, JPEG or GIF format).
func IsImage(path string) bool {
extension := filepath.Ext(path)
if extension == ".png" || extension == ".jpg" || extension == ".jpeg" || extension == ".gif" {
Expand All @@ -64,7 +31,8 @@ func IsImage(path string) bool {
return false
}

func GetMimeType(path string, language string) string {
// GetMIMEType returns a MIME type of a given file based on its language.
func GetMIMEType(path string, language string) string {
if mime, ok := data.LanguagesMime[language]; ok {
return mime
}
Expand All @@ -76,13 +44,28 @@ func GetMimeType(path string, language string) string {
return "text/plain"
}

const sniffLen = 8000
// IsDocumentation reports whether path is matched by
// data.DocumentationMatchers.
func IsDocumentation(path string) bool {
	matched := data.DocumentationMatchers.Match(path)
	return matched
}

// IsDotFile reports whether the last element of path starts with a dot.
// The path is cleaned first, and the special names "." and ".." are never
// considered dot files.
func IsDotFile(path string) bool {
	name := filepath.Base(filepath.Clean(path))
	if name == "." || name == ".." {
		return false
	}
	return strings.HasPrefix(name, ".")
}

// IsVendor reports whether path is matched by data.VendorMatchers.
func IsVendor(path string) bool {
	matched := data.VendorMatchers.Match(path)
	return matched
}

// IsBinary detects if data is a binary value based on:
// http://git.kernel.org/cgit/git/git.git/tree/xdiff-interface.c?id=HEAD#n198
func IsBinary(data []byte) bool {
if len(data) > sniffLen {
data = data[:sniffLen]
if len(data) > binSniffLen {
data = data[:binSniffLen]
}

if bytes.IndexByte(data, byte(0)) == -1 {
Expand All @@ -91,17 +74,3 @@ func IsBinary(data []byte) bool {

return true
}

// FileCount pairs a language name with the number of files that belong to
// that language.
type FileCount struct {
	Name  string
	Count int
}

// FileCountList is a list of FileCount values. Its Len, Less and Swap
// methods order entries by Count, so it can be handed to sort.Sort.
type FileCountList []FileCount

// Len returns the number of entries in the list.
func (fcl FileCountList) Len() int {
	return len(fcl)
}

// Less reports whether the entry at index i has a smaller Count than the
// entry at index j.
func (fcl FileCountList) Less(i, j int) bool {
	left, right := fcl[i], fcl[j]
	return left.Count < right.Count
}

// Swap exchanges the entries at indexes i and j.
func (fcl FileCountList) Swap(i, j int) {
	tmp := fcl[i]
	fcl[i] = fcl[j]
	fcl[j] = tmp
}
69 changes: 1 addition & 68 deletions utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,38 +3,11 @@ package enry
import (
"bytes"
"fmt"
"sort"
"testing"

"github.com/stretchr/testify/assert"
)

// TestIsAuxiliaryLanguage checks that IsAuxiliaryLanguage returns true for
// every key of auxiliaryLanguages and false for a name that is not in it.
func TestIsAuxiliaryLanguage(t *testing.T) {
	type testCase struct {
		name     string
		lang     string
		expected bool
	}

	tests := make([]testCase, 0, len(auxiliaryLanguages)+1)
	tests = append(tests, testCase{
		name:     "TestIsAuxiliaryLanguage_Invalid",
		lang:     "invalid",
		expected: false,
	})
	// One positive case per known auxiliary language. The loop variable is
	// deliberately not called t, so it does not shadow the *testing.T.
	for lang := range auxiliaryLanguages {
		tests = append(tests, testCase{
			name:     fmt.Sprintf("TestIsAuxiliaryLanguage_%s", lang),
			lang:     lang,
			expected: true,
		})
	}

	for _, test := range tests {
		is := IsAuxiliaryLanguage(test.lang)
		// assert.Equal takes (t, expected, actual, msg...).
		assert.Equal(t, test.expected, is,
			fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
	}
}

func TestIsVendor(t *testing.T) {
tests := []struct {
name string
Expand Down Expand Up @@ -106,7 +79,7 @@ func TestGetMimeType(t *testing.T) {
}

for _, test := range tests {
is := GetMimeType(test.path, test.lang)
is := GetMIMEType(test.path, test.lang)
assert.Equal(t, is, test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
}
}
Expand Down Expand Up @@ -160,43 +133,3 @@ func TestIsDotFile(t *testing.T) {
assert.Equal(t, test.expected, is, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
}
}

// TestFileCountListSort checks that FileCountList sorts by Count in both
// ascending order (sort.Sort) and descending order (sort.Reverse).
func TestFileCountListSort(t *testing.T) {
	sampleData := FileCountList{{"a", 8}, {"b", 65}, {"c", 20}, {"d", 90}}
	const ascending = "ASC"
	const descending = "DESC"

	tests := []struct {
		name         string
		data         FileCountList
		order        string
		expectedData FileCountList
	}{
		{
			name:         "ascending order",
			data:         sampleData,
			order:        ascending,
			expectedData: FileCountList{{"a", 8}, {"c", 20}, {"b", 65}, {"d", 90}},
		},
		{
			name:         "descending order",
			data:         sampleData,
			order:        descending,
			expectedData: FileCountList{{"d", 90}, {"b", 65}, {"c", 20}, {"a", 8}},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// Sort a private copy: every case shares sampleData's backing
			// array, and sorting it in place would feed the next case
			// already-sorted input.
			data := append(FileCountList(nil), test.data...)

			if test.order == descending {
				sort.Sort(sort.Reverse(data))
			} else {
				sort.Sort(data)
			}

			// Compare the whole lists so a length mismatch fails the test
			// instead of panicking or leaving elements unchecked.
			assert.Equal(t, test.expectedData, data,
				fmt.Sprintf("%v: sorted = %v, expected: %v", test.name, data, test.expectedData))
		})
	}
}

0 comments on commit ebec601

Please sign in to comment.