Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
krrish-sehgal committed Dec 19, 2024
1 parent c5d7e68 commit 1df23ff
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 81 deletions.
9 changes: 6 additions & 3 deletions blt/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@
vote_count,
)
from website.views.organization import (
CodeSimilarityAnalyze,
CreateHunt,
DomainDetailView,
DomainList,
Expand Down Expand Up @@ -172,7 +173,6 @@
user_sizzle_report,
view_hunt,
weekly_report,
CodeSimilarityAnalyze,
)
from website.views.project import (
ProjectBadgeView,
Expand Down Expand Up @@ -775,8 +775,11 @@
path("teams/delete-team/", delete_team, name="delete_team"),
path("teams/leave-team/", leave_team, name="leave_team"),
path("teams/kick-member/", kick_member, name="kick_member"),

path('api/code-similarity/analyze/', CodeSimilarityAnalyze.as_view(), name='code_similarity_analyze'),
path(
"api/code-similarity/analyze/",
CodeSimilarityAnalyze.as_view(),
name="code_similarity_analyze",
),
]

if settings.DEBUG:
Expand Down
132 changes: 78 additions & 54 deletions website/similarity_utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import difflib
import ast
import difflib
import os
import re
from transformers import AutoTokenizer, AutoModelForSequenceClassification

from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Initialize CodeBERT model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
model = AutoModelForSequenceClassification.from_pretrained("microsoft/codebert-base")

def process_similarity_analysis(repo1_path, repo2_path, use_codebert=False):

def process_similarity_analysis(repo1_path, repo2_path, use_codebert=False):
"""
Process the similarity analysis between two repositories.
:param repo1_path: Path to the first repository
Expand All @@ -35,12 +37,14 @@ def process_similarity_analysis(repo1_path, repo2_path, use_codebert=False):
similarity = difflib.SequenceMatcher(None, f1, f2).ratio() * 100

if similarity > 80: # You can set the threshold here
matching_details["functions"].append({
"name1": f1,
"name2": f2,
"similarity": round(similarity, 2),
})

matching_details["functions"].append(
{
"name1": f1,
"name2": f2,
"similarity": round(similarity, 2),
}
)

# Step 2: Compare method signatures
method_signatures1 = extract_method_signatures(repo1_path)
method_signatures2 = extract_method_signatures(repo2_path)
Expand All @@ -54,52 +58,64 @@ def process_similarity_analysis(repo1_path, repo2_path, use_codebert=False):
similarity = analyze_code_similarity_with_codebert(signature1, signature2)
else:
similarity = difflib.SequenceMatcher(None, str(m1), str(m2)).ratio() * 100

if similarity > 80: # You can set the threshold here
matching_details["functions"].append({
"method1": str(m1),
"method2": str(m2),
"similarity": round(similarity, 2),
})
matching_details["functions"].append(
{
"method1": str(m1),
"method2": str(m2),
"similarity": round(similarity, 2),
}
)

# Step 3: Compare Django models
models1 = extract_django_models(repo1_path)
models2 = extract_django_models(repo2_path)

print(models1)
print(models2)
print(models2)
# Compare models and fields
for model1 in models1:
for model2 in models2:
if use_codebert:
model_similarity = analyze_code_similarity_with_codebert(model1["name"], model2["name"])
model_similarity = analyze_code_similarity_with_codebert(
model1["name"], model2["name"]
)
else:
model_similarity = difflib.SequenceMatcher(None, model1["name"], model2["name"]).ratio() * 100
model_similarity = (
difflib.SequenceMatcher(None, model1["name"], model2["name"]).ratio() * 100
)

if model_similarity > 80: # You can set the threshold here
model_fields_similarity = compare_model_fields(model1, model2)
matching_details["models"].append({
"name1": model1["name"],
"name2": model2["name"],
"similarity": round(model_similarity, 2),
"field_comparison": model_fields_similarity
})
matching_details["models"].append(
{
"name1": model1["name"],
"name2": model2["name"],
"similarity": round(model_similarity, 2),
"field_comparison": model_fields_similarity,
}
)

return similarity_score, matching_details


def analyze_code_similarity_with_codebert(code1, code2):
"""
Analyze the semantic similarity between two code snippets using CodeBERT.
:param code1: First code snippet
:param code2: Second code snippet
:return: Similarity score (0-100)
"""
inputs = tokenizer(f"{code1} [SEP] {code2}", return_tensors="pt", truncation=True, max_length=512)
inputs = tokenizer(
f"{code1} [SEP] {code2}", return_tensors="pt", truncation=True, max_length=512
)
outputs = model(**inputs)
logits = outputs.logits
similarity_score = logits.softmax(dim=1)[0][1].item() * 100
return round(similarity_score, 2)


def extract_function_names(repo_path):
"""
Extract function names from Python files in the given repo.
Expand All @@ -109,9 +125,9 @@ def extract_function_names(repo_path):
function_names = []
for root, dirs, files in os.walk(repo_path):
for file in files:
if file.endswith('.py'):
if file.endswith(".py"):
file_path = os.path.join(root, file)
with open(file_path, 'r') as f:
with open(file_path, "r") as f:
tree = ast.parse(f.read(), filename=file)
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef):
Expand All @@ -128,94 +144,102 @@ def extract_method_signatures(repo_path):
method_signatures = []
for root, dirs, files in os.walk(repo_path):
for file in files:
if file.endswith('.py'):
if file.endswith(".py"):
file_path = os.path.join(root, file)
with open(file_path, 'r') as f:
with open(file_path, "r") as f:
tree = ast.parse(f.read(), filename=file)
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef):
signature = {
'name': node.name,
'args': [arg.arg for arg in node.args.args],
'defaults': [ast.dump(default) for default in node.args.defaults]
"name": node.name,
"args": [arg.arg for arg in node.args.args],
"defaults": [ast.dump(default) for default in node.args.defaults],
}
method_signatures.append(signature)
return method_signatures


def extract_django_models(repo_path):
"""
Extract Django model names and fields from the given repository.
:param repo_path: Path to the repository
:return: List of models with their fields
"""
models = []

# Walk through the repository directory
for root, dirs, files in os.walk(repo_path):
for file in files:
if file.endswith('.py'): # Only process Python files
if file.endswith(".py"): # Only process Python files
file_path = os.path.join(root, file)

# Open the file and read its contents
with open(file_path, 'r') as f:
with open(file_path, "r") as f:
lines = f.readlines()
model_name = None
fields = []
inside_model = False # To check if we are inside a model definition

for line in lines:
line = line.strip()

# Look for class definition that inherits from models.Model
if line.startswith('class ') and 'models.Model' in line:
model_name = line.split('(')[0].replace('class ', '').strip()
if line.startswith("class ") and "models.Model" in line:
model_name = line.split("(")[0].replace("class ", "").strip()
inside_model = True
fields = [] # Reset fields when a new model starts

# Look for field definitions inside a model
if inside_model:
# Match field definitions like: name = models.CharField(max_length=...)
match = re.match(r'^\s*(\w+)\s*=\s*models\.(\w+)', line)
match = re.match(r"^\s*(\w+)\s*=\s*models\.(\w+)", line)
if match:
field_name = match.group(1)
field_type = match.group(2)
fields.append({'field_name': field_name, 'field_type': field_type})
fields.append({"field_name": field_name, "field_type": field_type})

# Match other field types like ForeignKey, ManyToManyField, etc.
match_complex = re.match(r'^\s*(\w+)\s*=\s*models\.(ForeignKey|ManyToManyField|OneToOneField)\((.*)\)', line)
match_complex = re.match(
r"^\s*(\w+)\s*=\s*models\.(ForeignKey|ManyToManyField|OneToOneField)\((.*)\)",
line,
)
if match_complex:
field_name = match_complex.group(1)
field_type = match_complex.group(2)
field_params = match_complex.group(3).strip()
fields.append({'field_name': field_name, 'field_type': field_type, 'parameters': field_params})

fields.append(
{
"field_name": field_name,
"field_type": field_type,
"parameters": field_params,
}
)

# Check for the end of the class definition
if line.startswith('class ') and model_name:
models.append({'name': model_name, 'fields': fields})
if line.startswith("class ") and model_name:
models.append({"name": model_name, "fields": fields})
inside_model = False # Reset for next class

# Add the last model if the file ends without another class
if model_name:
models.append({'name': model_name, 'fields': fields})
models.append({"name": model_name, "fields": fields})

return models


def compare_model_fields(model1, model2):
"""
Compare fields of two Django models.
:param model1: First model's field details
:param model2: Second model's field details
:return: Field comparison details
"""
fields1 = set(model1['fields'])
fields2 = set(model2['fields'])
fields1 = set(model1["fields"])
fields2 = set(model2["fields"])
common_fields = fields1.intersection(fields2)
total_fields = len(fields1) + len(fields2) - len(common_fields)
if total_fields == 0:
field_similarity = 0.0
else:
field_similarity = len(common_fields) / float(total_fields) * 100
return {
'common_fields': list(common_fields),
'field_similarity': round(field_similarity, 2)
}
return {"common_fields": list(common_fields), "field_similarity": round(field_similarity, 2)}
3 changes: 1 addition & 2 deletions website/templates/includes/sidenav.html
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,7 @@
</a>
</li>
<!-- <li class="mb-2 {% if request.path == '/similarity-check/' %}bg-gray-200{% endif %}">
<a href="{% url 'similartiy_check' %}"
class="flex items-center w-full no-underline p-2 {% if request.path == '/similartiy-check/' %}text-black{% else %}hover:text-red-500 text-black{% endif %}">
<a href="{% url 'similartiy_check' %}" class="flex items-center w-full no-underline p-2 {% if request.path == '/similartiy-check/' %}text-black{% else %}hover:text-red-500 text-black{% endif %}">
<div class="icon text-indigo-500">
<i class="fas fa-clone fa-lg"></i>
</div>
Expand Down
Loading

0 comments on commit 1df23ff

Please sign in to comment.