Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Handle empty lines in patch parser #6208

Merged
merged 1 commit into from
Jan 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 28 additions & 27 deletions openhands/resolver/patching/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
unified_header_old_line = re.compile(r'^--- ' + file_timestamp_str + '$')
unified_header_new_line = re.compile(r'^\+\+\+ ' + file_timestamp_str + '$')
unified_hunk_start = re.compile(r'^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@(.*)$')
unified_change = re.compile('^([-+ ])(.*)$')
unified_change = re.compile('^([-+ ])(.*)$', re.MULTILINE)

context_header_old_line = re.compile(r'^\*\*\* ' + file_timestamp_str + '$')
context_header_new_line = re.compile('^--- ' + file_timestamp_str + '$')
Expand Down Expand Up @@ -606,38 +606,39 @@ def parse_unified_diff(text):
h = unified_hunk_start.match(hunk[0])
del hunk[0]
if h:
old = int(h.group(1))
if len(h.group(2)) > 0:
old_len = int(h.group(2))
else:
old_len = 0
# The hunk header @@ -1,6 +1,6 @@ means:
# - Start at line 1 in the old file and show 6 lines
# - Start at line 1 in the new file and show 6 lines
old = int(h.group(1)) # Starting line in old file
old_len = int(h.group(2)) if len(h.group(2)) > 0 else 1 # Number of lines in old file

new = int(h.group(3))
if len(h.group(4)) > 0:
new_len = int(h.group(4))
else:
new_len = 0
new = int(h.group(3)) # Starting line in new file
new_len = int(h.group(4)) if len(h.group(4)) > 0 else 1 # Number of lines in new file

h = None
break

# Process each line in the hunk
for n in hunk:
c = unified_change.match(n)
if c:
kind = c.group(1)
line = c.group(2)

if kind == '-' and (r != old_len or r == 0):
changes.append(Change(old + r, None, line, hunk_n))
r += 1
elif kind == '+' and (i != new_len or i == 0):
changes.append(Change(None, new + i, line, hunk_n))
i += 1
elif kind == ' ':
if r != old_len and i != new_len:
changes.append(Change(old + r, new + i, line, hunk_n))
r += 1
i += 1
# Each line in a unified diff starts with a space (context), + (addition), or - (deletion)
# The first character is the kind, the rest is the line content
kind = n[0] if len(n) > 0 else ' ' # Empty lines in the hunk are treated as context lines
line = n[1:] if len(n) > 1 else ''

# Process the line based on its kind
if kind == '-' and (r != old_len or r == 0):
# Line was removed from the old file
changes.append(Change(old + r, None, line, hunk_n))
r += 1
elif kind == '+' and (i != new_len or i == 0):
# Line was added in the new file
changes.append(Change(None, new + i, line, hunk_n))
i += 1
elif kind == ' ':
# Context line - exists in both old and new file
changes.append(Change(old + r, new + i, line, hunk_n))
r += 1
i += 1

if len(changes) > 0:
return changes
Expand Down
47 changes: 47 additions & 0 deletions tests/unit/resolver/test_patch_apply.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import pytest
from openhands.resolver.patching.apply import apply_diff
from openhands.resolver.patching.exceptions import HunkApplyException
from openhands.resolver.patching.patch import parse_diff, diffobj


def test_patch_apply_with_empty_lines():
    """Check that a unified diff whose hunk contains a blank line applies cleanly.

    The hunk below has a completely empty line (no leading space) standing in
    for an unchanged blank line of the original file.  The patch is parsed
    with ``parse_diff``, wrapped in a ``diffobj`` and applied to the original
    text with ``apply_diff``; the result is compared line by line against the
    expected file content.
    """
    # The original file has no indentation and uses \n line endings
    original_content = "# PR Viewer\n\nThis React application allows you to view open pull requests from GitHub repositories in a GitHub organization. By default, it uses the All-Hands-AI organization.\n\n## Setup"

    # Unified-diff context lines carry a single leading space; the blank line
    # inside the hunk is deliberately left completely empty, which is the case
    # under test.  The patch uses \n line endings.
    patch = """diff --git a/README.md b/README.md
index b760a53..5071727 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,3 @@
 # PR Viewer

-This React application allows you to view open pull requests from GitHub repositories in a GitHub organization. By default, it uses the All-Hands-AI organization.
+This React application was created by Graham Neubig and OpenHands. It allows you to view open pull requests from GitHub repositories in a GitHub organization. By default, it uses the All-Hands-AI organization."""

    # Diagnostic output: pytest captures stdout and only shows it when the
    # test fails, which makes a mismatch much easier to investigate.
    print("Original content lines:")
    for i, line in enumerate(original_content.splitlines(), 1):
        print(f"{i}: {line!r}")

    print("\nPatch lines:")
    for i, line in enumerate(patch.splitlines(), 1):
        print(f"{i}: {line!r}")

    changes = parse_diff(patch)
    print("\nParsed changes:")
    for change in changes:
        print(
            f"Change(old={change.old}, new={change.new}, "
            f"line={change.line!r}, hunk={change.hunk})"
        )
    diff = diffobj(header=None, changes=changes, text=patch)

    # Apply the patch
    result = apply_diff(diff, original_content)

    # The patch should be applied successfully: only the third line changes,
    # the blank lines and the trailing heading are preserved.
    expected_result = [
        "# PR Viewer",
        "",
        "This React application was created by Graham Neubig and OpenHands. It allows you to view open pull requests from GitHub repositories in a GitHub organization. By default, it uses the All-Hands-AI organization.",
        "",
        "## Setup",
    ]
    assert result == expected_result
Loading