Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Google Sheets integration for GitHub user verification #4671

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
111 changes: 82 additions & 29 deletions openhands/server/github.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,104 @@
import os
from typing import List, Optional
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These `typing` imports are not necessary; just use the built-in `list` and `| None` instead.


import httpx

from openhands.core.logger import openhands_logger as logger
from openhands.server.sheets_client import GoogleSheetsClient

# GitHub client id/secret taken from the environment at import time. An unset
# variable yields an empty string; surrounding whitespace is trimmed.
GITHUB_CLIENT_ID = os.environ.get('GITHUB_CLIENT_ID', '').strip()
GITHUB_CLIENT_SECRET = os.environ.get('GITHUB_CLIENT_SECRET', '').strip()
# Stays None until a user list has been loaded.
GITHUB_USER_LIST = None


def load_github_user_list():
    """Fill the module-level GITHUB_USER_LIST from a text file.

    The file path is read from the GITHUB_USER_LIST_FILE environment
    variable. When that variable is unset or empty nothing happens and
    GITHUB_USER_LIST is left untouched. Otherwise every non-blank line of
    the file, stripped of surrounding whitespace, becomes one list entry.
    """
    global GITHUB_USER_LIST
    list_path = os.getenv('GITHUB_USER_LIST_FILE')
    if not list_path:
        return
    with open(list_path, 'r') as handle:
        stripped_lines = (raw.strip() for raw in handle)
        GITHUB_USER_LIST = [entry for entry in stripped_lines if entry]
class UserVerifier:
    """Check GitHub logins against optional allowlist sources.

    Two sources can be configured through environment variables:

    * ``GITHUB_USER_LIST_FILE`` - path of a text file with one login per line.
    * ``GITHUB_USERS_SHEET_ID`` - id of a Google Sheet read through
      ``GoogleSheetsClient``.

    A login is allowed when it appears in either source. When neither source
    is configured every login is allowed.
    """

    def __init__(self) -> None:
        logger.info('Initializing UserVerifier')
        self.file_users: list[str] | None = None
        self.sheets_client: GoogleSheetsClient | None = None
        self.spreadsheet_id: str | None = None

        # Initialize from environment variables
        self._init_file_users()
        self._init_sheets_client()

    def _init_file_users(self) -> None:
        """Load users from the text file named by GITHUB_USER_LIST_FILE.

        Blank lines are skipped and surrounding whitespace is stripped.

        Raises:
            FileNotFoundError: The configured file does not exist.
            OSError: The configured file exists but cannot be opened or read.
            UnicodeDecodeError: The file content cannot be decoded as text.
        """
        waitlist = os.getenv('GITHUB_USER_LIST_FILE')
        if not waitlist:
            logger.info('GITHUB_USER_LIST_FILE not configured')
            return

        try:
            with open(waitlist, 'r') as f:
                users = [name for line in f if (name := line.strip())]
        except FileNotFoundError:
            logger.error(f'User list file not found: {waitlist}')
            raise FileNotFoundError(f'User list file not found: {waitlist}') from None
        except (OSError, UnicodeDecodeError) as e:
            # A configured but unreadable allowlist must not be ignored:
            # leaving file_users unset would make is_user_allowed() treat the
            # verifier as unconfigured and let every user in.
            logger.error(f'Error reading user list file {waitlist}: {str(e)}')
            raise

        self.file_users = users
        logger.info(
            f'Successfully loaded {len(self.file_users)} users from {waitlist}'
        )

    def _init_sheets_client(self) -> None:
        """Create the Google Sheets client when GITHUB_USERS_SHEET_ID is set."""
        sheet_id = os.getenv('GITHUB_USERS_SHEET_ID')

        if not sheet_id:
            logger.info('GITHUB_USERS_SHEET_ID not configured')
            return

        logger.info('Initializing Google Sheets integration')
        self.sheets_client = GoogleSheetsClient()
        self.spreadsheet_id = sheet_id

    def is_user_allowed(self, username: str) -> bool:
        """Check if user is allowed based on file and/or sheet configuration.

        Args:
            username: GitHub login to look up. The comparison is an exact,
                case-sensitive string match.

        Returns:
            True when no source is configured, or when ``username`` is listed
            in the text file or in the Google Sheet; False otherwise.
        """
        if not self.file_users and not self.sheets_client:
            logger.debug('No verification sources configured - allowing all users')
            return True
        logger.info(f'Checking if GitHub user {username} is allowed')

        # The in-memory file list is consulted first so the sheet is only
        # fetched when the file does not already settle the question.
        if self.file_users:
            if username in self.file_users:
                logger.info(f'User {username} found in text file allowlist')
                return True
            logger.debug(f'User {username} not found in text file allowlist')

        if self.sheets_client and self.spreadsheet_id:
            # The sheet is fetched on every call; nothing is cached here.
            sheet_users = self.sheets_client.get_usernames(self.spreadsheet_id)
            if username in sheet_users:
                logger.info(f'User {username} found in Google Sheets allowlist')
                return True
            logger.debug(f'User {username} not found in Google Sheets allowlist')

        logger.info(f'User {username} not found in any allowlist')
        return False


load_github_user_list()
# Global instance of user verifier, created once when this module is imported.
# Its constructor reads GITHUB_USER_LIST_FILE and GITHUB_USERS_SHEET_ID from
# the environment, so a missing user list file raises here, at import time.
user_verifier = UserVerifier()


async def authenticate_github_user(auth_token) -> bool:
    """Decide whether the holder of ``auth_token`` may use the server.

    Args:
        auth_token: GitHub token presented by the client; may be empty or
            None.

    Returns:
        True when no allowlist source is configured, or when the token
        resolves to a GitHub login accepted by ``user_verifier``. False when
        the token is missing, GitHub rejects it or cannot be reached, the
        response carries no login, or the login is not on an allowlist.
    """
    logger.info('Checking GitHub token')
    # Without any allowlist source every user is allowed, so neither a token
    # nor a round trip to the GitHub API is needed.
    if not user_verifier.file_users and not user_verifier.sheets_client:
        return True

    if not auth_token:
        logger.warning('No GitHub token provided')
        return False

    # get_github_user raises on non-2xx responses and on connection problems;
    # both mean the token could not be validated, which is a rejection rather
    # than a server error.
    try:
        login = await get_github_user(auth_token)
    except (httpx.HTTPError, ValueError) as e:
        logger.warning(f'Invalid GitHub token: {e}')
        return False
    if not login:
        logger.warning('Invalid GitHub token: no login in GitHub response')
        return False

    if not user_verifier.is_user_allowed(login):
        logger.warning(f'GitHub user {login} not in allow list')
        return False

    logger.info(f'GitHub user {login} authenticated')
    return True


async def get_github_user(token: str) -> tuple[str | None, str | None]:
async def get_github_user(token: str) -> str:
"""Get GitHub user info from token.
Args:
Expand All @@ -52,21 +109,17 @@ async def get_github_user(token: str) -> tuple[str | None, str | None]:
If successful, error_message is None
If failed, login is None and error_message contains the error
"""
logger.info('Fetching GitHub user info from token')
headers = {
'Accept': 'application/vnd.github+json',
'Authorization': f'Bearer {token}',
'X-GitHub-Api-Version': '2022-11-28',
}
try:
async with httpx.AsyncClient() as client:
response = await client.get('https://api.github.com/user', headers=headers)
if response.status_code == 200:
user_data = response.json()
return user_data.get('login'), None
else:
return (
None,
f'GitHub API error: {response.status_code} - {response.text}',
)
except Exception as e:
return None, f'Error connecting to GitHub: {str(e)}'
async with httpx.AsyncClient() as client:
logger.debug('Making request to GitHub API')
response = await client.get('https://api.github.com/user', headers=headers)
response.raise_for_status()
user_data = response.json()
login = user_data.get('login')
logger.info(f'Successfully retrieved GitHub user: {login}')
return login
68 changes: 68 additions & 0 deletions openhands/server/sheets_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from typing import List

from google.auth import default
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

from openhands.core.logger import openhands_logger as logger


class GoogleSheetsClient:
    """Read-only helper that fetches usernames from a Google Sheet."""

    def __init__(self):
        """Initialize Google Sheets client using workload identity.

        Uses application default credentials which supports workload identity
        when running in GCP. Initialization is best-effort: on any failure the
        error is logged and ``self.service`` is set to None, which makes
        ``get_usernames`` return an empty list.
        """
        logger.info('Initializing Google Sheets client with workload identity')
        try:
            credentials, project = default(
                scopes=['https://www.googleapis.com/auth/spreadsheets.readonly']
            )
            logger.info(f'Successfully obtained credentials for project: {project}')
            self.service = build('sheets', 'v4', credentials=credentials)
            logger.info('Successfully initialized Google Sheets API service')
        except Exception as e:
            logger.error(f'Failed to initialize Google Sheets client: {str(e)}')
            self.service = None

    def get_usernames(self, spreadsheet_id: str, range_name: str = 'A:A') -> List[str]:
        """Get list of usernames from specified Google Sheet.

        Args:
            spreadsheet_id: The ID of the Google Sheet
            range_name: The A1 notation of the range to fetch

        Returns:
            The first cell of every non-empty row, converted to ``str`` and
            stripped, with blank results dropped. An empty list is returned
            when the service is not initialized or the request fails.
        """
        if not self.service:
            logger.error('Google Sheets service not initialized')
            return []

        try:
            logger.info(
                f'Fetching usernames from sheet {spreadsheet_id}, range {range_name}'
            )
            result = (
                self.service.spreadsheets()
                .values()
                .get(spreadsheetId=spreadsheet_id, range=range_name)
                .execute()
            )

            rows = result.get('values', [])
            # Convert to str before stripping so a non-string cell value
            # cannot raise and wipe out the whole list via the handler below.
            usernames = [
                name for row in rows if row and (name := str(row[0]).strip())
            ]
            logger.info(
                f'Successfully fetched {len(usernames)} usernames from Google Sheet'
            )
            return usernames

        except HttpError as err:
            logger.error(f'Error accessing Google Sheet {spreadsheet_id}: {err}')
            return []
        except Exception as e:
            logger.error(
                f'Unexpected error accessing Google Sheet {spreadsheet_id}: {str(e)}'
            )
            return []
22 changes: 20 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ datasets = "*"
pandas = "*"
litellm = "^1.51.1"
google-generativeai = "*" # To use litellm with Gemini Pro API
google-api-python-client = "*" # For Google Sheets API
google-auth-httplib2 = "*" # For Google Sheets authentication
google-auth-oauthlib = "*" # For Google Sheets OAuth
termcolor = "*"
seaborn = "*"
docker = "*"
Expand Down
Loading