
class EnhancedPythonToRConverter:
    """
    Enhanced Python to R converter with Gemini AI validation.

    The conversion is purely textual: import lines are mapped to ``library()``
    calls, known function names are renamed, a handful of pandas / matplotlib
    idioms are rewritten with regular expressions, and an R comment header is
    prepended. The result can optionally be handed to the validator created in
    ``__init__`` for review.
    """

    def __init__(self, gemini_api_key: str = None):
        """Create the validator and the lookup tables used by the converter.

        Args:
            gemini_api_key: Passed straight through to ``GeminiValidator``.
        """
        self.validator = GeminiValidator(gemini_api_key)

        # Python module name -> R ``library(...)`` line(s) that replace it.
        self.import_mappings = {
            'pandas': 'library(dplyr)\nlibrary(tidyr)\nlibrary(readr)',
            'numpy': 'library(base)',
            'matplotlib.pyplot': 'library(ggplot2)',
            'seaborn': 'library(ggplot2)\nlibrary(RColorBrewer)',
            'scipy.stats': 'library(stats)',
            'sklearn': 'library(caret)\nlibrary(randomForest)\nlibrary(e1071)',
            'statsmodels': 'library(stats)\nlibrary(lmtest)',
            'plotly': 'library(plotly)',
        }

        # Dotted Python name -> R function name (see ``convert_functions``).
        self.function_mappings = {
            'pd.DataFrame': 'data.frame',
            'pd.read_csv': 'read.csv',
            'pd.read_excel': 'read_excel',
            'df.head': 'head',
            'df.tail': 'tail',
            'df.shape': 'dim',
            'df.info': 'str',
            'df.describe': 'summary',
            'df.mean': 'mean',
            'df.median': 'median',
            'df.std': 'sd',
            'df.var': 'var',
            'df.sum': 'sum',
            'df.count': 'length',
            'df.groupby': 'group_by',
            'df.merge': 'merge',
            'df.drop': 'select',
            'df.dropna': 'na.omit',
            'df.fillna': 'replace_na',
            'df.sort_values': 'arrange',
            'df.value_counts': 'table',
            'np.array': 'c',
            'np.mean': 'mean',
            'np.median': 'median',
            'np.std': 'sd',
            'np.var': 'var',
            'np.sum': 'sum',
            'np.min': 'min',
            'np.max': 'max',
            'np.sqrt': 'sqrt',
            'np.log': 'log',
            'np.exp': 'exp',
            'np.random.normal': 'rnorm',
            'np.random.uniform': 'runif',
            'np.linspace': 'seq',
            'np.arange': 'seq',
            'plt.figure': 'ggplot',
            'plt.plot': 'geom_line',
            'plt.scatter': 'geom_point',
            'plt.hist': 'geom_histogram',
            'plt.bar': 'geom_bar',
            'plt.boxplot': 'geom_boxplot',
            'plt.show': 'print',
            'sns.scatterplot': 'geom_point',
            'sns.histplot': 'geom_histogram',
            'sns.boxplot': 'geom_boxplot',
            'sns.heatmap': 'geom_tile',
            'scipy.stats.ttest_ind': 't.test',
            'scipy.stats.chi2_contingency': 'chisq.test',
            'scipy.stats.pearsonr': 'cor.test',
            'scipy.stats.spearmanr': 'cor.test',
            'scipy.stats.normaltest': 'shapiro.test',
            'stats.ttest_ind': 't.test',
            'sklearn.linear_model.LinearRegression': 'lm',
            'sklearn.ensemble.RandomForestRegressor': 'randomForest',
            'sklearn.model_selection.train_test_split': 'sample',
        }

        # (regex, replacement) pairs applied in order by
        # ``apply_syntax_patterns``.
        # NOTE: only a literal ``[0]`` is shifted to ``[1]``; other indices
        # are left as written.
        self.syntax_patterns = [
            (r'\bTrue\b', 'TRUE'),
            (r'\bFalse\b', 'FALSE'),
            (r'\bNone\b', 'NULL'),
            (r'\blen\(', 'length('),
            (r'range\((\d+)\)', r'1:\1'),
            (r'range\((\d+),\s*(\d+)\)', r'\1:\2'),
            (r'\.split\(', '.strsplit('),
            (r'\.strip\(\)', '.str_trim()'),
            (r'\.lower\(\)', '.str_to_lower()'),
            (r'\.upper\(\)', '.str_to_upper()'),
            (r'\[0\]', '[1]'),
            (r'f"([^"]*)"', r'paste0("\1")'),
            (r"f'([^']*)'", r"paste0('\1')"),
        ]

    def _r_libraries_for(self, module: str):
        """Return the ``library(...)`` text for *module*, or ``None``.

        The exact dotted name is tried first, then each parent package in
        turn, so ``sklearn.linear_model`` resolves through the ``sklearn``
        entry of ``import_mappings``.
        """
        candidate = module
        while candidate:
            if candidate in self.import_mappings:
                return self.import_mappings[candidate]
            # Drop the last dotted component; becomes '' when none is left.
            candidate = candidate.rpartition('.')[0]
        return None

    def convert_imports(self, code: str) -> str:
        """Convert Python import statements to R library statements.

        Each import line is replaced by a comment holding the original
        statement, followed by the mapped ``library(...)`` line(s) when the
        module (or one of its parent packages) is in ``import_mappings``.
        ``from X import Y as Z`` lines are only commented out and flagged for
        manual handling. All other lines keep their leading indentation;
        trailing whitespace is removed.

        Args:
            code: Python source text.

        Returns:
            The text with import lines rewritten.
        """
        converted_lines = []
        for raw_line in code.split('\n'):
            line = raw_line.strip()
            is_from = line.startswith('from ')

            if not (is_from or line.startswith('import ')):
                # Not an import: keep the indentation so nested blocks stay
                # recognisable in the output.
                converted_lines.append(raw_line.rstrip())
                continue

            if is_from and ' as ' in line:
                # Aliased from-imports must be tested before the generic
                # alias handling, otherwise this case is never reached.
                converted_lines.append(
                    f"# {line} # Handle specific imports manually"
                )
                continue

            if is_from:
                module = line[len('from '):].split(' import ')[0].strip()
            else:
                module = line[len('import '):].split(' as ')[0].strip()

            libraries = self._r_libraries_for(module)
            if libraries is None:
                converted_lines.append(f"# {line} # No direct R equivalent")
            else:
                converted_lines.append(f"# {line}")
                converted_lines.append(libraries)

        return '\n'.join(converted_lines)

    def convert_functions(self, code: str) -> str:
        """Convert Python function calls to R equivalents.

        Every key of ``function_mappings`` is replaced by its value in a
        single pass. A key only matches when it is not glued to a longer
        identifier on either side, and longer keys are tried first, so
        ``df.dropna`` maps to ``na.omit`` instead of being caught by
        ``df.drop``, and unmapped names such as ``np.maximum`` are left
        untouched.

        Args:
            code: Source text to rewrite.

        Returns:
            The text with mapped names replaced.
        """
        if not self.function_mappings:
            return code

        # Longest first so the alternation prefers the most specific name.
        names = sorted(self.function_mappings, key=len, reverse=True)
        pattern = re.compile(
            r'(?<!\w)(?:' + '|'.join(re.escape(n) for n in names) + r')(?!\w)'
        )
        # A callable replacement inserts the R name literally (no backslash
        # or group-reference processing).
        return pattern.sub(
            lambda match: self.function_mappings[match.group(0)], code
        )

    def apply_syntax_patterns(self, code: str) -> str:
        """Apply regex patterns to convert Python syntax to R syntax.

        The ``(pattern, replacement)`` pairs in ``syntax_patterns`` are
        applied with ``re.sub`` in list order.
        """
        for pattern, replacement in self.syntax_patterns:
            code = re.sub(pattern, replacement, code)
        return code

    def convert_pandas_operations(self, code: str) -> str:
        """Convert common pandas operations to dplyr/tidyr equivalents.

        Three rewrites are applied:

        * ``df[df['col'] <op> value]`` -> ``df[df$col <op> value, ]``
        * ``df['col']`` -> ``df$col``
        * ``df.name`` -> ``df$name``

        The row-filter rule runs first: once the column rule has rewritten
        the inner ``df['col']`` the filter pattern can no longer match.
        """
        # The value part excludes '[' so a nested subscript on the right-hand
        # side is left for the column rule instead of being cut in half.
        code = re.sub(
            r'df\[df\[[\'"](.*?)[\'"]\]\s*([><=!]+)\s*([^\[\]]+?)\s*\]',
            r'df[df$\1 \2 \3, ]',
            code,
        )
        code = re.sub(r'df\[[\'"](.*?)[\'"]\]', r'df$\1', code)
        code = re.sub(r'df\.(\w+)', r'df$\1', code)
        return code

    def convert_plotting(self, code: str) -> str:
        """Convert matplotlib/seaborn plotting to ggplot2.

        Rewrites ``plt.figure(figsize=...)``, ``plt.title``, ``plt.xlabel``,
        ``plt.ylabel``, ``plt.legend()`` and ``plt.grid(True)`` calls into
        ggplot2 fragments or a placeholder comment.
        """
        conversions = [
            (r'plt\.figure\(figsize=\((\d+),\s*(\d+)\)\)', r'# Set figure size in ggplot theme'),
            (r'plt\.title\([\'"](.*?)[\'\"]\)', r'+ ggtitle("\1")'),
            (r'plt\.xlabel\([\'"](.*?)[\'\"]\)', r'+ xlab("\1")'),
            (r'plt\.ylabel\([\'"](.*?)[\'\"]\)', r'+ ylab("\1")'),
            (r'plt\.legend\(\)', r'+ theme(legend.position="right")'),
            (r'plt\.grid\(True\)', r'+ theme(panel.grid.major = element_line())'),
        ]
        for pattern, replacement in conversions:
            code = re.sub(pattern, replacement, code)
        return code

    def add_r_context(self, code: str) -> str:
        """Add R-specific context and comments.

        Returns *code* prefixed with a three-line R comment header.
        """
        r_header = (
            '# R Statistical Analysis Code\n'
            '# Converted from Python using Enhanced Converter with Gemini AI Validation\n'
            '# Install required packages: install.packages(c("dplyr", "ggplot2", "tidyr", "readr"))\n'
        )
        return r_header + code

    def convert_code(self, python_code: str) -> str:
        """Main conversion method that applies all transformations.

        Order matters: plotting rules run before the generic function
        renaming because ``plt.figure`` is also a ``function_mappings`` key,
        and renaming it first would keep the ``figsize`` rule from matching.

        Args:
            python_code: Python source text.

        Returns:
            The converted R text, including the comment header.
        """
        code = python_code.strip()
        code = self.convert_imports(code)
        code = self.convert_plotting(code)
        code = self.convert_functions(code)
        code = self.convert_pandas_operations(code)
        code = self.apply_syntax_patterns(code)
        code = self.add_r_context(code)
        return code

    def convert_and_validate(self, python_code: str, use_gemini: bool = True) -> Dict:
        """
        Convert Python code to R and validate with Gemini AI.

        Args:
            python_code: Python source text.
            use_gemini: When true and the validator has an API key, the
                converted code is passed to
                ``self.validator.validate_conversion``.

        Returns:
            A dict with keys ``original_python``, ``converted_r``,
            ``validation`` and ``final_r_code``. ``final_r_code`` is the
            validator's ``improved_code`` when it supplies a non-empty one,
            otherwise the rule-based conversion.
        """
        r_code = self.convert_code(python_code)
        result = {
            "original_python": python_code,
            "converted_r": r_code,
            "validation": None
        }

        api_key = self.validator.api_key
        if use_gemini and api_key:
            print("🔍 Validating conversion with Gemini AI...")
            validation = self.validator.validate_conversion(python_code, r_code)
            result["validation"] = validation
            # An empty or missing improved_code falls back to our own output.
            result["final_r_code"] = validation.get("improved_code") or r_code
        else:
            result["final_r_code"] = r_code
            if not api_key:
                result["validation"] = {"note": "Set GEMINI_API_KEY for AI validation"}
        return result

    def print_results(self, results: Dict):
        """Pretty print the conversion results.

        Args:
            results: A dict with ``original_python`` and ``final_r_code``
                keys and an optional ``validation`` dict, as produced by
                ``convert_and_validate``.
        """
        print("=" * 80)
        print("🐍 ORIGINAL PYTHON CODE")
        print("=" * 80)
        print(results["original_python"])
        print("\n" + "=" * 80)
        print("📊 CONVERTED R CODE")
        print("=" * 80)
        print(results["final_r_code"])

        validation = results.get("validation")
        if not validation:
            return

        print("\n" + "=" * 80)
        print("🤖 GEMINI AI VALIDATION")
        print("=" * 80)
        # Compare against None so a legitimate score of 0 is still shown.
        if validation.get("validation_score") is not None:
            print(f"📈 Score: {validation['validation_score']}/100")
        if validation.get("summary"):
            print(f"📝 Summary: {validation['summary']}")
        if validation.get("issues_found"):
            print("\n⚠️ Issues Found:")
            for issue in validation["issues_found"]:
                print(f" • {issue}")
        if validation.get("suggestions"):
            print("\n💡 Suggestions:")
            for suggestion in validation["suggestions"]:
                print(f" • {suggestion}")