Building a Smart Python-to-R Code Converter with Gemini AI-Powered Validation and Feedback


class EnhancedPythonToRConverter:
    """
    Enhanced Python to R converter with Gemini AI validation
    """


    def __init__(self, gemini_api_key: str = None):
        self.validator = GeminiValidator(gemini_api_key)


        self.import_mappings = {
            'pandas': 'library(dplyr)\nlibrary(tidyr)\nlibrary(readr)',
            'numpy': 'library(base)',
            'matplotlib.pyplot': 'library(ggplot2)',
            'seaborn': 'library(ggplot2)\nlibrary(RColorBrewer)',
            'scipy.stats': 'library(stats)',
            'sklearn': 'library(caret)\nlibrary(randomForest)\nlibrary(e1071)',
            'statsmodels': 'library(stats)\nlibrary(lmtest)',
            'plotly': 'library(plotly)',
        }


        self.function_mappings = {
            'pd.DataFrame': 'data.frame',
            'pd.read_csv': 'read.csv',
            'pd.read_excel': 'read_excel',
            'df.head': 'head',
            'df.tail': 'tail',
            'df.shape': 'dim',
            'df.info': 'str',
            'df.describe': 'summary',
            'df.mean': 'mean',
            'df.median': 'median',
            'df.std': 'sd',
            'df.var': 'var',
            'df.sum': 'sum',
            'df.count': 'length',
            'df.groupby': 'group_by',
            'df.merge': 'merge',
            'df.drop': 'select',
            'df.dropna': 'na.omit',
            'df.fillna': 'replace_na',
            'df.sort_values': 'arrange',
            'df.value_counts': 'table',


            'np.array': 'c',
            'np.mean': 'mean',
            'np.median': 'median',
            'np.std': 'sd',
            'np.var': 'var',
            'np.sum': 'sum',
            'np.min': 'min',
            'np.max': 'max',
            'np.sqrt': 'sqrt',
            'np.log': 'log',
            'np.exp': 'exp',
            'np.random.normal': 'rnorm',
            'np.random.uniform': 'runif',
            'np.linspace': 'seq',
            'np.arange': 'seq',


            'plt.figure': 'ggplot',
            'plt.plot': 'geom_line',
            'plt.scatter': 'geom_point',
            'plt.hist': 'geom_histogram',
            'plt.bar': 'geom_bar',
            'plt.boxplot': 'geom_boxplot',
            'plt.show': 'print',
            'sns.scatterplot': 'geom_point',
            'sns.histplot': 'geom_histogram',
            'sns.boxplot': 'geom_boxplot',
            'sns.heatmap': 'geom_tile',


            'scipy.stats.ttest_ind': 't.test',
            'scipy.stats.chi2_contingency': 'chisq.test',
            'scipy.stats.pearsonr': 'cor.test',
            'scipy.stats.spearmanr': 'cor.test',
            'scipy.stats.normaltest': 'shapiro.test',
            'stats.ttest_ind': 't.test',


            'sklearn.linear_model.LinearRegression': 'lm',
            'sklearn.ensemble.RandomForestRegressor': 'randomForest',
            'sklearn.model_selection.train_test_split': 'sample',
        }


        self.syntax_patterns = [
            (r'\bTrue\b', 'TRUE'),
            (r'\bFalse\b', 'FALSE'),
            (r'\bNone\b', 'NULL'),
            (r'\blen\(', 'length('),
            (r'range\((\d+)\)', r'1:\1'),
            (r'range\((\d+),\s*(\d+)\)', r'\1:\2'),
            (r'\.split\(', '.strsplit('),
            (r'\.strip\(\)', '.str_trim()'),
            (r'\.lower\(\)', '.str_to_lower()'),
            (r'\.upper\(\)', '.str_to_upper()'),
            (r'\[0\]', '[1]'),
            (r'f"([^"]*)"', r'paste0("\1")'),
            (r"f'([^']*)'", r"paste0('\1')"),
        ]


    def convert_imports(self, code: str) -> str:
        """Convert Python import statements to R library statements."""
        lines = code.split('\n')
        converted_lines = []


        for line in lines:
            line = line.strip()
            if line.startswith('import ') or line.startswith('from '):
                if ' as ' in line:
                    if 'import' in line and 'as' in line:
                        parts = line.split(' as ')
                        module = parts[0].replace('import ', '').strip()
                        if module in self.import_mappings:
                            converted_lines.append(f"# {line}")
                            converted_lines.append(self.import_mappings[module])
                        else:
                            converted_lines.append(f"# {line} # No direct R equivalent")
                    elif 'from' in line and 'import' in line and 'as' in line:
                        converted_lines.append(f"# {line} # Handle specific imports manually")
                elif line.startswith('from '):
                    parts = line.split(' import ')
                    module = parts[0].replace('from ', '').strip()
                    if module in self.import_mappings:
                        converted_lines.append(f"# {line}")
                        converted_lines.append(self.import_mappings[module])
                    else:
                        converted_lines.append(f"# {line} # No direct R equivalent")
                else:
                    module = line.replace('import ', '').strip()
                    if module in self.import_mappings:
                        converted_lines.append(f"# {line}")
                        converted_lines.append(self.import_mappings[module])
                    else:
                        converted_lines.append(f"# {line} # No direct R equivalent")
            else:
                converted_lines.append(line)


        return '\n'.join(converted_lines)


    def convert_functions(self, code: str) -> str:
        """Convert Python function calls to R equivalents."""
        for py_func, r_func in self.function_mappings.items():
            code = code.replace(py_func, r_func)
        return code


    def apply_syntax_patterns(self, code: str) -> str:
        """Apply regex patterns to convert Python syntax to R syntax."""
        for pattern, replacement in self.syntax_patterns:
            code = re.sub(pattern, replacement, code)
        return code


    def convert_pandas_operations(self, code: str) -> str:
        """Convert common pandas operations to dplyr/tidyr equivalents."""
        code = re.sub(r'df\[[\'"](.*?)[\'"]\]', r'df$\1', code)
        code = re.sub(r'df\.(\w+)', r'df$\1', code)


        code = re.sub(r'df\[df\[[\'"](.*?)[\'"]\]\s*([><=!]+)\s*([^]]+)\]', r'df[df$\1 \2 \3, ]', code)


        return code


    def convert_plotting(self, code: str) -> str:
        """Convert matplotlib/seaborn plotting to ggplot2."""
        conversions = [
            (r'plt\.figure\(figsize=\((\d+),\s*(\d+)\)\)', r'# Set figure size in ggplot theme'),
            (r'plt\.title\([\'"](.*?)[\'\"]\)', r'+ ggtitle("\1")'),
            (r'plt\.xlabel\([\'"](.*?)[\'\"]\)', r'+ xlab("\1")'),
            (r'plt\.ylabel\([\'"](.*?)[\'\"]\)', r'+ ylab("\1")'),
            (r'plt\.legend\(\)', r'+ theme(legend.position="right")'),
            (r'plt\.grid\(True\)', r'+ theme(panel.grid.major = element_line())'),
        ]


        for pattern, replacement in conversions:
            code = re.sub(pattern, replacement, code)


        return code


    def add_r_context(self, code: str) -> str:
        """Add R-specific context and comments."""
        r_header=""'# R Statistical Analysis Code
# Converted from Python using Enhanced Converter with Gemini AI Validation
# Install required packages: install.packages(c("dplyr", "ggplot2", "tidyr", "readr"))


'''
        return r_header + code


    def convert_code(self, python_code: str) -> str:
        """Main conversion method that applies all transformations."""
        code = python_code.strip()


        code = self.convert_imports(code)
        code = self.convert_functions(code)
        code = self.convert_pandas_operations(code)
        code = self.convert_plotting(code)
        code = self.apply_syntax_patterns(code)
        code = self.add_r_context(code)


        return code


    def convert_and_validate(self, python_code: str, use_gemini: bool = True) -> Dict:
        """
        Convert Python code to R and validate with Gemini AI
        """
        r_code = self.convert_code(python_code)


        result = {
            "original_python": python_code,
            "converted_r": r_code,
            "validation": None
        }


        if use_gemini and self.validator.api_key:
            print("🔍 Validating conversion with Gemini AI...")
            validation = self.validator.validate_conversion(python_code, r_code)
            result["validation"] = validation


            if validation.get("improved_code") and validation.get("improved_code") != r_code:
                result["final_r_code"] = validation["improved_code"]
            else:
                result["final_r_code"] = r_code
        else:
            result["final_r_code"] = r_code
            if not self.validator.api_key:
                result["validation"] = {"note": "Set GEMINI_API_KEY for AI validation"}


        return result


    def print_results(self, results: Dict):
        """Pretty print the conversion results"""
        print("=" * 80)
        print("🐍 ORIGINAL PYTHON CODE")
        print("=" * 80)
        print(results["original_python"])


        print("\n" + "=" * 80)
        print("📊 CONVERTED R CODE")
        print("=" * 80)
        print(results["final_r_code"])


        if results.get("validation"):
            validation = results["validation"]
            print("\n" + "=" * 80)
            print("🤖 GEMINI AI VALIDATION")
            print("=" * 80)


            if validation.get("validation_score"):
                print(f"📈 Score: {validation['validation_score']}/100")


            if validation.get("summary"):
                print(f"📝 Summary: {validation['summary']}")


            if validation.get("issues_found"):
                print("\n⚠️  Issues Found:")
                for issue in validation["issues_found"]:
                    print(f"   • {issue}")


            if validation.get("suggestions"):
                print("\n💡 Suggestions:")
                for suggestion in validation["suggestions"]:
                    print(f"   • {suggestion}")



Source link

  • Related Posts

    Are We Ready for Production-Grade Apps With Vibe Coding? A Look at the Replit Fiasco

    The Allure and The Hype Vibe coding—constructing applications through conversational AI rather than writing traditional code—has surged in popularity, with platforms like Replit promoting themselves as safe havens for this…

    Building a Versatile Multi‑Tool AI Agent Using Lightweight Hugging Face Models

    In this tutorial, we begin by setting up a compact yet capable AI agent that runs smoothly, leveraging Hugging Face transformers. We integrate dialog generation, question‑answering, sentiment analysis, web search stubs,…

    Leave a Reply

    Your email address will not be published. Required fields are marked *