URL Cleaner
Public
.PY
Guest
Dec 27, 2025
13
.PY
Copy
Save
Share
QR
New
#!/usr/bin/env python3
"""Strip URL prefixes from ``url:user:password`` style lines.

Each input line is reduced to ``email:password`` or ``username:password``;
lines from which no such pair can be extracted are dropped. The input file
is rewritten in place with the surviving lines.
"""
import re
import os
import tempfile
from typing import Tuple, List

# Substrings that mark a colon-separated segment as part of a URL rather
# than a user identifier. Matched case-insensitively by clean_line().
_URL_MARKERS = (
    'http', 'https', 'www.', '.com/', '.net/', '.org/',
    'quillbot.com', 'legacy-word', 'app.', 'auth.', 'mobile.',
    'signup', 'login', 'recovery', 'settings', 'upgrade',
    'grammar-check', 'extension',
)

# Substrings that disqualify the user part in is_valid_credential().
_UNWANTED_USER_PATTERNS = (
    'http', 'https', 'www.', '.com/', '.net/', '.org/',
    'login', 'signup', 'recovery', 'settings', 'upgrade',
)

# Characters that must not appear in a plain (non-email) username.
_PATH_CHARS = ('/', '\\')


def print_banner() -> None:
    """Print the application banner."""
    print("=" * 60)
    print(" ? URL CLEANER TOOL ?")
    print(" Remove URLs and keep Email:Pass or Username:Pass")
    print("=" * 60)
    print()


def print_separator() -> None:
    """Print a 60-character horizontal separator."""
    print("-" * 60)


def clean_line(line: str) -> str:
    """Extract ``user:password`` from a line, discarding URL segments.

    The line is split on ``:`` and scanned left to right. The first segment
    that does not look like a URL component and qualifies as an email
    (contains ``@``, longer than 3 characters) or a username (longer than
    2 characters, no slash or backslash) is taken as the user; everything
    after it, colons included, is the password.

    Args:
        line: Raw input line; surrounding whitespace is ignored.

    Returns:
        ``"user:password"``, or ``""`` when nothing suitable is found.
    """
    line = line.strip()
    if not line:
        return ""

    parts = line.split(':')
    if len(parts) < 2:
        return ""

    # The last segment can only ever be (part of) a password, never a user.
    for index, candidate in enumerate(parts[:-1]):
        lowered = candidate.lower()
        if any(marker in lowered for marker in _URL_MARKERS):
            continue

        # Covers both '//host' (after 'scheme:') and '/path' segments.
        if candidate.startswith('/'):
            continue

        password = ':'.join(parts[index + 1:])
        # Require a password for emails as well as usernames; previously an
        # email followed by a trailing colon produced "email:" here.
        if not password:
            continue

        is_email = '@' in candidate and len(candidate) > 3
        is_username = len(candidate) > 2 and not any(
            ch in candidate for ch in _PATH_CHARS
        )
        if is_email or is_username:
            return f"{candidate}:{password}"

    return ""


def is_valid_credential(line: str) -> bool:
    """Return True if *line* is ``user:password`` with a non-URL user part.

    The line is split on the first colon only. Both sides must be non-empty
    and the user side must not contain any URL-like marker
    (case-insensitive).
    """
    if not line or ':' not in line:
        return False

    username_part, password_part = line.split(':', 1)
    if not username_part or not password_part:
        return False

    lowered = username_part.lower()
    return not any(pattern in lowered for pattern in _UNWANTED_USER_PATTERNS)


def categorize_credential(line: str) -> str:
    """Classify a credential line by its user part.

    Returns:
        ``"unknown"`` if the line has no colon, ``"email"`` if the text
        before the first colon contains ``@``, otherwise ``"username"``.
    """
    if ':' not in line:
        return "unknown"

    username_part = line.split(':', 1)[0]
    return "email" if '@' in username_part else "username"


def process_file(filename: str) -> Tuple[int, int, int, int, List[str]]:
    """Read *filename*, clean every line, and collect statistics.

    Undecodable bytes are ignored while reading. A progress indicator is
    printed every 10,000 lines.

    Args:
        filename: Path of the file to read.

    Returns:
        ``(total_lines, cleaned_lines, email_count, username_count,
        cleaned_data)`` where *cleaned_data* is the list of surviving
        ``user:password`` strings in input order.

    Raises:
        FileNotFoundError: If *filename* does not exist.
    """
    if not os.path.exists(filename):
        raise FileNotFoundError(f"File '{filename}' not found!")

    print(f"? Processing file: {filename}")
    print("⏳ Cleaning URLs and processing...")

    cleaned_data: List[str] = []
    total_lines = 0

    with open(filename, 'r', encoding='utf-8', errors='ignore') as file:
        for line_num, line in enumerate(file, 1):
            total_lines = line_num

            # Show progress for large files (overwrite same line)
            if line_num % 10000 == 0:
                print(f"\r ⏳ Processing line {line_num:,}...", end='', flush=True)

            cleaned_line = clean_line(line)
            if cleaned_line and is_valid_credential(cleaned_line):
                cleaned_data.append(cleaned_line)

    # Clear progress line
    print("\r" + " " * 50 + "\r", end='')

    # Anything that is not an email credential counts as a username one.
    email_count = sum(
        1 for entry in cleaned_data if categorize_credential(entry) == "email"
    )
    username_count = len(cleaned_data) - email_count

    return total_lines, len(cleaned_data), email_count, username_count, cleaned_data


def save_cleaned_file(filename: str, cleaned_data: List[str]) -> None:
    """Overwrite *filename* with *cleaned_data*, one entry per line.

    The data is first written to a temporary file in the same directory and
    then moved over the target with ``os.replace``, so a failure part-way
    through (disk full, interruption) leaves the original file untouched
    instead of truncated. This requires write permission on the directory
    containing the file.

    Args:
        filename: Path of the file to overwrite (symlinks are followed).
        cleaned_data: Lines to write, without trailing newlines.

    Raises:
        OSError: If the temporary file cannot be created, written or moved.
    """
    real_path = os.path.realpath(filename)
    fd, tmp_path = tempfile.mkstemp(
        prefix=".urlclean-", suffix=".tmp", dir=os.path.dirname(real_path)
    )
    try:
        with os.fdopen(fd, 'w', encoding='utf-8') as file:
            file.writelines(f"{line}\n" for line in cleaned_data)

        try:
            # mkstemp creates the file as 0600; keep the target's mode bits.
            os.chmod(tmp_path, os.stat(real_path).st_mode & 0o7777)
        except OSError:
            # Best effort only: the target may not exist yet.
            pass

        os.replace(tmp_path, real_path)
    except BaseException:
        # Do not leave the temporary file behind on any failure.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise

    print(f"? Cleaned data saved back to: {filename}")


def print_statistics(total_lines: int, cleaned_lines: int,
                     email_count: int, username_count: int) -> None:
    """Print a summary of the cleaning run.

    Args:
        total_lines: Number of lines read from the input.
        cleaned_lines: Number of lines kept.
        email_count: Kept lines whose user part is an email.
        username_count: Kept lines whose user part is a plain username.
    """
    print_separator()
    print("? CLEANING SUMMARY")
    print_separator()

    print(f"? Total lines processed: {total_lines:,}")
    print(f"?️ Lines removed: {(total_lines - cleaned_lines):,}")
    print(f"✅ Lines remaining: {cleaned_lines:,}")
    print()

    print("? CREDENTIAL BREAKDOWN:")
    print(f"? Email:Password entries: {email_count:,}")
    print(f"? Username:Password entries: {username_count:,}")
    print(f"? Total clean entries: {cleaned_lines:,}")

    # Guard against division by zero for an empty input file.
    if total_lines > 0:
        retention_rate = (cleaned_lines / total_lines) * 100
        print(f"? Data retention rate: {retention_rate:.1f}%")


def main() -> None:
    """Prompt for a filename, clean it in place, and print a summary."""
    print_banner()

    try:
        # Get filename from user
        filename = input("? Enter filename to clean: ").strip()
        if not filename:
            print("❌ No filename provided!")
            return

        print()
        print_separator()

        # Process the file
        total_lines, cleaned_lines, email_count, username_count, cleaned_data = (
            process_file(filename)
        )

        # Save cleaned file
        save_cleaned_file(filename, cleaned_data)

        # Print statistics
        print_statistics(total_lines, cleaned_lines, email_count, username_count)

        print_separator()
        print("? Process completed successfully!")
        print("=" * 60)
    except FileNotFoundError as e:
        print(f"❌ Error: {e}")
    except KeyboardInterrupt:
        print("\n❌ Process interrupted by user!")
    except Exception as e:
        # Top-level boundary of an interactive script: report and exit.
        print(f"❌ Unexpected error: {e}")


if __name__ == "__main__":
    main()
Scan to View
Use your phone's camera or a QR scanner app to open this paste instantly.