Snippet content copied to clipboard.
Are you sure to delete this snippet? No, don't delete
  1. import pandas as pd
  2. import subprocess
  3. from pathlib import Path
  4. # PowerShell script to select a file
  5. # Run the PowerShell script and capture the output
  6. result = subprocess.run(
  7. ["powershell", "-File", Path(__file__).parent / "folder_select.ps1"],
  8. capture_output=True,
  9. text=True,
  10. )
  11. # Print the full path of the selected file
  12. folder_path = result.stdout.strip()
  13. dir_path = Path(
  14. folder_path
  15. ) # Change to the path of the directory containing CSV and Excel files
  16. # Select all CSV and Excel files in the directory
  17. file_paths = [
  18. *dir_path.glob("*.csv"),
  19. *dir_path.glob("*.xlsx"),
  20. *dir_path.glob("*.xls"),
  21. ]
  22. file_paths = [
  23. str(path) for path in file_paths
  24. ] # Convert Path objects to strings
  25. with open(Path(__file__).parent / "columns.txt", "r") as f:
  26. column_names = [line.strip().upper() for line in f.readlines()]
  27. def get_df(path):
  28. if path.endswith(".csv"):
  29. df = pd.read_csv(path)
  30. elif path.endswith(".xlsx") or path.endswith(".xls"):
  31. df = pd.read_excel(path)
  32. else:
  33. raise ValueError("Error: Unsupported file type")
  34. df.columns = [col.strip().upper() for col in df.columns]
  35. df = df[df.PAN.notna()]
  36. df = df[column_names]
  37. df["FILENAME"] = Path(path).name[:9]
  38. return df
  39. try:
  40. df_complete = pd.concat(
  41. [get_df(path) for path in file_paths], ignore_index=True
  42. )
  43. result = subprocess.run(
  44. ["powershell", "-File", Path(__file__).parent / "save_path.ps1"],
  45. capture_output=True,
  46. text=True,
  47. )
  48. output_file = result.stdout.strip()
  49. df_complete.to_excel(output_file, index=False)
  50. print(f"Successfully merged files to {output_file}")
  51. except ValueError as err:
  52. print(err)

Edit this Snippet