- import pandas as pd
- import subprocess
- from pathlib import Path
-
- # PowerShell script to select a file
-
-
- # Run the PowerShell script and capture the output
- result = subprocess.run(
- ["powershell", "-File", Path(__file__).parent / "folder_select.ps1"],
- capture_output=True,
- text=True,
- )
-
- # Print the full path of the selected file
- folder_path = result.stdout.strip()
-
- dir_path = Path(
- folder_path
- ) # Change to the path of the directory containing CSV and Excel files
-
- # Select all CSV and Excel files in the directory
- file_paths = [
- *dir_path.glob("*.csv"),
- *dir_path.glob("*.xlsx"),
- *dir_path.glob("*.xls"),
- ]
- file_paths = [
- str(path) for path in file_paths
- ] # Convert Path objects to strings
-
-
- with open(Path(__file__).parent / "columns.txt", "r") as f:
- column_names = [line.strip().upper() for line in f.readlines()]
-
-
- def get_df(path):
- if path.endswith(".csv"):
- df = pd.read_csv(path)
- elif path.endswith(".xlsx") or path.endswith(".xls"):
- df = pd.read_excel(path)
- else:
- raise ValueError("Error: Unsupported file type")
- df.columns = [col.strip().upper() for col in df.columns]
- df = df[df.PAN.notna()]
- df = df[column_names]
- df["FILENAME"] = Path(path).name[:9]
- return df
-
-
- try:
- df_complete = pd.concat(
- [get_df(path) for path in file_paths], ignore_index=True
- )
- result = subprocess.run(
- ["powershell", "-File", Path(__file__).parent / "save_path.ps1"],
- capture_output=True,
- text=True,
- )
-
- output_file = result.stdout.strip()
- df_complete.to_excel(output_file, index=False)
- print(f"Successfully merged files to {output_file}")
- except ValueError as err:
- print(err)