update
This commit is contained in:
297
Scripts/python/pdf_import/import_pdf_from_o365_de.v2.2-orig.py
Normal file
297
Scripts/python/pdf_import/import_pdf_from_o365_de.v2.2-orig.py
Normal file
@@ -0,0 +1,297 @@
|
|||||||
|
import os
|
||||||
|
import requests
|
||||||
|
import msal
|
||||||
|
import base64
|
||||||
|
import logging
|
||||||
|
import mysql.connector
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Switch the console streams to UTF-8 so non-ASCII log text is written
# unchanged. Both streams are handled because a StreamHandler created without
# arguments writes to sys.stderr, not sys.stdout. A stream can be None (e.g.
# when started without a console) or a replacement object without
# reconfigure(), so each one is checked before the call.
for _stream in (sys.stdout, sys.stderr):
    if _stream is not None and hasattr(_stream, "reconfigure"):
        _stream.reconfigure(encoding='utf-8')
|
||||||
|
|
||||||
|
# ==============================================================================
# MySQL CONFIGURATION
# ==============================================================================
# Connection settings for the MySQL database that receives the log records.
# Host, user, password and database can be overridden through the environment
# variables named below, so a deployment can change them without editing this
# file. The literals are the previously hard-coded values and stay the defaults.
# NOTE(review): the password default is a credential committed to source
# control - rotate it and supply the new one via EMAILTOPDF_MYSQL_PASSWORD.
MYSQL_HOST = os.environ.get("EMAILTOPDF_MYSQL_HOST", "10.102.1.65")
MYSQL_USER = os.environ.get("EMAILTOPDF_MYSQL_USER", "svc.emailtopdf")
MYSQL_PASSWORD = os.environ.get("EMAILTOPDF_MYSQL_PASSWORD", "zZUHrps62skLKfr9yQwQ")
MYSQL_DATABASE = os.environ.get("EMAILTOPDF_MYSQL_DATABASE", "emailtopdf")
# The table name is interpolated into SQL statements as an identifier, so it is
# deliberately kept as a fixed literal.
MYSQL_TABLE = "emailtopdf"
# ==============================================================================
|
||||||
|
|
||||||
|
def create_log_table_if_not_exists():
    """Create the MySQL log table if it does not exist yet.

    Connects with the module-level MYSQL_* settings and runs a
    ``CREATE TABLE IF NOT EXISTS`` statement for ``MYSQL_TABLE``.
    ``mysql.connector.Error`` is logged and not re-raised.
    """
    # Bind both handles up front so the cleanup in ``finally`` never touches an
    # unbound name, whichever step failed.
    cnx = None
    cursor = None
    try:
        cnx = mysql.connector.connect(
            host=MYSQL_HOST,
            user=MYSQL_USER,
            password=MYSQL_PASSWORD,
            database=MYSQL_DATABASE
        )
        cursor = cnx.cursor()

        # Layout of the log table
        create_table_query = f"""
        CREATE TABLE IF NOT EXISTS `{MYSQL_TABLE}` (
            `id` INT AUTO_INCREMENT PRIMARY KEY,
            `timestamp` DATETIME DEFAULT CURRENT_TIMESTAMP,
            `level` VARCHAR(10),
            `message` TEXT
        )
        """
        cursor.execute(create_table_query)
        cnx.commit()
        logging.info(f"MySQL naplótábla '{MYSQL_TABLE}' ellenőrizve/létrehozva.")
    except mysql.connector.Error as err:
        logging.error(f"Hiba a MySQL naplótábla létrehozásakor/ellenőrzésekor: {err}")
    finally:
        # Only a live connection is closed; the cursor may still be None when
        # cnx.cursor() itself failed.
        if cnx is not None and cnx.is_connected():
            if cursor is not None:
                cursor.close()
            cnx.close()
|
||||||
|
|
||||||
|
class MySQLHandler(logging.Handler):
    """Logging handler that writes each record into the MySQL log table.

    Every ``emit`` opens its own connection using the module-level MYSQL_*
    settings, inserts one row (level name and formatted message) into
    ``MYSQL_TABLE`` and closes the connection again.
    """

    def emit(self, record):
        """Insert one log record into the table.

        Args:
            record: The ``logging.LogRecord`` to store. Records from loggers
                whose name starts with ``mysql.connector`` are dropped.
        """
        # Skip the driver's own log records to avoid recursing into this handler
        if record.name.startswith('mysql.connector'):
            return

        # Bind both handles up front so the cleanup in ``finally`` never touches
        # an unbound name, whichever step failed.
        cnx = None
        cursor = None
        try:
            cnx = mysql.connector.connect(
                host=MYSQL_HOST,
                user=MYSQL_USER,
                password=MYSQL_PASSWORD,
                database=MYSQL_DATABASE
            )
            cursor = cnx.cursor()

            sql = f"INSERT INTO `{MYSQL_TABLE}` (level, message) VALUES (%s, %s)"
            # Store the record's level name and its formatted message
            cursor.execute(sql, (record.levelname, self.format(record)))
            cnx.commit()
        except mysql.connector.Error as err:
            # Do not log from inside the handler (that would loop endlessly);
            # print the error instead so it can still be debugged.
            print(f"Hiba a MySQL naplóbejegyzés beszúrásakor: {err}")
        finally:
            # Only a live connection is closed; the cursor may still be None
            # when cnx.cursor() itself failed.
            if cnx is not None and cnx.is_connected():
                if cursor is not None:
                    cursor.close()
                cnx.close()
|
||||||
|
|
||||||
|
# ==============================================================================
# LOGGING CONFIGURATION
# ==============================================================================
# The log file is created in the directory that contains this script.
LOG_FILE = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'import_pdf_from_o365_de.v2.1.log',
)

# Root logger setup: INFO and above, sent to three handlers in this order.
logging.basicConfig(
    handlers=[
        # 1) UTF-8 encoded log file
        logging.FileHandler(LOG_FILE, encoding='utf-8'),
        # 2) default StreamHandler; created without a stream argument it
        #    writes to sys.stderr
        logging.StreamHandler(),
        # 3) MySQL log table
        MySQLHandler(),
    ],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# ==============================================================================
|
||||||
|
|
||||||
|
# ==============================================================================
# CONFIGURATION
# ==============================================================================
# Data from the Azure app registration. Tenant, client, secret, mailbox and
# download directory can be overridden through the environment variables named
# below, so switching mailbox or target share needs no code edit. The literals
# are the previously hard-coded values and stay the defaults.
# NOTE(review): the CLIENT_SECRET default is a secret committed to source
# control - rotate it and supply the new one via O365_CLIENT_SECRET.
# Tenant ID : caee3499-03f8-4175-9fa8-a935248d0ece
TENANT_ID = os.environ.get("O365_TENANT_ID", "caee3499-03f8-4175-9fa8-a935248d0ece")
# Client ID : 3a08b279-1fc3-419f-a77e-31f12a0f65f7
CLIENT_ID = os.environ.get("O365_CLIENT_ID", "3a08b279-1fc3-419f-a77e-31f12a0f65f7")
# Key : 3cd0be8b-e58f-4e0c-9856-5c9788183b2c
# The secret's "Value", not its "Secret ID"
CLIENT_SECRET = os.environ.get("O365_CLIENT_SECRET", "Rk-8Q~nJ.sZ-xUiNxtEDdzVgoFFosODLVHX~jdrh")

# The mailbox to monitor
# USER_EMAIL = "Bestellung-Fax-Eingang@aps-hh.de"
USER_EMAIL = os.environ.get("O365_USER_EMAIL", "i.meszely@aps-hh.de")

# Target directory for downloaded PDFs
# DOWNLOAD_DIR = r"\\aps-nb090\test"
# DOWNLOAD_DIR = r"C:\Users\YourUsername\Downloads\PDFs"
DOWNLOAD_DIR = os.environ.get("O365_PDF_DOWNLOAD_DIR", "C:/Tools/PDF")

# Name of the mail folder that processed e-mails are moved to
PROCESSED_FOLDER_NAME = "erledigt"
# ==============================================================================

# Microsoft Graph API endpoints
GRAPH_API_ENDPOINT = "https://graph.microsoft.com/v1.0"
AUTHORITY_URL = f"https://login.microsoftonline.com/{TENANT_ID}"
SCOPES = ["https://graph.microsoft.com/.default"]
|
||||||
|
|
||||||
|
|
||||||
|
def get_graph_api_token():
    """Obtain an access token for the Microsoft Graph API.

    Returns:
        The access token string, or ``None`` when the MSAL response contains
        no ``access_token`` (the reported error and description are logged).
    """
    client = msal.ConfidentialClientApplication(
        client_id=CLIENT_ID,
        authority=AUTHORITY_URL,
        client_credential=CLIENT_SECRET,
    )

    # Ask for a silently available token first; if that yields nothing,
    # request a fresh one with the client credentials.
    token_response = (
        client.acquire_token_silent(scopes=SCOPES, account=None)
        or client.acquire_token_for_client(scopes=SCOPES)
    )

    if "access_token" not in token_response:
        logging.error("Fehler beim Abrufen des Tokens!")
        logging.error(token_response.get("error"))
        logging.error(token_response.get("error_description"))
        return None

    logging.info("Graph API-Token erfolgreich abgerufen.")
    return token_response["access_token"]
|
||||||
|
|
||||||
|
def get_folder_id(access_token, folder_name):
    """Look up the ID of a mail folder by its display name.

    Args:
        access_token: Bearer token for the Microsoft Graph API.
        folder_name: Display name to look for; compared case-insensitively.

    Returns:
        The ``id`` of the first folder listed under USER_EMAIL's
        ``mailFolders`` whose ``displayName`` matches.

    Raises:
        requests.exceptions.HTTPError: If a request returns an error status.
        ValueError: If no folder with that name is listed.
    """
    headers = {"Authorization": f"Bearer {access_token}"}
    wanted = folder_name.lower()
    url = f"{GRAPH_API_ENDPOINT}/users/{USER_EMAIL}/mailFolders"

    # The folder list is delivered in pages; keep following @odata.nextLink so
    # a folder beyond the first page is still found.
    while url:
        # The timeout keeps a stalled connection from blocking the run forever.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        page = response.json()

        for folder in page.get("value", []):
            # displayName may be missing or null; treat that as "no match"
            if (folder.get("displayName") or "").lower() == wanted:
                return folder["id"]

        url = page.get("@odata.nextLink")

    # A missing folder could also be created here; for now an error is raised.
    raise ValueError(f"Der Ordner '{folder_name}' wurde nicht gefunden.")
|
||||||
|
|
||||||
|
|
||||||
|
# Seconds to wait for a Graph response; keeps a stalled call from hanging the run.
_HTTP_TIMEOUT = 30


def _sanitize_filename(filename):
    """Replace every character except alphanumerics, ' ', '.', '_' and '-' with '_'."""
    return "".join(c if c.isalnum() or c in (" ", ".", "_", "-") else "_" for c in filename)


def _write_new_file(directory, filename, content):
    """Write content to a new file in directory without overwriting; return its path.

    If the name is taken, " (1)", " (2)", ... is inserted before the extension.
    """
    base_name, extension = os.path.splitext(filename)
    counter = 0
    while True:
        candidate = filename if counter == 0 else f"{base_name} ({counter}){extension}"
        filepath = os.path.join(directory, candidate)
        if not os.path.exists(filepath):
            try:
                # "xb" creates the file exclusively, so a file that appears
                # between the check above and this open is never overwritten.
                with open(filepath, "xb") as f:
                    f.write(content)
                return filepath
            except FileExistsError:
                pass
        counter += 1


def _save_pdf_attachments(attachments):
    """Save all PDF attachments to DOWNLOAD_DIR; return (saved, write_failures)."""
    saved = 0
    write_failures = 0
    for att in attachments:
        # Fields may be absent or null; normalise both to a usable string.
        filename = att.get("name") or "unknown"
        content_type = att.get("contentType") or ""

        if filename == "Safe Attachments Scan In Progress" and not content_type:
            logging.info(f" Placeholder melléklet kihagyása: '{filename}'")
            continue

        if not (filename.lower().endswith(".pdf") or content_type == "application/pdf"):
            continue

        logging.info(f" PDF-Anhang gefunden: {filename}")

        # The attachment content is Base64-encoded in the 'contentBytes' field.
        # Without that field there is nothing to save, so the entry is skipped.
        encoded = att.get("contentBytes")
        if not encoded:
            logging.error(f" Anhang '{filename}' enthält keine 'contentBytes' und wird übersprungen.")
            continue
        try:
            file_content = base64.b64decode(encoded)
        except ValueError as e:
            logging.error(f" Anhang '{filename}' konnte nicht dekodiert werden: {e}")
            continue

        try:
            filepath = _write_new_file(DOWNLOAD_DIR, _sanitize_filename(filename), file_content)
        except OSError as e:
            logging.error(f" Fehler beim Speichern der Datei: {e}")
            write_failures += 1
        else:
            logging.info(f" PDF erfolgreich gespeichert: {filepath}")
            saved += 1
    return saved, write_failures


def _list_unread_messages_with_attachments(headers):
    """Return id/subject of all unread inbox messages that have attachments."""
    # $select=id,subject -> request only the fields that are needed
    url = (
        f"{GRAPH_API_ENDPOINT}/users/{USER_EMAIL}/mailFolders/inbox/messages?"
        f"$filter=isRead eq false and hasAttachments eq true&"
        f"$select=id,subject"
    )
    messages = []
    # Collect every page before processing starts, because moving messages
    # changes what the filter returns.
    while url:
        response = requests.get(url, headers=headers, timeout=_HTTP_TIMEOUT)
        response.raise_for_status()
        page = response.json()
        messages.extend(page.get("value", []))
        url = page.get("@odata.nextLink")
    return messages


def _move_and_mark_read(headers, msg_id, subject, processed_folder_id):
    """Move one message to the processed folder and mark the moved copy as read."""
    logging.info(f" E-Mail wird in den Ordner '{PROCESSED_FOLDER_NAME}' verschoben...")
    move_url = f"{GRAPH_API_ENDPOINT}/users/{USER_EMAIL}/messages/{msg_id}/move"
    move_payload = {"destinationId": processed_folder_id}

    response = requests.post(move_url, headers=headers, json=move_payload, timeout=_HTTP_TIMEOUT)
    if response.status_code != 201:
        # Moving requires the Mail.ReadWrite permission.
        logging.error(f" Fehler beim Verschieben der E-Mail: {response.status_code} - {response.text}")
        return

    logging.info(" E-Mail erfolgreich verschoben.")
    # The new ID of the moved message is taken from the move response.
    moved_message_id = response.json().get("id")
    if not moved_message_id:
        logging.error(" Hiba: Nem sikerült lekérni az áthelyezett e-mail új ID-ját a válaszból.")
        return

    logging.info(f" E-Mail új ID-ja: {moved_message_id}")
    logging.info(f" Markiere E-Mail '{subject}' (ID: {moved_message_id}) als gelesen...")
    mark_as_read_url = f"{GRAPH_API_ENDPOINT}/users/{USER_EMAIL}/messages/{moved_message_id}"
    mark_as_read_payload = {"isRead": True}

    mark_response = requests.patch(
        mark_as_read_url, headers=headers, json=mark_as_read_payload, timeout=_HTTP_TIMEOUT
    )
    if mark_response.status_code == 200:
        logging.info(" E-Mail erfolgreich als gelesen markiert.")
    else:
        logging.error(f" Fehler beim Markieren der E-Mail als gelesen: {mark_response.status_code} - {mark_response.text}")


def main():
    """Download PDF attachments of unread inbox mails and file the mails away.

    Steps: obtain a Graph token, resolve the ID of PROCESSED_FOLDER_NAME, list
    the unread inbox messages that have attachments, save each PDF attachment
    to DOWNLOAD_DIR, then move the message to the processed folder and mark it
    as read. A message is only moved when at least one PDF was saved and no
    PDF of that message failed to be written, so a failed write is retried on
    the next run instead of being lost.

    Token, folder lookup and request errors are logged; the function always
    returns ``None``.
    """
    logging.info("Graph API-Token wird abgerufen...")
    access_token = get_graph_api_token()
    if not access_token:
        logging.error("Kein Zugriffstoken erhalten. Beende das Skript.")
        return

    headers = {"Authorization": f"Bearer {access_token}"}

    try:
        logging.info(f"Suche nach der ID für den Ordner '{PROCESSED_FOLDER_NAME}'...")
        processed_folder_id = get_folder_id(access_token, PROCESSED_FOLDER_NAME)
        logging.info("Ordner-ID erfolgreich abgerufen.")
    except requests.exceptions.HTTPError as e:
        logging.error(f"Fehler beim Abrufen der Ordner-ID: {e}")
        if e.response.status_code == 403:
            logging.error("Mögliche Ursache: Fehlende oder unzureichende Graph API-Berechtigungen. Bitte stellen Sie sicher, dass die Anwendung die erforderlichen 'Application Permissions' (z.B. Mail.Read, Mail.ReadWrite) in Azure AD hat und dass der Administrator die Zustimmung erteilt hat.")
        return
    except (requests.exceptions.RequestException, ValueError) as e:
        logging.error(f"Fehler beim Abrufen der Ordner-ID: {e}")
        return

    # Query only unread e-mails that have attachments
    try:
        messages = _list_unread_messages_with_attachments(headers)
    except requests.exceptions.RequestException as e:
        logging.error(f"Fehler beim Abrufen der E-Mails: {e}")
        return

    if not messages:
        logging.info("Keine neuen E-Mails zur Verarbeitung gefunden.")
        return

    logging.info(f"{len(messages)} neue E-Mail(s) mit Anhängen gefunden.")

    for message in messages:
        msg_id = message["id"]
        subject = message.get("subject", "N/A")
        logging.info(f"\n--- In Verarbeitung: '{subject}' (ID: {msg_id}) ---")

        # A network problem with one message must not abort the remaining ones.
        try:
            attachments_url = f"{GRAPH_API_ENDPOINT}/users/{USER_EMAIL}/messages/{msg_id}/attachments"
            response = requests.get(attachments_url, headers=headers, timeout=_HTTP_TIMEOUT)
            if response.status_code != 200:
                # response.text is used because an error body need not be JSON
                logging.error(f" Fehler beim Abrufen der Anhänge: {response.status_code} - {response.text}")
                continue

            saved, write_failures = _save_pdf_attachments(response.json().get("value", []))

            if write_failures:
                logging.error(" Mindestens ein PDF konnte nicht gespeichert werden; die E-Mail bleibt im Posteingang.")
                continue

            if saved:
                _move_and_mark_read(headers, msg_id, subject, processed_folder_id)
        except requests.exceptions.RequestException as e:
            logging.error(f" Fehler bei der Verarbeitung der E-Mail: {e}")

    logging.info("\nVerarbeitung abgeschlossen.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: ensure the log table exists, check the download
    # directory, then run the import.
    logging.info("Skript gestartet.")
    create_log_table_if_not_exists()  # create the MySQL log table if needed
    try:
        # isdir instead of exists: a plain file at that path is not a usable
        # download target.
        if not os.path.isdir(DOWNLOAD_DIR):
            logging.error(f"Fehler: Der Download-Ordner existiert nicht: {DOWNLOAD_DIR}")
        else:
            main()
    except Exception:
        # Record the traceback through the configured log handlers before the
        # exception ends the script; it is re-raised so the exit status is
        # unchanged.
        logging.exception("Unerwarteter Fehler; das Skript wird abgebrochen.")
        raise
    finally:
        logging.info("Skript beendet.")
|
||||||
@@ -105,12 +105,12 @@ CLIENT_SECRET = "Rk-8Q~nJ.sZ-xUiNxtEDdzVgoFFosODLVHX~jdrh" # Der "Wert", nicht d
|
|||||||
|
|
||||||
# Das zu überwachende Postfach
|
# Das zu überwachende Postfach
|
||||||
# USER_EMAIL = "Bestellung-Fax-Eingang@aps-hh.de"
|
# USER_EMAIL = "Bestellung-Fax-Eingang@aps-hh.de"
|
||||||
USER_EMAIL = "i.meszely@aps-hh.de"
|
USER_EMAIL = "fax-bestellung@antares-apo.de"
|
||||||
|
|
||||||
# Speicherort für heruntergeladene PDFs
|
# Speicherort für heruntergeladene PDFs
|
||||||
# DOWNLOAD_DIR = r"\\aps-nb090\test"
|
# DOWNLOAD_DIR = r"\\aps-nb090\test"
|
||||||
# DOWNLOAD_DIR = r"C:\Users\YourUsername\Downloads\PDFs"
|
# DOWNLOAD_DIR = r"C:\Users\YourUsername\Downloads\PDFs"
|
||||||
DOWNLOAD_DIR = "C:/Tools/PDF"
|
DOWNLOAD_DIR = r"\\APS-FILE01\Faxe\Austausch"
|
||||||
|
|
||||||
# Name des Ordners, in den verarbeitete E-Mails verschoben werden
|
# Name des Ordners, in den verarbeitete E-Mails verschoben werden
|
||||||
PROCESSED_FOLDER_NAME = "erledigt"
|
PROCESSED_FOLDER_NAME = "erledigt"
|
||||||
|
|||||||
Reference in New Issue
Block a user