Initial commit

93256347 · blackirfan · 93256347 · 93256347 · 93256347 · 93256347
Commit 93256347 authored Jan 18, 2026 by blackirfan
33 changed files
--- a/README.md
+++ b/README.md
--- a/__pycache__/main.cpython-310.pyc
+++ b/__pycache__/main.cpython-310.pyc
--- a/api/__pycache__/routes.cpython-310.pyc
+++ b/api/__pycache__/routes.cpython-310.pyc
--- a/api/routes.py
+++ b/api/routes.py
+from fastapi import APIRouter
+from app.services.pipeline import process_call
+from app.models.schemas import CallRequest
+
+router = APIRouter()
+
+@router.post("/process-call")
+def analyze_call(request: CallRequest):
+    return process_call(request.audio_path)
+
--- a/config.py
+++ b/config.py
--- a/create_tables.py
+++ b/create_tables.py
+from db.database import engine
+from db.models import Base
+
+Base.metadata.create_all(bind=engine)
+print("PostgreSQL tables created successfully")
--- a/db/__pycache__/database.cpython-310.pyc
+++ b/db/__pycache__/database.cpython-310.pyc
--- a/db/__pycache__/models.cpython-310.pyc
+++ b/db/__pycache__/models.cpython-310.pyc
--- a/db/database.py
+++ b/db/database.py
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+
+DATABASE_URL = "postgresql://postgres:123321@localhost:5432/voice_ai"
+
+engine = create_engine(
+    DATABASE_URL,
+    pool_pre_ping=True
+)
+
+SessionLocal = sessionmaker(
+    autocommit=False,
+    autoflush=False,
+    bind=engine
+)
--- a/db/models.py
+++ b/db/models.py
+from sqlalchemy import Column, Integer, String, Text, DateTime
+from sqlalchemy.sql import func
+from sqlalchemy.ext.declarative import declarative_base
+from datetime import datetime
+
+# Declarative base class that CallRecord inherits from.
+Base = declarative_base()
+
+class CallRecord(Base):
+    """ORM model mapped to the ``call_records`` table."""
+    __tablename__ = "call_records"
+
+    # Integer primary key, additionally flagged for indexing.
+    id = Column(Integer, primary_key=True, index=True)
+    # The only non-key column declared NOT NULL.
+    original_audio_path = Column(String, nullable=False)  # keep path required
+    # Every column below is optional (nullable).
+    processed_audio_path = Column(String, nullable=True)
+    original_file_name = Column(String, nullable=True)
+    file_title = Column(String, nullable=True)
+    ai_title = Column(String, nullable=True)
+    # Text (unbounded) rather than String; presumably the full transcript.
+    transcription_text = Column(Text, nullable=True)
+    language = Column(String, nullable=True)
+    sentiment = Column(String, nullable=True)
+    category = Column(String, nullable=True)
+    # Timezone-aware timestamp; the database supplies now() as the default
+    # when no value is provided on insert.
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
--- a/main.py
+++ b/main.py
+from fastapi import FastAPI
+from app.api.routes import router
+
+# FastAPI application instance for this service.
+app = FastAPI()
+
+# Register the routes declared on the router imported from app.api.routes.
+app.include_router(router)
--- a/models/__pycache__/schemas.cpython-310.pyc
+++ b/models/__pycache__/schemas.cpython-310.pyc
--- a/models/schemas.py
+++ b/models/schemas.py
+from pydantic import BaseModel
+
+# Request body for the call-processing endpoint.
+class CallRequest(BaseModel):
+    # Path of the audio file to process (required string field).
+    # NOTE(review): presumably a path on the server's filesystem — confirm.
+    audio_path: str
\ No newline at end of file
--- a/requirements.txt
+++ b/requirements.txt
+fastapi
+uvicorn
+openai-whisper
+torch
+transformers
+librosa
+numpy
+scikit-learn
+pydantic
+sqlalchemy
+psycopg2-binary
\ No newline at end of file
--- a/services/__pycache__/audio_loader.cpython-310.pyc
+++ b/services/__pycache__/audio_loader.cpython-310.pyc
--- a/services/__pycache__/emotion_analysis.cpython-310.pyc
+++ b/services/__pycache__/emotion_analysis.cpython-310.pyc
--- a/services/__pycache__/pipeline.cpython-310.pyc
+++ b/services/__pycache__/pipeline.cpython-310.pyc
--- a/services/__pycache__/text_analysis.cpython-310.pyc
+++ b/services/__pycache__/text_analysis.cpython-310.pyc
--- a/services/__pycache__/transcription.cpython-310.pyc
+++ b/services/__pycache__/transcription.cpython-310.pyc
--- a/services/audio_loader.py
+++ b/services/audio_loader.py
+import subprocess
+import os
+import uuid
+
+def load_audio(input_path: str) -> str:
+    """
+    Converts any audio format to WAV (16kHz, mono) using FFmpeg.
+    Returns path to converted WAV file.
+    """
+
+    if not os.path.exists(input_path):
+        raise FileNotFoundError("Audio file not found")
+
+    output_dir = "app/storage/processed"
+    os.makedirs(output_dir, exist_ok=True)
+
+    output_path = os.path.join(
+        output_dir,
+        f"{uuid.uuid4().hex}.wav"
+    )
+
+    command = [
+        "ffmpeg",
+        "-y",
+        "-i", input_path,
+        "-ac", "1",        # mono
+        "-ar", "16000",    # 16kHz
+        output_path
+    ]
+
+    subprocess.run(
+        command,
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+        check=True
+    )
+
+    return output_path
--- a/services/emotion_analysis.py
+++ b/services/emotion_analysis.py
+import librosa
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+
+def extract_features(audio_path):
+    y, sr = librosa.load(audio_path, sr=16000)
+    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
+    return np.mean(mfcc.T, axis=0)
+
+def detect_emotion(audio_path):
+    features = extract_features(audio_path)
+
+    # MVP logic (rule-based threshold)
+    energy = np.mean(np.abs(features))
+
+    if energy > 50:
+        emotion = "Angry"
+    elif energy > 20:
+        emotion = "Frustrated"
+    else:
+        emotion = "Calm"
+
+    return {
+        "emotion": emotion,
+        "energy_score": float(energy)
+    }
--- a/services/pipeline.py
+++ b/services/pipeline.py
+from app.services.audio_loader import load_audio
+from app.services.transcription import transcribe
+from app.db.database import SessionLocal
+from app.db.models import CallRecord
+
+def process_call(audio_path: str):
+    db = SessionLocal()
+
+    try:
+        wav_path = load_audio(audio_path)
+        result = transcribe(wav_path)
+
+        record = CallRecord(
+            original_audio_path=audio_path,
+            processed_audio_path=wav_path,
+            transcription_text=result["text"],
+            language=result["language"]
+        )
+
+        db.add(record)
+        db.commit()
+        db.refresh(record)
+
+        return {
+            "id": record.id,
+            "text": record.transcription_text,
+            "language": record.language
+        }
+
+    finally:
+        db.close()
--- a/services/text_analysis.py
+++ b/services/text_analysis.py
+from transformers import pipeline
+
+# Sentiment-analysis pipeline, built once when this module is imported and
+# reused by analyze_text for every call.
+sentiment_model = pipeline(
+    "sentiment-analysis",
+    model="nlptown/bert-base-multilingual-uncased-sentiment"
+)
+
+def analyze_text(text: str):
+    sentiment = sentiment_model(text[:512])[0]
+
+    summary = text[:300] + "..."  # replace with LLM later
+
+    return {
+        "summary": summary,
+        "sentiment": sentiment["label"],
+        "sentiment_score": sentiment["score"]
+    }
--- a/services/transcription.py
+++ b/services/transcription.py
+import whisper
+
+# Whisper "base" model, loaded once at import time and shared by transcribe().
+model = whisper.load_model("base")
+
+def transcribe(wav_path: str):
+    result = model.transcribe(wav_path)
+    return {
+        "text": result["text"],
+        "language": result["language"]
+    }
--- a/storage/calls/call_001.wav
+++ b/storage/calls/call_001.wav
--- a/storage/calls/call_002.mp3
+++ b/storage/calls/call_002.mp3
--- a/storage/processed/3c2b7e946800452984e6591786edabdb.wav
+++ b/storage/processed/3c2b7e946800452984e6591786edabdb.wav
--- a/storage/processed/4723a0d132204c02af3a42023665b7ac.wav
+++ b/storage/processed/4723a0d132204c02af3a42023665b7ac.wav
--- a/storage/processed/5a917b68618e4dcca97c10f212f70df3.wav
+++ b/storage/processed/5a917b68618e4dcca97c10f212f70df3.wav
--- a/storage/processed/5bde1f450912473ca92fac11162181c5.wav
+++ b/storage/processed/5bde1f450912473ca92fac11162181c5.wav
--- a/storage/processed/79f2c94711b644aba2bf8fc54d78c8b1.wav
+++ b/storage/processed/79f2c94711b644aba2bf8fc54d78c8b1.wav
--- a/storage/processed/f4c5cd8356c8462886440712f818fbea.wav
+++ b/storage/processed/f4c5cd8356c8462886440712f818fbea.wav
--- a/utils/logger.py
+++ b/utils/logger.py