# M4A to Text Transcription Project Guide

## Project Overview

Create a web application that transcribes M4A audio files (the format commonly used by iPhone recordings) into text. The app uses React for the frontend and Node.js for the backend, with OpenAI's Whisper API for transcription.

## File Structure

```
m4a-transcription-app/
├── client/                      # React frontend
│   ├── public/
│   │   └── index.html
│   ├── src/
│   │   ├── components/
│   │   │   ├── FileUpload.js
│   │   │   └── TranscriptDisplay.js
│   │   ├── App.js
│   │   └── index.js
│   └── package.json
├── server/                      # Node.js backend
│   ├── src/
│   │   ├── routes/
│   │   │   └── transcription.js
│   │   ├── services/
│   │   │   └── transcriptionService.js
│   │   └── app.js
│   ├── uploads/                 # Temporary storage for uploaded files
│   └── package.json
└── README.md
```

## Detailed Steps

### 1. Set Up Project Structure

```bash
mkdir m4a-transcription-app
cd m4a-transcription-app
git init
printf "node_modules/\n.env\nuploads/\n*.log\n" > .gitignore
```

### 2. Set Up Backend (Node.js)

```bash
mkdir server
cd server
npm init -y
npm install express cors multer dotenv openai
npm install --save-dev nodemon
mkdir src uploads
```

Create `src/app.js`:

```javascript
// src/app.js
require('dotenv').config();
const express = require('express');
const cors = require('cors');
const transcriptionRoutes = require('./routes/transcription');

const app = express();

app.use(cors());
app.use(express.json());
app.use('/api', transcriptionRoutes);

const PORT = process.env.PORT || 5000;
app.listen(PORT, () => console.log(`Server running on port ${PORT}`));
```

Create `src/routes/transcription.js`:

```javascript
// src/routes/transcription.js
const express = require('express');
const multer = require('multer');
const path = require('path');
const { transcribeAudio } = require('../services/transcriptionService');

const router = express.Router();

// Store uploads on disk and accept only .m4a files
const upload = multer({
  dest: 'uploads/',
  fileFilter: (req, file, cb) => {
    if (path.extname(file.originalname).toLowerCase() === '.m4a') {
      cb(null, true);
    } else {
      cb(new Error('Only M4A files are allowed'));
    }
  }
});

router.post('/transcribe', upload.single('audio'), async (req, res) => {
  try {
    if (!req.file) {
      return res.status(400).json({ error: 'No file uploaded or invalid file type' });
    }
    const { path: filePath } = req.file;
    const transcription = await transcribeAudio(filePath);
    res.json({ transcription });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

module.exports = router;
```

Create `src/services/transcriptionService.js` (this uses the current `openai` v4 SDK interface, which is what a plain `npm install openai` gives you):

```javascript
// src/services/transcriptionService.js
const OpenAI = require('openai');
const fs = require('fs');

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

async function transcribeAudio(filePath) {
  try {
    // Stream the uploaded file to the Whisper transcription endpoint
    const response = await openai.audio.transcriptions.create({
      file: fs.createReadStream(filePath),
      model: 'whisper-1',
    });
    return response.text;
  } catch (error) {
    console.error('Transcription error:', error);
    throw new Error('Failed to transcribe audio');
  } finally {
    // Clean up the uploaded file whether or not transcription succeeded
    fs.unlink(filePath, (err) => {
      if (err) console.error('Error deleting file:', err);
    });
  }
}

module.exports = { transcribeAudio };
```

Create a `.env` file:

```
PORT=5000
OPENAI_API_KEY=your_openai_api_key_here
```

Update the `scripts` section of `package.json`:

```json
"scripts": {
  "start": "node src/app.js",
  "dev": "nodemon src/app.js"
}
```
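With the backend in place you can sanity-check the endpoint before building the frontend. A minimal smoke test, assuming the server is running (`npm run dev`), a valid key is set in `.env`, and `sample.m4a` is a placeholder for any M4A file you have on hand:

```bash
curl -X POST http://localhost:5000/api/transcribe \
  -F "audio=@sample.m4a"
# Expected response shape: {"transcription":"...recognized text..."}
```

The form field name `audio` must match `upload.single('audio')` in the route.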
### 3. Set Up Frontend (React)

```bash
npx create-react-app client
cd client
npm install axios
```

Replace the contents of `src/App.js`:

```jsx
// src/App.js
import React, { useState } from 'react';
import FileUpload from './components/FileUpload';
import TranscriptDisplay from './components/TranscriptDisplay';

function App() {
  const [transcript, setTranscript] = useState('');

  return (
    <div className="App">
      <h1>M4A to Text Transcription</h1>
      <FileUpload onTranscription={setTranscript} />
      <TranscriptDisplay transcript={transcript} />
    </div>
  );
}

export default App;
```

`FileUpload` hands the transcription result back through its `onTranscription` callback, and `TranscriptDisplay` simply renders the `transcript` it receives.
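The two components imported above live in `src/components/`. The sketches below are one minimal way to implement them, assuming the backend from step 2 is reachable at `http://localhost:5000` and the prop names (`onTranscription`, `transcript`) used in `App.js` above; adjust them to match your actual implementation.

```jsx
// src/components/FileUpload.js
import React, { useState } from 'react';
import axios from 'axios';

function FileUpload({ onTranscription }) {
  const [file, setFile] = useState(null);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState('');

  const handleSubmit = async (e) => {
    e.preventDefault();
    if (!file) return;

    // The field name must match upload.single('audio') on the server
    const formData = new FormData();
    formData.append('audio', file);

    setLoading(true);
    setError('');
    try {
      const res = await axios.post('http://localhost:5000/api/transcribe', formData);
      onTranscription(res.data.transcription);
    } catch (err) {
      setError(err.response?.data?.error || 'Transcription failed');
    } finally {
      setLoading(false);
    }
  };

  return (
    <form onSubmit={handleSubmit}>
      <input
        type="file"
        accept=".m4a,audio/m4a,audio/x-m4a"
        onChange={(e) => setFile(e.target.files[0])}
      />
      <button type="submit" disabled={!file || loading}>
        {loading ? 'Transcribing…' : 'Transcribe'}
      </button>
      {error && <p style={{ color: 'red' }}>{error}</p>}
    </form>
  );
}

export default FileUpload;
```

And a display component that just shows whatever text it is given:

```jsx
// src/components/TranscriptDisplay.js
import React from 'react';

function TranscriptDisplay({ transcript }) {
  // Render nothing until a transcription has been produced
  if (!transcript) return null;

  return (
    <div>
      <h2>Transcript</h2>
      <p>{transcript}</p>
    </div>
  );
}

export default TranscriptDisplay;
```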