# M4A to Text Transcription Project Guide

## Project Overview

Create a web application that transcribes M4A audio files (commonly used by iPhones) into text. The app will use React for the frontend and Node.js for the backend, with OpenAI's Whisper for transcription.

## File Structure

```
m4a-transcription-app/
├── client/                 # React frontend
│   ├── public/
│   │   └── index.html
│   ├── src/
│   │   ├── components/
│   │   │   ├── FileUpload.js
│   │   │   └── TranscriptDisplay.js
│   │   ├── App.js
│   │   └── index.js
│   └── package.json
├── server/                 # Node.js backend
│   ├── src/
│   │   ├── routes/
│   │   │   └── transcription.js
│   │   ├── services/
│   │   │   └── transcriptionService.js
│   │   └── app.js
│   ├── uploads/            # Temporary storage for uploaded files
│   └── package.json
└── README.md
```

## Detailed Steps

### 1. Set Up Project Structure

```bash
mkdir m4a-transcription-app
cd m4a-transcription-app
git init
# printf (unlike bash's echo) expands \n, so each pattern lands on its own line
printf 'node_modules/\n.env\nuploads/\n*.log\n' > .gitignore
```

### 2. Set Up Backend (Node.js)

```bash
mkdir server
cd server
npm init -y
npm install express cors multer dotenv openai
npm install --save-dev nodemon
mkdir -p src/routes src/services uploads
```

Create `src/app.js`:

```javascript
// src/app.js
// Load .env first so OPENAI_API_KEY is set before the service module is required.
require('dotenv').config();
const express = require('express');
const cors = require('cors');
const transcriptionRoutes = require('./routes/transcription');

const app = express();

app.use(cors());
app.use(express.json());
app.use('/api', transcriptionRoutes);

const PORT = process.env.PORT || 5000;
app.listen(PORT, () => console.log(`Server running on port ${PORT}`));
```

Create `src/routes/transcription.js`:

```javascript
// src/routes/transcription.js
const express = require('express');
const multer = require('multer');
const path = require('path');
const { transcribeAudio } = require('../services/transcriptionService');

const router = express.Router();

// Store uploads in uploads/ and accept only files whose name ends in .m4a.
const upload = multer({
  dest: 'uploads/',
  fileFilter: (req, file, cb) => {
    if (path.extname(file.originalname).toLowerCase() === '.m4a') {
      cb(null, true);
    } else {
      cb(new Error('Only M4A files are allowed'));
    }
  }
});

// POST /api/transcribe — expects a multipart form with the file in the "audio" field.
router.post('/transcribe', upload.single('audio'), async (req, res) => {
  try {
    if (!req.file) {
      return res.status(400).json({ error: 'No file uploaded or invalid file type' });
    }

    const { path: filePath } = req.file;
    const transcription = await transcribeAudio(filePath);
    res.json({ transcription });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

// Multer forwards rejected uploads (for example a non-M4A file) through next(err).
// Answer with JSON so the client does not receive Express's default HTML error page.
// All four parameters are required for Express to treat this as an error handler.
router.use((err, req, res, next) => {
  res.status(400).json({ error: err.message });
});

module.exports = router;
```

Create `src/services/transcriptionService.js`:

```javascript
// src/services/transcriptionService.js
const fs = require('fs');
const { OpenAI, toFile } = require('openai');

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

/**
 * Transcribe an uploaded M4A file with Whisper and delete the file afterwards.
 *
 * @param {string} filePath - Path of the uploaded file on disk.
 * @returns {Promise<string>} The transcribed text.
 * @throws {Error} 'Failed to transcribe audio' if the API call fails.
 */
async function transcribeAudio(filePath) {
  try {
    // Multer saves uploads under a random name with no extension, and the API
    // infers the audio format from the file name, so name the upload explicitly.
    const file = await toFile(fs.createReadStream(filePath), 'audio.m4a');
    const resp = await openai.audio.transcriptions.create({
      file,
      model: 'whisper-1',
    });
    return resp.text;
  } catch (error) {
    console.error('Transcription error:', error);
    throw new Error('Failed to transcribe audio', { cause: error });
  } finally {
    // Clean up the uploaded file
    fs.unlink(filePath, (err) => {
      if (err) console.error('Error deleting file:', err);
    });
  }
}

module.exports = { transcribeAudio };
```

Create `.env` file:

```
PORT=5000
OPENAI_API_KEY=your_openai_api_key_here
```

Update `package.json` scripts:

```json
"scripts": {
  "start": "node src/app.js",
  "dev": "nodemon src/app.js"
}
```

### 3. Set Up Frontend (React)

```bash
# Return to the project root so client/ is created next to server/
cd ..
npx create-react-app client
cd client
npm install axios
```

Replace `src/App.js`:

```jsx
// src/App.js
import React, { useState } from 'react';
import FileUpload from './components/FileUpload';
import TranscriptDisplay from './components/TranscriptDisplay';

function App() {
  const [transcript, setTranscript] = useState('');

  return (

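    <div className="App">
      <h1>M4A Transcription App</h1>
      <FileUpload setTranscript={setTranscript} />
      <TranscriptDisplay transcript={transcript} />
    </div>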

); } export default App; ``` Create `src/components/FileUpload.js`: ```jsx // src/components/FileUpload.js import React, { useState } from 'react'; import axios from 'axios'; function FileUpload({ setTranscript }) { const [file, setFile] = useState(null); const [loading, setLoading] = useState(false); const handleFileChange = (e) => { setFile(e.target.files[0]); }; const handleSubmit = async (e) => { e.preventDefault(); if (!file) { alert('Please select a file'); return; } const formData = new FormData(); formData.append('audio', file); setLoading(true); try { const response = await axios.post('http://localhost:5000/api/transcribe', formData, { headers: { 'Content-Type': 'multipart/form-data' } }); setTranscript(response.data.transcription); } catch (error) { console.error('Error uploading file:', error); alert('Error transcribing file'); } finally { setLoading(false); } }; return (
); } export default FileUpload; ``` Create `src/components/TranscriptDisplay.js`: ```jsx // src/components/TranscriptDisplay.js import React from 'react'; function TranscriptDisplay({ transcript }) { return (

    <div>
      <h2>Transcript</h2>
      <p>{transcript}</p>
    </div>
  );
}

export default TranscriptDisplay;
```

### 4. Running the Application

1. Start the backend:

   ```bash
   cd server
   npm run dev
   ```

2. Start the frontend (in a new terminal):

   ```bash
   cd client
   npm start
   ```

### 5. Testing

1. Prepare an M4A audio file (you can record one using an iPhone).
2. Open the app in your browser (usually at http://localhost:3000).
3. Upload the M4A file and click "Transcribe".
4. Verify that the transcription appears in the TranscriptDisplay component.

### 6. Next Steps

- Improve error handling and user feedback
- Add loading indicators and progress updates
- Implement file size limits and other security measures
- Style the frontend for a better user experience
- Consider adding support for other audio formats in the future