# M4A to Text Transcription Project Guide

## Project Overview
Create a web application that transcribes M4A audio files (commonly used by iPhones) into text. The app will use React for the frontend and Node.js for the backend, with OpenAI's hosted Whisper model (`whisper-1`) for transcription. You will need an OpenAI API key to run it.

## File Structure
```
m4a-transcription-app/
├── client/                 # React frontend
│   ├── public/
│   │   └── index.html
│   ├── src/
│   │   ├── components/
│   │   │   ├── FileUpload.js
│   │   │   └── TranscriptDisplay.js
│   │   ├── App.js
│   │   └── index.js
│   └── package.json
├── server/                 # Node.js backend
│   ├── src/
│   │   ├── routes/
│   │   │   └── transcription.js
│   │   ├── services/
│   │   │   └── transcriptionService.js
│   │   └── app.js
│   ├── uploads/            # Temporary storage for uploaded files
│   └── package.json
└── README.md
```

## Detailed Steps

### 1. Set Up Project Structure
```bash
mkdir m4a-transcription-app
cd m4a-transcription-app
git init
# Use printf rather than echo: bash's builtin echo does not expand "\n" without -e,
# which would leave .gitignore with a single line containing literal backslashes.
printf 'node_modules/\n.env\nuploads/\n*.log\n' > .gitignore
```

### 2. Set Up Backend (Node.js)
```bash
mkdir server
cd server
# Generate a default package.json without interactive prompts
npm init -y
# Runtime dependencies required by the server code in this guide
npm install express cors multer dotenv openai
# nodemon is a dev-only dependency used by the "dev" script
npm install --save-dev nodemon
# src/ holds the server code; uploads/ receives the uploaded audio files
mkdir src uploads
```

Create `src/app.js`:
```javascript
// src/app.js
// Server entry point: loads .env, registers middleware and routes, then starts listening.
require('dotenv').config(); // runs first so the modules required below see values from .env

const express = require('express');
const cors = require('cors');
const transcriptionRoutes = require('./routes/transcription');

const DEFAULT_PORT = 5000;
const port = process.env.PORT || DEFAULT_PORT;

const server = express();

// Global middleware, registered in order: CORS handling, then JSON body parsing.
[cors(), express.json()].forEach((middleware) => {
  server.use(middleware);
});

// Every transcription endpoint is mounted under the /api prefix.
server.use('/api', transcriptionRoutes);

server.listen(port, () => {
  console.log(`Server running on port ${port}`);
});
```

Create `src/routes/transcription.js`:
```javascript
// src/routes/transcription.js
// POST /transcribe: accepts one M4A upload (multipart field "audio") and responds
// with `{ transcription }`, or `{ error }` and a 4xx/5xx status on failure.
const crypto = require('crypto');
const express = require('express');
const multer = require('multer');
const path = require('path');
const { transcribeAudio } = require('../services/transcriptionService');

const router = express.Router();

// Resolved relative to this file so uploads always land in server/uploads,
// whatever directory the process was started from.
const UPLOAD_DIR = path.join(__dirname, '..', '..', 'uploads');

// OpenAI's transcription endpoint documents a 25 MB upload limit; reject larger
// files here instead of storing them and failing later.
const MAX_FILE_SIZE_BYTES = 25 * 1024 * 1024;

const storage = multer.diskStorage({
  destination: UPLOAD_DIR,
  // Keep the original extension. Multer's default file names have none, and the
  // transcription API relies on the file name's extension to detect the audio format.
  filename: (req, file, cb) => {
    const extension = path.extname(file.originalname).toLowerCase();
    const uniqueName = crypto.randomBytes(16).toString('hex');
    cb(null, `${uniqueName}${extension}`);
  },
});

const upload = multer({
  storage,
  limits: { fileSize: MAX_FILE_SIZE_BYTES },
  fileFilter: (req, file, cb) => {
    if (path.extname(file.originalname).toLowerCase() === '.m4a') {
      cb(null, true);
    } else {
      const rejection = new Error('Only M4A files are allowed');
      rejection.status = 400; // marks this as a client error for handleAudioUpload
      cb(rejection);
    }
  },
});

const uploadSingleAudio = upload.single('audio');

/**
 * Runs the multer upload and converts upload failures into JSON responses.
 *
 * Without this wrapper, errors raised by multer or the file filter skip the
 * route handler's try/catch and reach Express's default error handler, which
 * answers with HTTP 500 instead of the `{ error }` JSON the client expects.
 *
 * @param {import('express').Request} req
 * @param {import('express').Response} res
 * @param {import('express').NextFunction} next
 */
function handleAudioUpload(req, res, next) {
  uploadSingleAudio(req, res, (err) => {
    if (!err) {
      return next();
    }
    if (err instanceof multer.MulterError) {
      const status = err.code === 'LIMIT_FILE_SIZE' ? 413 : 400;
      return res.status(status).json({ error: err.message });
    }
    if (err.status === 400) {
      return res.status(400).json({ error: err.message });
    }
    // Anything else (e.g. a disk error) is a server-side problem.
    return next(err);
  });
}

router.post('/transcribe', handleAudioUpload, async (req, res) => {
  try {
    if (!req.file) {
      return res.status(400).json({ error: 'No file uploaded or invalid file type' });
    }
    const { path: filePath } = req.file;
    // transcribeAudio removes the uploaded file once it has finished.
    const transcription = await transcribeAudio(filePath);
    res.json({ transcription });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

module.exports = router;
```

Create `src/services/transcriptionService.js`:
```javascript
// src/services/transcriptionService.js
const { OpenAI } = require('openai');
const fs = require('fs');

// `npm install openai` installs the current SDK (v4 or later), which exports a single
// `OpenAI` client class; the v3 `Configuration` / `OpenAIApi` exports no longer exist.
// NOTE(review): the client is expected to throw at startup when OPENAI_API_KEY is
// missing — confirm against the installed SDK version.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

/**
 * Transcribes an audio file with the `whisper-1` model and deletes the file afterwards.
 *
 * @param {string} filePath - Path of the uploaded audio file on disk.
 * @returns {Promise<string>} The transcribed text.
 * @throws {Error} 'Failed to transcribe audio' when the API call fails; the
 *   underlying error is attached as `cause`.
 */
async function transcribeAudio(filePath) {
  try {
    const transcription = await openai.audio.transcriptions.create({
      file: fs.createReadStream(filePath),
      model: 'whisper-1',
    });
    return transcription.text;
  } catch (error) {
    console.error('Transcription error:', error);
    throw new Error('Failed to transcribe audio', { cause: error });
  } finally {
    // Clean up the uploaded file whether or not transcription succeeded.
    // Deletion is best-effort: a failure is logged but never masks the result.
    fs.unlink(filePath, (err) => {
      if (err) console.error('Error deleting file:', err);
    });
  }
}

module.exports = { transcribeAudio };
```

Create a `.env` file in the `server/` directory (next to `package.json`):
```
# Port the Express server listens on (src/app.js falls back to 5000 when unset)
PORT=5000
# API key read by src/services/transcriptionService.js; keep this file out of version control
OPENAI_API_KEY=your_openai_api_key_here
```

Update the `scripts` section of `server/package.json`:
```json
"scripts": {
  "start": "node src/app.js",
  "dev": "nodemon src/app.js"
}
```

### 3. Set Up Frontend (React)
```bash
# Step 2 left the shell inside server/; go back to the project root so that
# client/ is created next to server/ (skip this line if you are already there).
cd ..
npx create-react-app client
cd client
npm install axios
```

Replace the contents of `client/src/App.js`:
```jsx
// src/App.js
import React, { useState } from 'react';
import FileUpload from './components/FileUpload';
import TranscriptDisplay from './components/TranscriptDisplay';

function App() {
  const [transcript, setTranscript] = useState('');

  return (
    <div className="App">
      <h1>M4A Transcription App</h1>
      <FileUpload setTranscript={setTranscript} />
      <TranscriptDisplay transcript={transcript} />
    </div>
  );
}

export default App;
```

Create `src/components/FileUpload.js`:
```jsx
// src/components/FileUpload.js
import React, { useState } from 'react';
import axios from 'axios';

function FileUpload({ setTranscript }) {
  const [file, setFile] = useState(null);
  const [loading, setLoading] = useState(false);

  const handleFileChange = (e) => {
    setFile(e.target.files[0]);
  };

  const handleSubmit = async (e) => {
    e.preventDefault();
    if (!file) {
      alert('Please select a file');
      return;
    }
    
    const formData = new FormData();
    formData.append('audio', file);

    setLoading(true);
    try {
      const response = await axios.post('http://localhost:5000/api/transcribe', formData, {
        headers: { 'Content-Type': 'multipart/form-data' }
      });
      setTranscript(response.data.transcription);
    } catch (error) {
      console.error('Error uploading file:', error);
      alert('Error transcribing file');
    } finally {
      setLoading(false);
    }
  };

  return (
    <form onSubmit={handleSubmit}>
      <input type="file" accept=".m4a" onChange={handleFileChange} />
      <button type="submit" disabled={loading}>
        {loading ? 'Transcribing...' : 'Transcribe'}
      </button>
    </form>
  );
}

export default FileUpload;
```

Create `src/components/TranscriptDisplay.js`:
```jsx
// src/components/TranscriptDisplay.js
import React from 'react';

function TranscriptDisplay({ transcript }) {
  return (
    <div>
      <h2>Transcript</h2>
      <pre>{transcript}</pre>
    </div>
  );
}

export default TranscriptDisplay;
```

### 4. Running the Application
1. Start the backend:
   ```bash
   # Run from the project root; "dev" starts src/app.js under nodemon
   cd server
   npm run dev
   ```

2. Start the frontend (in a new terminal):
   ```bash
   # Run from the project root; starts the React development server
   cd client
   npm start
   ```

### 5. Testing
1. Prepare an M4A audio file (you can record one using an iPhone).
2. Open the app in your browser (usually at http://localhost:3000).
3. Upload the M4A file and click "Transcribe".
4. Verify that the transcription appears in the TranscriptDisplay component.

### 6. Next Steps
- Improve error handling and user feedback
- Add loading indicators and progress updates
- Implement file size limits and other security measures
- Style the frontend for a better user experience
- Consider adding support for other audio formats in the future