Directory structure: └── koellabs-koellabs.com/ ├── README.md ├── ACKNOWLEDGEMENTS.md ├── components.json ├── CONTRIBUTING.md ├── DEVELOPMENT.md ├── drizzle.config.ts ├── jsconfig.json ├── LICENSE ├── middleware.ts ├── next-env.d.ts ├── next.config.mjs ├── package.json ├── postcss.config.mjs ├── tsconfig.json ├── .editorconfig ├── .env.example ├── .eslintrc.json ├── .gitleaksignore ├── .prettierignore ├── .prettierrc ├── app/ │ ├── globals.css │ ├── layout.js │ ├── not-found.js │ ├── page.js │ ├── about/ │ │ └── page.tsx │ ├── api/ │ │ ├── auth/ │ │ │ ├── [...all]/ │ │ │ │ └── route.ts │ │ │ └── update-user/ │ │ │ └── route.ts │ │ └── user/ │ │ └── preferences/ │ │ └── route.ts │ ├── blog/ │ │ ├── bloglist.tsx │ │ ├── page.tsx │ │ ├── posts.ts │ │ ├── [slug]/ │ │ │ └── page.tsx │ │ ├── blog-components/ │ │ │ ├── body.tsx │ │ │ ├── code.tsx │ │ │ ├── decorations.tsx │ │ │ ├── heading.tsx │ │ │ ├── hero-image.tsx │ │ │ ├── image.tsx │ │ │ ├── subheading.tsx │ │ │ └── utils.ts │ │ └── posts/ │ │ ├── building-open-source-leaderboards.tsx │ │ ├── dialect-sensitivity.tsx │ │ ├── feature-extraction-deep-dive.tsx │ │ ├── new-look-for-koel-labs.tsx │ │ ├── open-source-announcement.tsx │ │ ├── phonemic-transcription-metrics.tsx │ │ └── transformer-deep-dive.tsx │ ├── contact/ │ │ └── page.tsx │ ├── dashboard/ │ │ ├── customTimeline.tsx │ │ ├── layout.js │ │ ├── microphone.tsx │ │ ├── page.jsx │ │ ├── practiceMode.tsx │ │ ├── videoPlayer.tsx │ │ └── settings/ │ │ └── page.jsx │ ├── onboarding/ │ │ ├── layout.js │ │ ├── page.jsx │ │ ├── getting-to-know-you/ │ │ │ └── page.jsx │ │ ├── language-goals/ │ │ │ └── page.jsx │ │ ├── speech-assessment/ │ │ │ └── page.jsx │ │ └── welcome/ │ │ └── page.jsx │ ├── previews/ │ │ ├── page.tsx │ │ └── sections-previews/ │ │ ├── leaderboard.tsx │ │ ├── previews-hero.tsx │ │ └── previews-models.tsx │ ├── pricing/ │ │ └── page.tsx │ ├── research/ │ │ ├── page.tsx │ │ └── [slug]/ │ │ └── page.tsx │ ├── reset-password/ │ │ └── page.js │ ├── 
sign-in/ │ │ └── page.tsx │ └── sign-up/ │ └── page.tsx ├── components/ │ ├── comp-331.jsx │ ├── comp-433.jsx │ ├── magic-highlighter.tsx │ ├── react-scan.tsx │ ├── actions/ │ │ └── youtube-video-validate.ts │ ├── forms/ │ │ ├── challenging-words-selector.jsx │ │ ├── city-selector.jsx │ │ ├── country-selector.jsx │ │ ├── experience-level-selector.jsx │ │ ├── language-selector.jsx │ │ └── target-language-selector.jsx │ ├── sections/ │ │ ├── 1 - Hero.jsx │ │ ├── 2 - Bento.jsx │ │ ├── 3 - CTA.tsx │ │ ├── 4 - Footer.tsx │ │ ├── bento.tsx │ │ ├── cta.tsx │ │ ├── footer.tsx │ │ ├── hero.tsx │ │ ├── models.tsx │ │ ├── previews.tsx │ │ └── research.tsx │ ├── sidebar/ │ │ ├── app-sidebar.jsx │ │ ├── beta-card.jsx │ │ ├── nav-main.jsx │ │ ├── nav-secondary.jsx │ │ ├── nav-user.jsx │ │ ├── nav-videos.jsx │ │ └── streak-card.jsx │ └── ui/ │ ├── audio-playback.tsx │ ├── calendar-rac.tsx │ ├── code-block.tsx │ ├── dashboard-stat-chart.jsx │ ├── dashboard.jsx │ ├── datefield-rac.tsx │ ├── header.jsx │ ├── koellabs-onboarding.tsx │ ├── sidebar.jsx │ ├── toast.jsx │ ├── toaster.jsx │ ├── base/ │ │ ├── avatar.jsx │ │ ├── badge.jsx │ │ ├── button.tsx │ │ ├── card.tsx │ │ ├── carousel.tsx │ │ ├── chart.tsx │ │ ├── checkbox.tsx │ │ ├── collapsible.jsx │ │ ├── command.tsx │ │ ├── context-menu.jsx │ │ ├── date.tsx │ │ ├── dialog.tsx │ │ ├── drawer.tsx │ │ ├── dropdown-menu.jsx │ │ ├── info-card.jsx │ │ ├── input.tsx │ │ ├── koel-bird-rounded.jsx │ │ ├── koel-labs-bird.jsx │ │ ├── label.tsx │ │ ├── model-svg.tsx │ │ ├── popover.jsx │ │ ├── progress.jsx │ │ ├── radio-group.jsx │ │ ├── scroll-area.jsx │ │ ├── select.tsx │ │ ├── separator.jsx │ │ ├── sheet.jsx │ │ ├── skeleton.jsx │ │ ├── slider.jsx │ │ ├── tabs.jsx │ │ ├── textarea.tsx │ │ ├── theme-provider.tsx │ │ └── tooltip.tsx │ ├── dashboard/ │ │ ├── ClipsList.tsx │ │ ├── ImportVideos.tsx │ │ ├── SearchCenter.tsx │ │ └── outdated/ │ │ ├── RecommendedClips.tsx │ │ └── RevisitClips.tsx │ ├── magicui/ │ │ ├── animated-beam.jsx │ │ ├── 
bento-grid.jsx │ │ └── hero-video-dialog.tsx │ └── onboarding/ │ ├── gradient-logo.jsx │ ├── grid-background.jsx │ └── onboarding-provider.jsx ├── db/ │ └── schema.ts ├── hooks/ │ ├── use-character-limit.js │ ├── use-image-upload.js │ ├── use-mobile.jsx │ ├── use-sidebar.jsx │ ├── use-toast.js │ └── use-user-videos.ts ├── lib/ │ ├── auth-client.ts │ ├── auth.ts │ ├── styles.js │ ├── use-character-limit.tsx │ ├── videos.ts │ └── hooks/ │ └── use-scroll.js ├── migrations/ │ ├── 0000_married_dark_beast.sql │ ├── 0001_zippy_snowbird.sql │ ├── 0002_orange_klaw.sql │ ├── 0003_bouncy_betty_brant.sql │ ├── 0004_late_patriot.sql │ ├── 0005_sticky_phil_sheldon.sql │ └── meta/ │ ├── 0000_snapshot.json │ ├── 0001_snapshot.json │ ├── 0002_snapshot.json │ ├── 0003_snapshot.json │ ├── 0004_snapshot.json │ ├── 0005_snapshot.json │ └── _journal.json ├── pages/ │ └── api/ │ └── submitGoogleFormWithReCaptcha.ts ├── public/ │ ├── news-sitemap.xml │ ├── sitemap.xml │ ├── WavWorklet.js │ ├── images/ │ │ └── models.webp │ ├── videos/ │ │ ├── jumanji-next-level.vtt │ │ ├── the-office-michael-pyramid-scheme.captions.vtt │ │ ├── the-office-michael-pyramid-scheme.VTT │ │ └── the-office-michael-secret-keeper-full-res.VTT │ └── .well-known/ │ └── microsoft-identity-association.json ├── scripts/ │ ├── docker-compose.dev.yaml │ ├── docker-run-dev-cmd.sh │ ├── docker-run-dev.sh │ ├── docker-stop-dev.sh │ └── Dockerfile.dev ├── styles/ │ └── animations.css └── .github/ ├── pull_request_template.md ├── ISSUE_TEMPLATE/ │ ├── bug_report.md │ └── feature_request.md └── workflows/ ├── azure-static-web-apps-salmon-sky-0a6b6c10f.yml ├── gitleaks.yml └── zizmor.yml ================================================ FILE: README.md ================================================ Koel Lab's Logomark [![Mozilla Builders](https://img.shields.io/badge/Mozilla-000000.svg?style=for-the-badge&logo=Mozilla&logoColor=white)](https://future.mozilla.org/builders/) 
![Patreon](https://img.shields.io/badge/Patreon-F96854?style=for-the-badge&logo=patreon&logoColor=white) ![PayPal](https://img.shields.io/badge/PayPal-00457C?style=for-the-badge&logo=paypal&logoColor=white) # Koel Labs - Web Application ![Zizmor](https://github.com/KoelLabs/webapp/actions/workflows/zizmor.yml/badge.svg) ![Gitleaks Secret Scanning](https://github.com/KoelLabs/webapp/actions/workflows/gitleaks.yml/badge.svg) [![Automatic Deploys](https://github.com/KoelLabs/webapp/actions/workflows/azure-static-web-apps-salmon-sky-0a6b6c10f.yml/badge.svg)](https://koellabs.com) Contains the website for Koel Labs and the web interface for the pronunciation learning application. Read about all our repositories [here](https://github.com/KoelLabs). ## Setup See [DEVELOPMENT.md](DEVELOPMENT.md) for how to setup your local development environment. If you just want to run the application locally not for development purposes, follow the instructions below. 0. `git clone --recurse-submodules https://github.com/KoelLabs/webapp.git` 1. Install [Docker and Docker Compose](https://www.docker.com/get-started/) 2. Duplicate the `.env.example` file and rename it to `.env.local` with `cp .env.example .env.local`. 3. Duplicate the `server/.env.example` file and rename it to `server/.env` with `cp server/.env.example server/.env`. 4. Run the application `. ./scripts/docker-run-dev.sh` ## Contributing Check out [CONTRIBUTING.md](CONTRIBUTING.md) for specific guidelines on contributing to this repository. We are grateful for all of [our contributors](ACKNOWLEDGEMENTS.md). If you contribute to this repository and wish to be acknowledged, please add your name to the list in your pull request. ## License Application code in this repository is open-sourced under the [FSL-1.1-Apache-2.0](https://fsl.software/). We retain all rights to the Koel Labs brand, logos, blog posts, and website content. 
================================================ FILE: ACKNOWLEDGEMENTS.md ================================================ # Individual Contributors If you wish to be acknowledged for your contributions, please list your name with a short description of your contribution(s) below. For example: - Jane Smith: Added the `foo` and `bar` ops. The Koel Labs Web Application was developed with contributions from the following individuals: - [Ruslan Mukhamedvaleev](https://www.ruslan.in/): Koel Labs co-founder and CPO — lead web-developer and designer. - [Alexander Metzger](https://www.linkedin.com/in/alexander-le-metzger): Koel Labs co-founder and CEO — lead backend+infrastructure+devops+ML engineer. # Third-Party Software | Name | License period | License type | Link | Author | | :---------------------------------- | :------------- | :----------- | :----------------------------------------------------------------------------- | :-------------------------------------------------------------------- | | @custom-react-hooks/use-media-query | perpetual | MIT | git+https://github.com/djkepa/custom-react-hooks.git | Bane Grozdanovic | | @headlessui/react | perpetual | MIT | git+https://github.com/tailwindlabs/headlessui.git | n/a | | @radix-ui/react-avatar | perpetual | MIT | git+https://github.com/radix-ui/primitives.git | n/a | | @radix-ui/react-checkbox | perpetual | MIT | git+https://github.com/radix-ui/primitives.git | n/a | | @radix-ui/react-collapsible | perpetual | MIT | git+https://github.com/radix-ui/primitives.git | n/a | | @radix-ui/react-context-menu | perpetual | MIT | git+https://github.com/radix-ui/primitives.git | n/a | | @radix-ui/react-dialog | perpetual | MIT | git+https://github.com/radix-ui/primitives.git | n/a | | @radix-ui/react-dropdown-menu | perpetual | MIT | git+https://github.com/radix-ui/primitives.git | n/a | | @radix-ui/react-icons | perpetual | MIT | https://registry.npmjs.org/@radix-ui/react-icons/-/react-icons-1.3.2.tgz | n/a | | 
@radix-ui/react-label | perpetual | MIT | git+https://github.com/radix-ui/primitives.git | n/a | | @radix-ui/react-popover | perpetual | MIT | git+https://github.com/radix-ui/primitives.git | n/a | | @radix-ui/react-progress | perpetual | MIT | git+https://github.com/radix-ui/primitives.git | n/a | | @radix-ui/react-select | perpetual | MIT | git+https://github.com/radix-ui/primitives.git | n/a | | @radix-ui/react-separator | perpetual | MIT | git+https://github.com/radix-ui/primitives.git | n/a | | @radix-ui/react-slider | perpetual | MIT | git+https://github.com/radix-ui/primitives.git | n/a | | @radix-ui/react-slot | perpetual | MIT | git+https://github.com/radix-ui/primitives.git | n/a | | @radix-ui/react-tabs | perpetual | MIT | git+https://github.com/radix-ui/primitives.git | n/a | | @radix-ui/react-toast | perpetual | MIT | git+https://github.com/radix-ui/primitives.git | n/a | | @radix-ui/react-tooltip | perpetual | MIT | git+https://github.com/radix-ui/primitives.git | n/a | | @vidstack/react | perpetual | MIT | https://github.com/vidstack/player.git | n/a | | class-variance-authority | perpetual | Apache-2.0 | git+https://github.com/joe-bell/cva.git | Joe Bell (https://joebell.co.uk) | | clsx | perpetual | MIT | git+https://github.com/lukeed/clsx.git | Luke Edwards luke.edwards05@gmail.com https://lukeed.com | | drizzle-orm | perpetual | Apache-2.0 | git+https://github.com/drizzle-team/drizzle-orm.git | Drizzle Team | | embla-carousel-react | perpetual | MIT | git+https://github.com/davidjerleke/embla-carousel | David Jerleke | | firebase | perpetual | Apache-2.0 | git+https://github.com/firebase/firebase-js-sdk.git | Firebase (https://firebase.google.com/) | | firebase-admin | perpetual | Apache-2.0 | git+https://github.com/firebase/firebase-admin-node.git | Firebase (https://firebase.google.com/) | | formidable | perpetual | MIT | git+https://github.com/node-formidable/formidable.git | n/a | | framer-motion | perpetual | MIT | 
git+https://github.com/motiondivision/motion.git | Framer | | lucide-react | perpetual | ISC | git+https://github.com/lucide-icons/lucide.git | Eric Fennis | | media-chrome | perpetual | MIT | git+https://github.com/muxinc/media-chrome.git | @muxinc | | next | perpetual | MIT | git+https://github.com/vercel/next.js.git | n/a | | next-themes | perpetual | MIT | git+https://github.com/pacocoursey/next-themes.git | n/a | | pg | perpetual | MIT | git://github.com/brianc/node-postgres.git | Brian Carlson | | postcss | perpetual | MIT | git+https://github.com/postcss/postcss.git | Andrey Sitnik | | react | perpetual | MIT | git+https://github.com/facebook/react.git | n/a | | react-dom | perpetual | MIT | git+https://github.com/facebook/react.git | n/a | | react-google-recaptcha-v3 | perpetual | MIT | git+https://github.com/t49tran/react-google-recaptcha-v3.git | Duong Tran | | recharts | perpetual | MIT | git+https://github.com/recharts/recharts.git | recharts group | | tailwind-merge | perpetual | MIT | git+https://github.com/dcastil/tailwind-merge.git | Dany Castillo | | tailwind-scrollbar-hide | perpetual | MIT | git+https://github.com/reslear/tailwind-scrollbar-hide.git | reslear | | tailwindcss | perpetual | MIT | git+https://github.com/tailwindlabs/tailwindcss.git | n/a | | tailwindcss-animate | perpetual | MIT | https://registry.npmjs.org/tailwindcss-animate/-/tailwindcss-animate-1.0.7.tgz | Jamie Kyle | | vaul | perpetual | MIT | git+https://github.com/emilkowalski/vaul.git | Emil Kowalski | | zod | perpetual | MIT | git+https://github.com/colinhacks/zod.git | Colin McDonnell | | @types/node | perpetual | MIT | https://github.com/DefinitelyTyped/DefinitelyTyped.git | n/a | | @types/react | perpetual | MIT | https://github.com/DefinitelyTyped/DefinitelyTyped.git | n/a | | css-minimizer-webpack-plugin | perpetual | MIT | git+https://github.com/webpack-contrib/css-minimizer-webpack-plugin.git | Loann Neveu | | dotenv | perpetual | BSD-2-Clause | 
git://github.com/motdotla/dotenv.git | n/a | | drizzle-kit | perpetual | MIT | git+https://github.com/drizzle-team/drizzle-orm.git | Drizzle Team | | eslint | perpetual | MIT | git+https://github.com/eslint/eslint.git | Nicholas C. Zakas | | eslint-config-next | perpetual | MIT | git+https://github.com/vercel/next.js.git | n/a | | eslint-config-prettier | perpetual | MIT | git+https://github.com/prettier/eslint-config-prettier.git | Simon Lydell | | fixpack | perpetual | MIT | git://github.com/henrikjoreteg/fixpack.git | Henrik Joreteg | | prettier | perpetual | MIT | git+https://github.com/prettier/prettier.git | James Long | ================================================ FILE: components.json ================================================ { "$schema": "https://ui.shadcn.com/schema.json", "style": "default", "rsc": true, "tsx": true, "tailwind": { "config": "tailwind.config.js", "css": "app/globals.css", "baseColor": "neutral", "cssVariables": true, "prefix": "" }, "aliases": { "components": "@/components", "utils": "@/lib/utils", "ui": "@/components/ui", "lib": "@/lib", "hooks": "@/hooks", "magicui": "@/components/magicui" } } ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing to Koel Labs - Web Application 👍🎉 First off, thanks for taking the time to contribute! 🎉👍 If you have great ideas for improving UI/UX, conveying pronunciation feedback more clearly, or making the application more accessible (including translations), this is the repo for you. Checkout our [general contributing guidelines here](https://github.com/KoelLabs/.github/blob/main/CONTRIBUTING.md). ## Where to Start 1. Read the [README.md](README.md) file to understand the purpose of this repository. 2. Read the [DEVELOPMENT.md](DEVELOPMENT.md) file to understand how to set up your local development environment. Important design decisions will also be documented here. 3. 
Read the [Code of Conduct](https://github.com/KoelLabs/.github/blob/main/CONTRIBUTING.md#code-of-conduct) to understand expectations while contributing to this project and take a look at the [FAQ](https://github.com/KoelLabs/.github/blob/main/CONTRIBUTING.md#faq) if you have any questions. 4. Read through the [issues](https://github.com/KoelLabs/webapp/issues) to understand the current development priorities. 5. All contributions start with an issue to discuss the change. See [how to file a bug report](https://github.com/KoelLabs/.github/blob/main/CONTRIBUTING.md#reporting-bugs), [suggest a new feature](https://github.com/KoelLabs/.github/blob/main/CONTRIBUTING.md#suggesting-enhancements), or [make your first code contribution](https://github.com/KoelLabs/.github/blob/main/CONTRIBUTING.md#your-first-code-contribution). - Look for the [issues](https://github.com/KoelLabs/webapp/issues) tagged `good first issue` and `help wanted` for things to work on. - Make sure to check the [existing issues](https://github.com/KoelLabs/webapp/issues) before creating a new one. 6. Maintainers with write access to the repository will assign willing contributors to issues they request. Once assigned, fork the repository and create a branch for your work. 7. When you are ready, submit a pull request to the `main` branch. The [pull request template](.github/pull_request_template.md) contains a checklist to go through. ## Point of Contact If you have any questions, feel free to reach out to alex@koellabs.com or ruslan@koellabs.com or open an issue. Security vulnerabilities should be reported to info@koellabs.com. ## Legal Boilerplate By making contributions to the Koel Labs project, you agree to retain all rights, title and interest in and to your contributions and confirm that Koel Labs can use, modify, copy, and redistribute said contributions, under its choice of terms. 
================================================ FILE: DEVELOPMENT.md ================================================ # Development ## Setup ### Run with Docker (Recommended) 0. `git clone --recurse-submodules https://github.com/KoelLabs/webapp.git` 1. Install Docker and Docker Compose - [Docker Desktop for Mac](https://docs.docker.com/docker-for-mac/install/) or `brew install --cask docker` with [Homebrew](https://brew.sh/) - If it repeatedly complains about the daemon not running, make sure Docker Desktop is running and add `export DOCKER_HOST=unix:///Users/$USER/Library/Containers/com.docker.docker/Data/docker.raw.sock` to your shell profile (e.g. `~/.zshrc`) - [Docker Desktop for Windows](https://docs.docker.com/docker-for-windows/install/) or `choco install docker-desktop` with [Chocolatey](https://chocolatey.org/) - [Docker Engine for Linux](https://docs.docker.com/engine/install/) or `sudo apt install docker.io` with APT on Ubuntu 2. Duplicate the `.env.example` file and rename it to `.env.local` with `cp .env.example .env.local`. 3. Duplicate the `server/.env.example` file and rename it to `server/.env` with `cp server/.env.example server/.env`. - You can find your `HF_TOKEN` on your [Settings Page](https://huggingface.co/settings/tokens). It just needs read access to `gated repos`. 4. Run the application - `. ./scripts/docker-run-dev.sh` to start the development server (or `npm run docker` if you have Node.js installed) - `ctrl+c` to stop the server or `. ./scripts/docker-stop-dev.sh` if you've closed the terminal Run commands inside the docker container with `. scripts/docker-run-dev-cmd.sh `, e.g., `. scripts/docker-run-dev-cmd.sh npm install react`. The docker container will also automatically `npm ci` from the package-lock.json and apply migrations when it starts. ### Run directly on your machine 0. `git clone https://github.com/KoelLabs/webapp.git` 1. 
Install Node.js - [Node.js for Mac](https://nodejs.org/en/download/) or `brew install node` with [Homebrew](https://brew.sh/) - [Node.js for Windows](https://nodejs.org/en/download/) or `choco install nodejs` with [Chocolatey](https://chocolatey.org/) - [Node.js for Linux](https://nodejs.org/en/download/) or `sudo apt install nodejs` with APT on Ubuntu 2. Install PostgreSQL - [PostgreSQL for Mac](https://www.postgresql.org/download/macosx/) or `brew install postgresql` with [Homebrew](https://brew.sh/) - [PostgreSQL for Windows](https://www.postgresql.org/download/windows/) or `choco install postgresql` with [Chocolatey](https://chocolatey.org/) - [PostgreSQL for Linux](https://www.postgresql.org/download/linux/) or `sudo apt install postgresql` with APT on Ubuntu 3. Make sure PostgreSQL is running - `brew services start postgresql` with [Homebrew](https://brew.sh/) - `sudo service postgresql start` with APT on Ubuntu 4. Create a new database named `postgres` with a user named `postgres` with the password `postgres` 5. Install dependencies - `npm install` or `npm ci` for a clean install from the package-lock.json 6. Duplicate the `.env.example` file and rename it to `.env.local` with `cp .env.example .env.local`. 7. [optional] Follow [the instructions](https://github.com/KoelLabs/server) to set up the inference server if you want to run the speech models. 8. Run the application - `npm run dev` to start the development server - `ctrl+c` to stop it ### Formatting, Linting, Automated Tests and Secret Scanning All will run automatically via GitHub Actions on every push to the repository. You can also run them locally: - `npm run format` or `. scripts/docker-run-dev-cmd.sh npm run format` to format the code with Prettier, ESLint and Fixpack - `npm run lint` or `. scripts/docker-run-dev-cmd.sh npm run lint` to lint the code with Next.js ESLint and Prettier - `npm run test` or `. 
scripts/docker-run-dev-cmd.sh npm run test` to run the automated tests with Node Test Runner - `npm run scan` or `. scripts/docker-run-dev-cmd.sh npm run scan` to scan the code for secrets with GitLeaks and action security vulnerabilities with Zizmor Formatting can be set to happen automatically on save in your editor. For VS Code, install the [Prettier](https://marketplace.visualstudio.com/items?itemName=esbenp.prettier-vscode) extension and add `"editor.formatOnSave": true` to your settings. ### Migrations When you have made changes to the schema in `db/schema.ts`, you will need to make new migration files with `npm run makemigrations` or `. scripts/docker-run-dev-cmd.sh npm run makemigrations`. Everyone will need to apply them with `npm run migrate` to update their local database (applied automatically if running with docker-compose). The production database will be updated automatically when the migration files are deployed. Migration files are immutable and should not be edited once created.
### Directory Structure ``` webapp/ ├── .github # GitHub Actions and issue templates ├── app/ # Application files ├── components/ # React components ├── data/ # Postgres database files when running with Docker ├── db/ # Database schema and utilities ├── migrations/ # Database migration files ├── hooks/ # React hooks ├── pages/ # Next.js pages │ └── api/ # API routes ├── public/ # Static assets to be served ├── scripts/ # Shell scripts ├── utils/ # Utility functions ├── .editorconfig # Editor configuration ├── .env.example # Example environment variables ├── .eslintrc.json # ESLint configuration ├── .gitignore # Git ignore rules ├── .prettierignore # Prettier ignore rules ├── .prettierrc # Prettier configuration ├── components.json # Component metadata ├── CONTRIBUTING.md # Contribution guidelines ├── DEVELOPMENT.md # Development setup ├── drizzle.config.ts # Drizzle configuration ├── jsconfig.json # JavaScript configuration ├── LICENSE # License information ├── middleware.ts # Middleware functions (auth) ├── next-env.d.ts # Next.js environment types ├── next.config.mjs # Next.js configuration ├── package-lock.json # Lock file ├── package.json # NPM configuration ├── postcss.config.mjs # PostCSS configuration ├── README.md # Readme ├── tailwind.config.js # Tailwind CSS configuration └── tsconfig.json # TypeScript configuration ``` ## Deployment Pushes and merged pull requests to the `main` branch will automatically deploy to the production server. Pull requests will automatically be assigned a preview URL hosted via the test server.
================================================ FILE: drizzle.config.ts ================================================ import { config } from 'dotenv'; import { defineConfig } from 'drizzle-kit'; config({ path: './.env.local', }); export default defineConfig({ schema: './db/schema.ts', out: './migrations', dialect: 'postgresql', dbCredentials: { url: process.env.DATABASE_URL!, }, verbose: true, strict: true, }); ================================================ FILE: jsconfig.json ================================================ { "compilerOptions": { "jsx": "react", "paths": { "@/*": ["./*"] } } } ================================================ FILE: LICENSE ================================================ # Functional Source License, Version 1.1, Apache 2.0 Future License ## Abbreviation FSL-1.1-Apache-2.0 ## Notice Copyright 2024 Koel Labs ## Terms and Conditions ### Licensor ("We") The party offering the Software under these Terms and Conditions. ### The Software The "Software" is each version of the software that we make available under these Terms and Conditions, as indicated by our inclusion of these Terms and Conditions with the Software. ### License Grant Subject to your compliance with this License Grant and the Patents, Redistribution and Trademark clauses below, we hereby grant you the right to use, copy, modify, create derivative works, publicly perform, publicly display and redistribute the Software for any Permitted Purpose identified below. ### Permitted Purpose A Permitted Purpose is any purpose other than a Competing Use. A Competing Use means making the Software available to others in a commercial product or service that: 1. substitutes for the Software; 2. substitutes for any other product or service we offer using the Software that exists as of the date we make the Software available; or 3. offers the same or substantially similar functionality as the Software. Permitted Purposes specifically include using the Software: 1. 
for your internal use and access; 2. for non-commercial education; 3. for non-commercial research; and 4. in connection with professional services that you provide to a licensee using the Software in accordance with these Terms and Conditions. ### Patents To the extent your use for a Permitted Purpose would necessarily infringe our patents, the license grant above includes a license under our patents. If you make a claim against any party that the Software infringes or contributes to the infringement of any patent, then your patent license to the Software ends immediately. ### Redistribution The Terms and Conditions apply to all copies, modifications and derivatives of the Software. If you redistribute any copies, modifications or derivatives of the Software, you must include a copy of or a link to these Terms and Conditions and not remove any copyright notices provided in or with the Software. ### Disclaimer THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT. IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE. ### Trademarks Except for displaying the License Details and identifying us as the origin of the Software, you have no right under these Terms and Conditions to use our trademarks, trade names, service marks or product names. ## Grant of Future License We hereby irrevocably grant you an additional license to use the Software under the Apache License, Version 2.0 that is effective on the second anniversary of the date we make the Software available. 
On or after that date, you may use the Software under the Apache License, Version 2.0, in which case the following will apply: Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: middleware.ts ================================================ import { NextResponse } from 'next/server'; import type { NextRequest } from 'next/server'; import { getSessionCookie, setSessionCookie } from 'better-auth/cookies'; const PUBLIC_URLS = [ new RegExp('^/$'), new RegExp('^/blog$'), new RegExp('^/blog/.*$'), new RegExp('^/pricing$'), new RegExp('^/about$'), new RegExp('^/contact$'), new RegExp('^/previews$'), new RegExp('^/research$'), new RegExp('^/research/.*$'), ]; export async function middleware(request: NextRequest) { if (PUBLIC_URLS.some(regex => regex.test(request.nextUrl.pathname))) { return NextResponse.next(); // trying to access public page -> continue } const sessionCookie = getSessionCookie(request); // THIS IS NOT SECURE! 
// This is the recommended approach to optimistically redirect users // Auth checks should be handled in each page/route if (!sessionCookie) { // Logged out if (request.nextUrl.pathname === '/sign-in' || request.nextUrl.pathname === '/sign-up') { // trying to access sign-in or sign-up -> continue return NextResponse.next(); } else { // trying to access private page -> redirect to sign-in (disabled for now) return NextResponse.redirect(new URL('/sign-in', request.url)); } } else { // Logged in if (request.nextUrl.pathname === '/sign-in' || request.nextUrl.pathname === '/sign-up') { // trying to access sign-in or sign-up -> redirect to /dashboard return NextResponse.redirect(new URL('/dashboard', request.url)); } else { // not trying to access sign-in or sign-up -> continue return NextResponse.next(); } } } export const config = { matcher: [ /* * Match all request paths except for: * - .swa (Azure Static Web Apps) * - _next (Next.js internal paths) * - api (API routes) * - paths with file extensions (e.g., .css, .js, .png, etc.) */ '/((?!_next|api|.*\\.|.swa).*)', '/', ], }; ================================================ FILE: next-env.d.ts ================================================ /// /// /// /// // NOTE: This file should not be edited // see https://nextjs.org/docs/app/api-reference/config/typescript for more information. 
================================================ FILE: next.config.mjs ================================================ import path from 'path'; import CssMinimizerPlugin from 'css-minimizer-webpack-plugin'; const __dirname = new URL('.', import.meta.url).pathname; const nextConfig = { output: 'standalone', reactStrictMode: false, compiler: { // Enable React compiler for better performance reactRemoveProperties: process.env.NODE_ENV === 'production', removeConsole: process.env.NODE_ENV === 'production', styledComponents: true, }, webpack: config => { config.resolve.alias = { ...config.resolve.alias, '@': path.resolve(__dirname, './'), }; config.optimization.minimizer = [ config.optimization.minimizer[0], new CssMinimizerPlugin({ minimizerOptions: { preset: [ 'default', { colormin: { hsl: false }, // see https://github.com/cssnano/cssnano/issues/1515 }, ], }, }), ]; return config; }, }; export default nextConfig; ================================================ FILE: package.json ================================================ { "name": "koel-labs-webapp", "description": "Contains the website for Koel Labs and web interface for the pronunciation learning application.", "version": "0.1.0", "dependencies": { "@custom-react-hooks/use-media-query": "^1.5.1", "@headlessui/react": "^2.1.3", "@internationalized/date": "^3.8.2", "@radix-ui/react-avatar": "^1.1.0", "@radix-ui/react-checkbox": "^1.1.2", "@radix-ui/react-collapsible": "^1.1.0", "@radix-ui/react-context-menu": "^2.2.1", "@radix-ui/react-dialog": "^1.1.14", "@radix-ui/react-dropdown-menu": "^2.1.1", "@radix-ui/react-icons": "^1.3.0", "@radix-ui/react-label": "^2.1.0", "@radix-ui/react-navigation-menu": "^1.2.14", "@radix-ui/react-popover": "^1.1.1", "@radix-ui/react-progress": "^1.1.0", "@radix-ui/react-radio-group": "^1.2.3", "@radix-ui/react-scroll-area": "^1.2.3", "@radix-ui/react-select": "^2.1.2", "@radix-ui/react-separator": "^1.1.0", "@radix-ui/react-slider": "^1.3.5", "@radix-ui/react-slot": "^1.1.2", 
"@radix-ui/react-tabs": "^1.1.3", "@radix-ui/react-toast": "^1.2.4", "@radix-ui/react-tooltip": "^1.1.2", "@tailwindcss/postcss": "^4.0.14", "@types/uuid": "^10.0.0", "@vidstack/react": "^1.12.12", "@wavesurfer/react": "^1.0.11", "better-auth": "^1.2.12", "class-variance-authority": "^0.7.0", "clsx": "^2.1.1", "cmdk": "^1.1.1", "drizzle-kit": "^0.31.8", "drizzle-orm": "^0.44.2", "embla-carousel-autoplay": "^8.6.0", "embla-carousel-react": "^8.3.0", "formidable": "^3.5.2", "framer-motion": "^11.3.30", "fuse.js": "^7.1.0", "lucide-react": "^0.407.0", "media-chrome": "^4.2.3", "motion": "^12.5.0", "next": "^15.2.6", "next-themes": "^0.3.0", "pg": "^8.12.0", "postcss": "^8", "react": "^18", "react-aria-components": "^1.10.1", "react-canvas-confetti": "^2.0.7", "react-dom": "^18", "react-google-recaptcha-v3": "^1.10.1", "react-image-crop": "^11.0.10", "react-scan": "^0.4.3", "react-syntax-highlighter": "^16.1.0", "recharts": "^2.15.3", "swr": "^2.3.3", "tailwind-merge": "^2.5.2", "tailwind-scrollbar-hide": "^1.1.7", "tailwindcss": "^4.0.14", "tailwindcss-animate": "^1.0.7", "tw-animate-css": "^1.3.7", "uuid": "^11.1.0", "vaul": "^0.9.4", "wavesurfer.js": "^7.10.1", "zod": "^3.23.8" }, "devDependencies": { "@types/formidable": "^3.4.5", "@types/node": "22.5.2", "@types/pg": "^8.15.4", "@types/react": "18.3.5", "@types/react-syntax-highlighter": "^15.5.13", "css-minimizer-webpack-plugin": "^7.0.0", "dotenv": "^16.4.5", "eslint": "^9.39.1", "eslint-config-next": "^16.0.7", "eslint-config-prettier": "^9.1.0", "fixpack": "^4.0.0", "prettier": "^3.3.3" }, "optionalDependencies": { "@tailwindcss/oxide-linux-x64-gnu": "^4.0.1", "lightningcss-linux-x64-gnu": "^1.29.1" }, "engines": { "node": "^18.18" }, "overrides": { "react-is": "^19.0.0-rc-69d4b800-20241021" }, "private": true, "proxy": "http://127.0.0.1:8080", "scripts": { "build": "next build && cp -r .next/static .next/standalone/.next/ && cp -r public .next/standalone/", "dev": "next dev", "docker": ". 
./scripts/docker-run-dev.sh", "format": "(fixpack || true) && (eslint . --fix || true) && prettier --write .", "license-report": "npx license-report --output=markdown --fields=name --fields=licensePeriod --fields=licenseType --fields=link --fields=author", "lint": "npx eslint-config-prettier **/*.jsx && next lint && prettier --check .", "makemigrations": "drizzle-kit generate", "migrate": "eval $(cat .env.local) && drizzle-kit migrate", "scan": "gitleaks detect && npm audit && zizmor .", "start": "next start", "test": "echo \"Error: not implemented yet \" && exit 1" } } ================================================ FILE: postcss.config.mjs ================================================ /** @type {import('postcss-load-config').Config} */ const config = { plugins: { '@tailwindcss/postcss': {}, }, }; export default config; ================================================ FILE: tsconfig.json ================================================ { "compilerOptions": { "lib": [ "dom", "dom.iterable", "esnext" ], "allowJs": true, "skipLibCheck": true, "strict": false, "noEmit": true, "incremental": true, "module": "esnext", "esModuleInterop": true, "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "paths": { "@/*": [ "./*" ] }, "plugins": [ { "name": "next" } ], "strictNullChecks": true, "target": "ES2017" }, "include": [ "next-env.d.ts", ".next/types/**/*.ts", "**/*.ts", "**/*.tsx", "components/ui/dashboard-stat-chart.jsx", "components/ui/dashboard-stat-chart.jsx", "app/dashboard/videoPlayer.tsx" ], "exclude": [ "node_modules" ] } ================================================ FILE: .editorconfig ================================================ # http://editorconfig.org root = true [*] charset = utf-8 insert_final_newline = true trim_trailing_whitespace = true max_line_length = 100 [{*.js,*.json,*.yml}] indent_size = 2 indent_style = space ================================================ FILE: .env.example 
================================================ NEXT_PUBLIC_SERVER_ORIGIN=http://localhost:8080 NEXT_PUBLIC_SERVER_HOST=localhost:8080 NEXT_PUBLIC_RECAPTCHA_SITE_KEY=6Lfm4a0qAAAAADOh_Y470P2JtrGq5V84RHd2wSOv RECAPTCHA_PRIVATE_KEY=check https://www.google.com/recaptcha CONTACT_FORM_URL=google formResponse url POSTGRES_HOST=localhost POSTGRES_PORT=5432 POSTGRES_USER=postgres POSTGRES_PASSWORD=postgres POSTGRES_DB=postgres DATABASE_URL=postgresql://postgres:postgres@localhost:5432/postgres BETTER_AUTH_SECRET=randomvalue BETTER_AUTH_URL=http://localhost:3000 GOOGLE_CLIENT_ID=from gcp console GOOGLE_CLIENT_SECRET=from gcp console MICROSOFT_CLIENT_ID=from ENTRA app in Azure MICROSOFT_CLIENT_SECRET=from ENTRA app in Azure MICROSOFT_TENANT_ID=from ENTRA app in Azure ================================================ FILE: .eslintrc.json ================================================ { "extends": "next/core-web-vitals", "rules": { "react/no-unescaped-entities": 0 } } ================================================ FILE: .gitleaksignore ================================================ 9e719a59eaae067db632e575e419abf3303a5ffc:.env.example:generic-api-key:1 9e719a59eaae067db632e575e419abf3303a5ffc:.github/workflows/azure-static-web-apps-salmon-sky-0a6b6c10f.yml:generic-api-key:37 ================================================ FILE: .prettierignore ================================================ # dependencies node_modules # Minified Files **/*.min.js # Cache .firebase # next.js /.next/ /out/ # production /build # misc .DS_Store *.pem migrations # postgres data # local env files .env*.local ================================================ FILE: .prettierrc ================================================ { "printWidth": 100, "tabWidth": 2, "useTabs": false, "semi": true, "singleQuote": true, "quoteProps": "as-needed", "trailingComma": "all", "bracketSpacing": true, "arrowParens": "avoid", "endOfLine": "auto" } ================================================ FILE: 
app/globals.css ================================================ @import '../styles/animations.css' layer(base); @import 'tailwindcss'; @plugin 'tailwindcss-animate'; @plugin 'tailwind-scrollbar-hide'; @plugin '@vidstack/react/tailwind.cjs' { selector: '.media-player'; prefix: 'media'; } @custom-variant dark (&:is(.dark *)); @theme { --font-inter: 'Inter', sans-serif; --breakpoint-xss: 390px; --breakpoint-special: 768px; --color-border: hsl(var(--border)); --color-input: hsl(var(--input)); --color-ring: hsl(var(--ring)); --color-background: hsl(var(--background)); --color-foreground: hsl(var(--foreground)); --color-primary: hsl(var(--primary)); --color-primary-foreground: hsl(var(--primary-foreground)); --color-secondary: hsl(var(--secondary)); --color-secondary-foreground: hsl(var(--secondary-foreground)); --color-destructive: hsl(var(--destructive)); --color-destructive-foreground: hsl(var(--destructive-foreground)); --color-muted: hsl(var(--muted)); --color-muted-foreground: hsl(var(--muted-foreground)); --color-accent: hsl(var(--accent)); --color-accent-foreground: hsl(var(--accent-foreground)); --color-popover: hsl(var(--popover)); --color-popover-foreground: hsl(var(--popover-foreground)); --color-card: hsl(var(--card)); --color-card-foreground: hsl(var(--card-foreground)); --color-chart-1: hsl(var(--chart-1)); --color-chart-2: hsl(var(--chart-2)); --color-chart-3: hsl(var(--chart-3)); --color-chart-4: hsl(var(--chart-4)); --color-chart-5: hsl(var(--chart-5)); --radius-lg: var(--radius); --radius-md: calc(var(--radius) - 2px); --radius-sm: calc(var(--radius) - 4px); --animate-accordion-down: accordion-down 0.2s ease-out; --animate-accordion-up: accordion-up 0.2s ease-out; @keyframes accordion-down { from { height: 0; } to { height: var(--radix-accordion-content-height); } } @keyframes accordion-up { from { height: var(--radix-accordion-content-height); } to { height: 0; } } } @font-face { font-family: 'OpenRundeRegular'; src: 
url('/fonts/OpenRunde-Regular.woff2') format('woff2'); font-weight: 400; font-style: normal; } @font-face { font-family: 'OpenRundeMedium'; src: url('/fonts/OpenRunde-Medium.woff2') format('woff2'); font-weight: 500; font-style: normal; } @font-face { font-family: 'OpenRundeSemibold'; src: url('/fonts/OpenRunde-Semibold.woff2') format('woff2'); font-weight: 600; font-style: normal; } @font-face { font-family: 'OpenRundeBold'; src: url('/fonts/OpenRunde-Bold.woff2') format('woff2'); font-weight: 700; font-style: normal; } .open-runde-regular { font-family: 'OpenRundeRegular', sans-serif; } .open-runde-medium { font-family: 'OpenRundeMedium', sans-serif; } .open-runde-semibold { font-family: 'OpenRundeSemibold', sans-serif; } .open-runde-bold { font-family: 'OpenRundeBold', sans-serif; } @utility container { margin-inline: auto; padding-inline: 2rem; @media (width >= --theme(--breakpoint-xss)) { max-width: none; } @media (width >= 1400px) { max-width: 1400px; } } /* The default border color has changed to `currentColor` in Tailwind CSS v4, so we've added these compatibility styles to make sure everything still looks the same as it did with Tailwind CSS v3. If we ever want to remove these styles, we need to add an explicit border color utility to any element that depends on these defaults. 
*/ @layer base { *, ::after, ::before, ::backdrop, ::file-selector-button { border-color: var(--color-gray-200, currentColor); } } /* .screen-minus-sidebar-max-width { max-width: calc(100vw - 256px); } @media (max-width: 768px) { .screen-minus-sidebar-max-width { max-width: 100vw; } } */ @utility main-transition { transition: max-width 150ms ease-in-out; } @layer base { :root { --background: 0 0% 100%; --foreground: 0 0% 3.9%; --card: 0 0% 100%; --card-foreground: 0 0% 3.9%; --popover: 0 0% 100%; --popover-foreground: 0 0% 3.9%; --primary: 0 0% 9%; --primary-foreground: 0 0% 98%; --secondary: 0 0% 96.1%; --secondary-foreground: 0 0% 9%; --muted: 0 0% 96.1%; --muted-foreground: 0 0% 45.1%; --accent: 0 0% 96.1%; --accent-foreground: 0 0% 9%; --destructive: 0 84.2% 60.2%; --destructive-foreground: 0 0% 98%; --border: 0 0% 89.8%; --input: 0 0% 89.8%; --ring: 0 0% 3.9%; --radius: 0.5rem; --chart-1: 12 76% 61%; --chart-2: 173 58% 39%; --chart-3: 197 37% 24%; --chart-4: 43 74% 66%; --chart-5: 27 87% 67%; } .dark { --background: 0 0% 3.9%; --foreground: 0 0% 98%; --card: 0 0% 3.9%; --card-foreground: 0 0% 98%; --popover: 0 0% 3.9%; --popover-foreground: 0 0% 98%; --primary: 0 0% 98%; --primary-foreground: 0 0% 9%; --secondary: 0 0% 14.9%; --secondary-foreground: 0 0% 98%; --muted: 0 0% 14.9%; --muted-foreground: 0 0% 63.9%; --accent: 0 0% 14.9%; --accent-foreground: 0 0% 98%; --destructive: 0 62.8% 30.6%; --destructive-foreground: 0 0% 98%; --border: 0 0% 14.9%; --input: 0 0% 14.9%; --ring: 0 0% 83.1%; --chart-1: 220 70% 50%; --chart-2: 160 60% 45%; --chart-3: 30 80% 55%; --chart-4: 280 65% 60%; --chart-5: 340 75% 55%; } } @layer base { * { @apply border-border; } body { @apply bg-background text-foreground; } } .color-bg { background-image: radial-gradient(at 27% 37%, #0284c7 0, transparent 0), radial-gradient(at 97% 21%, #0284c7b0 0, transparent 50%), radial-gradient(at 52% 99%, #0284c7b0 0, transparent 50%), radial-gradient(at 10% 29%, #0284c7 0, transparent 50%), 
radial-gradient(at 97% 96%, #95c6e4 0, transparent 50%), radial-gradient(at 33% 50%, #ffffff 0, transparent 50%), radial-gradient(at 79% 53%, #ffffff00 0, transparent 50%); } .color-bg-ui { background-image: radial-gradient(at 27% 37%, #0284c7 0, transparent 0), radial-gradient(at 100% 0%, #0285c7 0, transparent 40%), radial-gradient(at 0% 100%, #0285c7 0, transparent 40%), radial-gradient(at 100% 96%, #95c6e4 0, transparent 20%), radial-gradient(at 13% 60%, #ffffff 0, transparent 50%), radial-gradient(at 79% 53%, #ffffff 0, transparent 50%); } .drops { position: relative; overflow: hidden; } .drops::before { content: ''; position: absolute; top: -100%; /* Start above the bar */ left: 0; right: 0; width: 100%; height: 50%; background: linear-gradient(to bottom, transparent, #1e3a8a); animation: drop-animation 15s linear infinite; } @keyframes drop-animation { 0% { top: -200%; } 100% { top: 200%; } } .drops2 { position: relative; overflow: hidden; } .drops2::before { content: ''; position: absolute; top: -100%; /* Start above the bar */ left: 0; right: 0; width: 100%; height: 50%; background: linear-gradient(to bottom, transparent, #0284c7); animation: drop-animation 10s linear infinite; } .drops3 { position: relative; overflow: hidden; } .drops3::before { content: ''; position: absolute; top: -100%; /* Start above the bar */ left: 0; right: 0; width: 100%; height: 50%; background: linear-gradient(to bottom, transparent, #1e3a8a); animation: drop-animation 18s linear infinite; } .drops4 { position: relative; overflow: hidden; } .drops4::before { content: ''; position: absolute; top: -100%; /* Start above the bar */ left: 0; right: 0; width: 100%; height: 50%; background: linear-gradient(to bottom, transparent, #1d4ed8); animation: drop-animation 22s linear infinite; } .shimmer path { opacity: 0.5; animation: shimmer 3s infinite; } .shimmer path:nth-child(1) { animation-delay: 0s; } .shimmer path:nth-child(2) { animation-delay: 2s; } .shimmer path:nth-child(3) { 
animation-delay: 4s; } @keyframes shimmer { 0%, 100% { opacity: 0.5; scale: 1; } 50% { opacity: 1; scale: 1.05; } } html { overflow-x: hidden; } .box { position: relative; transform-style: preserve-3d; } .box::before { content: ''; position: absolute; inset: 0px; background: radial-gradient(49% 81% at 45% 47%, rgb(5, 62, 115) 0%, #073aff00 100%), radial-gradient(113% 91% at 17% -2%, rgba(1, 40, 75, 0.548) 0%, #ff000000 99%), radial-gradient(142% 91% at 83% 7%, rgba(6, 30, 75, 0) 1%, #ff000000 99%), radial-gradient(142% 91% at -6% 74%, rgba(174, 196, 255, 0) 1%, #ff000000 99%), radial-gradient(142% 91% at 111% 84%, rgb(52, 125, 193) 0%, #ffffffff 99%); filter: blur(15px); opacity: 0.3; transform: translate3d(0px, 0px, -1px); pointer-events: none; } .box-inside { background: radial-gradient(49% 81% at 45% 47%, rgba(52, 125, 193, 0.249) 0%, #073aff00 100%), radial-gradient(113% 91% at 17% -2%, rgba(131, 170, 207, 0.348) 0%, #ff000000 99%), radial-gradient(142% 91% at 83% 7%, rgba(164, 138, 249, 0.326) 1%, #ff000000 99%), radial-gradient(142% 91% at -6% 74%, rgba(174, 196, 255, 0.735) 1%, #ff000000 99%), radial-gradient(142% 91% at 111% 84%, rgba(52, 125, 193, 0.37) 0%, #ffffffff 99%); } .vds-video-layout .vds-time-slider .vds-slider-value { background-color: var(--video-time-bg, unset); text-shadow: 0px 0px 0px #333333, 0px 0px 0px #333333, 0px 0px 0px #333333, 0px 0px 0px #333333 !important; } :where(.vds-video-layout .vds-time-slider .vds-slider-chapter-title) { text-shadow: 0px 0px 0px #333333, 0px 0px 0px #333333, 0px 0px 0px #333333, 0px 0px 0px #333333 !important; } :where(.vds-poster) { height: auto !important; } .vds-audio-layout, .vds-video-layout { /* Shared. */ --media-brand: #f5f5f5; --media-controls-color: #f5f5f5; --media-font-family: 'Inter', sans-serif; /* Buffering. 
*/ --media-buffering-animation: vds-buffering-spin 1s linear infinite; --media-buffering-size: 96px; --media-buffering-track-color: #f5f5f5; --media-buffering-track-fill-color: var(--media-brand); --media-buffering-track-fill-offset: 50; --media-buffering-track-fill-opacity: 0.75; --media-buffering-track-fill-width: 9; --media-buffering-track-opacity: 0.25; --media-buffering-track-width: 8; --media-buffering-transition: opacity 200ms ease; /* Buttons. */ --media-button-border-radius: 8px; --media-button-color: var(--media-controls-color, #f5f5f5); --media-button-hover-bg: rgb(255 255 255 / 0.2); --media-button-hover-transform: scale(1); --media-button-hover-transition: transform 0.2s ease-in; --media-button-icon-size: 80%; --media-button-padding: 0px; --media-button-size: 40px; --media-button-touch-hover-bg: rgb(255 255 255 / 0.2); --media-button-touch-hover-border-radius: 100%; --media-sm-fullscreen-button-size: 42px; --media-fullscreen-button-size: 42px; /* Tooltips. */ --media-tooltip-bg-color: black; --media-tooltip-border-radius: 4px; --media-tooltip-border: 1px solid rgb(255 255 255 / 0.1); --media-tooltip-color: hsl(0, 0%, 80%); --media-tooltip-font-size: 13px; --media-tooltip-font-weight: 500; --media-tooltip-padding: 2px 8px; --media-tooltip-enter-animation: vds-tooltip-enter 0.2s ease-in; --media-tooltip-exit-animation: vds-tooltip-exit 0.2s ease-out; /* Live Indicator. */ --media-live-button-bg: #8a8a8a; --media-live-button-border-radius: 2px; --media-live-button-color: #161616; --media-live-button-edge-bg: #dc2626; --media-live-button-edge-color: #f5f5f5; --media-live-button-font-size: 12px; --media-live-button-font-weight: 600; --media-live-button-height: 40px; --media-live-button-letter-spacing: 1.5px; --media-live-button-padding: 1px 4px; --media-live-button-width: 40px; /* Captions. 
*/ --media-captions-padding: 1%; --media-cue-backdrop: blur(8px); --media-cue-bg: rgba(0, 0, 0, 0.7); --media-cue-border-radius: 2px; --media-cue-border: unset; --media-cue-box-shadow: var(--cue-box-shadow); --media-cue-color: white; --media-cue-display-bg: unset; --media-cue-display-border-radius: unset; --media-cue-display-padding: unset; --media-cue-font-size: calc(var(--overlay-height) / 100 * 4.5); --media-cue-line-height: calc(var(--cue-font-size) * 1.2); --media-cue-padding-x: calc(var(--cue-font-size) * 0.4); --media-cue-padding-x: calc(var(--cue-font-size) * 0.6); /* Chapter Title. */ --media-chapter-title-color: rgba(255 255 255 / 0.64); --media-chapter-title-font-size: 14px; --media-chapter-title-font-weight: 500; --media-chapter-title-separator-color: var(--color); --media-chapter-title-separator-gap: 6px; --media-chapter-title-separator: '\2022'; /* Controls. */ --media-controls-padding: 0px; --media-controls-in-transition: opacity 0.2s ease-in; --media-controls-out-transition: opacity 0.2s ease-out; /* Thumbnails. */ --media-thumbnail-bg: black; --media-thumbnail-border: 1px solid white; --media-thumbnail-aspect-ratio: 16 / 9; --media-thumbnail-min-width: 120px; --media-thumbnail-min-height: calc(var(--media-thumbnail-min-width) / var(--aspect-ratio)); --media-thumbnail-max-width: 180px; --media-thumbnail-max-height: calc(var(--media-thumbnail-max-width) / var(--aspect-ratio)); /* Time. */ --media-time-bg: unset; --media-time-border-radius: unset; --media-time-border: unset; --media-time-color: #f5f5f5; --media-time-divider-color: #e0e0e0; --media-time-divider-gap: 2.5px; --media-time-font-size: 14px; --media-time-font-weight: 400; --media-time-letter-spacing: 0.025em; /* Sliders. */ --media-slider-width: 100%; --media-slider-height: 48px; /* Slider Thumb. 
*/ --media-slider-thumb-bg: #fff; --media-slider-thumb-border-radius: 9999px; --media-slider-thumb-border: 1px solid #cacaca; --media-slider-thumb-size: 15px; --media-slider-thumb-transition: opacity 0.2s ease-in, box-shadow 0.2s ease; /* Slider Tracks. */ --media-slider-track-width: 100%; --media-slider-track-bg: rgb(255 255 255 / 0.3); --media-slider-track-border-radius: 1px; --media-slider-track-fill-bg: var(--media-brand); --media-slider-track-fill-live-bg: #dc2626; --media-slider-track-height: 5px; --media-slider-track-progress-bg: rgb(255 255 255 / 0.5); --media-slider-focused-thumb-shadow: 0 0 0 0px hsla(0, 0%, 100%, 0); --media-slider-focused-thumb-size: calc(var(--thumb-size) * 1.1); --media-slider-focused-track-height: calc(var(--track-height) * 1.25); --media-slider-focused-track-height: var(--track-height); --media-slider-focused-track-width: calc(var(--track-width) * 1.25); --media-slider-focused-track-width: var(--track-width); /* Slider Steps. */ --media-slider-step-width: 2.5px; --media-slider-step-color: rgb(124, 124, 124); /* Slider Chapter. */ --media-slider-chapter-hover-transform: scaleY(2); --media-slider-chapter-hover-transition: transform 0.1s cubic-bezier(0.4, 0, 1, 1); /* Slider Preview. */ --media-slider-preview-bg: unset; --media-slider-preview-border-radius: 2px; /* Slider Chapter Title. */ --media-slider-chapter-title-bg: unset; --media-slider-chapter-title-color: #f5f5f5; --media-slider-chapter-title-font-size: 14px; --media-slider-chapter-title-gap: 6px; /* Slider Value. */ --media-slider-value-bg: black; --media-slider-value-border-radius: 2px; --media-slider-value-border: unset; --media-slider-value-color: white; --media-slider-value-gap: 0px; --media-slider-value-padding: 1px 10px; /* Menu Theme. */ --media-menu-color-gray-50: rgb(245 245 245 / 0.1); --media-menu-color-gray-100: rgb(245 245 245 / 0.45); --media-menu-color-gray-200: rgb(10 10 10 / 0.6); --media-menu-color-gray-300: rgb(27 27 27); /* Menu Text. 
*/ --media-menu-text-color: #f5f5f5; --media-menu-text-secondary-color: #6b6b6b; /* Menu. */ --media-menu-bg: var(--media-menu-bg, var(--color-gray-400)); --media-menu-border-radius: 4px; --media-menu-border: 1px solid rgb(255 255 255 / 0.1); --media-menu-box-shadow: 1px 1px 1px rgb(10 10 10 / 0.5); --media-menu-divider: 1px solid var(--color-gray-50); --media-menu-font-size: 14px; --media-menu-font-weight: 500; --media-menu-max-height: 250px; --media-menu-min-width: 220px; --media-menu-padding: 12px; --media-menu-top-bar-bg: rgb(10 10 10 / 0.6); --media-menu-arrow-icon-size: 18px; --media-menu-icon-rotate-deg: 90deg; --media-menu-enter-animation: vds-menu-enter 0.3s ease-out; --media-menu-exit-animation: vds-menu-exit 0.2s ease-out; --media-menu-scrollbar-track-bg: transparent; --media-menu-scrollbar-thumb-bg: var(--color-gray-50); --media-sm-menu-landscape-max-height: min(70vh, 400px); --media-sm-menu-portrait-max-height: 40vh; /* Menu Section. */ --media-menu-section-bg: var(--color-gray-300); --media-menu-section-border: unset; --media-menu-section-divider: var(--divider); --media-menu-section-header-font-size: 12px; --media-menu-section-header-font-weight: 500; --media-menu-section-gap: 8px; --media-menu-section-border-radius: 2px; /* Menu Item. */ --media-menu-item-bg: transparent; --media-menu-item-border-radius: 2px; --media-menu-item-border: 0; --media-menu-item-height: 40px; --media-menu-item-hover-bg: var(--color-gray-50); --media-menu-item-icon-size: 18px; --media-menu-item-icon-spacing: 6px; --media-menu-item-padding: 10px; /* Menu Radio. */ --media-menu-radio-icon-color: var(--text-color); /* Menu Checkbox. */ --media-menu-checkbox-width: 40px; --media-menu-checkbox-height: 18px; --media-menu-checkbox-bg-active: #1ba13f; --media-menu-checkbox-bg: var(--color-gray-100); --media-menu-checkbox-handle-bg: #f5f5f5; --media-menu-checkbox-handle-border: unset; --media-menu-checkbox-handle-diameter: calc(var(--checkbox-height) - 2px); /* Menu Slider. 
*/ --media-menu-slider-height: 32px; --media-menu-slider-track-bg: var(--color-gray-50); --media-menu-slider-track-fill-bg: var(--color-inverse); /* Menu Hint. */ --media-menu-hint-color: var(--text-secondary-color); --media-menu-hint-font-size: 13px; --media-menu-hint-font-weight: 400; /* Chapters Menu. */ --media-chapters-divider: var(--divider); --media-chapters-duration-bg: unset; --media-chapters-duration-border-radius: 2px; --media-chapters-focus-padding: 4px; --media-chapters-item-active-bg: var(--color-gray-50); --media-chapters-item-active-border-left: unset; --media-chapters-min-width: var(--media-menu-min-width, 220px); --media-chapters-padding: 0; --media-chapters-progress-bg: var(--color-inverse); --media-chapters-progress-border-radius: 0; --media-chapters-progress-height: 4px; --media-chapters-start-time-border-radius: 2px; --media-chapters-start-time-letter-spacing: 0.4px; --media-chapters-start-time-padding: 1px 4px; --media-chapters-thumbnail-border: 0; --media-chapters-thumbnail-gap: 12px; --media-chapters-thumbnail-max-height: 68px; --media-chapters-thumbnail-max-width: 120px; --media-chapters-thumbnail-min-height: 56px; --media-chapters-thumbnail-min-width: 100px; --media-chapters-time-font-size: 12px; --media-chapters-time-font-weight: 500; --media-chapters-time-gap: 6px; --media-chapters-with-thumbnails-min-width: 300px; } .NavigationMenuIndicator[data-state='visible'] { animation: fadeIn 200ms ease; } .NavigationMenuIndicator[data-state='hidden'] { animation: fadeOut 200ms ease; } @keyframes float-left-right { 0%, 100% { transform: translateX(-200px); } 50% { transform: translateX(700px); } } @keyframes float-right-left { 0%, 100% { transform: translateX(200px); } 50% { transform: translateX(-700px); } } @keyframes fadeIn { from { opacity: 0; } to { opacity: 1; } } @keyframes fadeOut { from { opacity: 1; } to { opacity: 0; } } strong { font-weight: semibold; } body { background-color: #ffffff; } main { background-color: #ffffff; } .path { 
animation: pathAnimation 24s ease-in-out infinite; stroke-dasharray: 612.5; stroke-dashoffset: 0; } .path0 { --stroke-dashoffset: 1000; } .path2 { --stroke-dashoffset: 2000; } .path4 { --stroke-dashoffset: 3000; } .path6 { --stroke-dashoffset: 4000; } .path8 { --stroke-dashoffset: 5000; } .grecaptcha-badge { visibility: hidden; } @keyframes pathAnimation { 0% { stroke-dashoffset: var(--stroke-dashoffset); } 50% { stroke-dashoffset: 0; } 100% { stroke-dashoffset: var(--stroke-dashoffset); } } /* Navigation Menu Animations */ @keyframes enterFromLeft { from { opacity: 0; transform: translateX(-100px) translateY(-8px); } to { opacity: 1; transform: translateX(0) translateY(0); } } @keyframes enterFromRight { from { opacity: 0; transform: translateX(100px) translateY(-8px); } to { opacity: 1; transform: translateX(0) translateY(0); } } @keyframes exitToLeft { from { opacity: 1; transform: translateX(0) translateY(0); } to { opacity: 0; transform: translateX(-100px) translateY(-8px); } } @keyframes exitToRight { from { opacity: 1; transform: translateX(0) translateY(0); } to { opacity: 0; transform: translateX(100px) translateY(-8px); } } @keyframes scaleInContent { from { opacity: 0; transform: scale(0.95) translateY(-8px); } to { opacity: 1; transform: scale(1) translateY(0); } } @keyframes scaleOutContent { from { opacity: 1; transform: scale(1) translateY(0); } to { opacity: 0; transform: scale(0.9) translateY(-8px); } } .animate-enter-from-left { animation: enterFromLeft 0.35s cubic-bezier(0.16, 1, 0.3, 1); } .animate-enter-from-right { animation: enterFromRight 0.35s cubic-bezier(0.16, 1, 0.3, 1); } .animate-exit-to-left { animation: exitToLeft 0.25s cubic-bezier(0.4, 0, 0.2, 1); } .animate-exit-to-right { animation: exitToRight 0.25s cubic-bezier(0.4, 0, 0.2, 1); } .animate-scale-in-content { animation: scaleInContent 0.35s cubic-bezier(0.16, 1, 0.3, 1); transform-origin: top right !important; } .animate-scale-out-content { animation: scaleOutContent 0.25s 
cubic-bezier(0.4, 0, 0.2, 1); transform-origin: top center !important; } /* Ensure Radix NavigationMenu viewport animates in/out based on data-state */ .nav-viewport { transition: width 300ms, height 300ms, transform 300ms; } .nav-viewport[data-state='open'] { animation: scaleInContent 0.35s cubic-bezier(0.16, 1, 0.3, 1); transform-origin: top right !important; } .nav-viewport[data-state='closed'] { animation: scaleOutContent 0.25s cubic-bezier(0.4, 0, 0.2, 1); transform-origin: top center !important; } ================================================ FILE: app/layout.js ================================================ import { Inter } from 'next/font/google'; import './globals.css'; import '../styles/animations.css'; import { Toaster } from '@/components/ui/toaster'; import Script from 'next/script'; import { ReactScan } from '@/components/react-scan'; const inter = Inter({ subsets: ['latin'] }); export const viewport = { themeColor: 'white', }; export const metadata = { metadataBase: new URL('https://koellabs.com'), title: { template: '%s | Koel Labs', default: 'Koel Labs - Building out state-of-the-art models, tools, and datasets to make speech technologies more inclusive for all dialects.', }, description: 'A research lab building out state-of-the-art models, tools, and datasets to make speech technologies more inclusive for all dialects. We are a team of researchers and engineers inspired by our own experiences with language.', keywords: [ 'pronunciation learning', 'AI language learning', 'speech technology', 'language education', 'pronunciation assessment', ], openGraph: { title: 'Koel Labs', description: 'A research lab building out state-of-the-art models, tools, and datasets to make speech technologies more inclusive for all dialects. 
We are a team of researchers and engineers inspired by our own experiences with language.', url: 'https://koellabs.com', siteName: 'Koel Labs', images: [ { url: '/openGraph.png', width: 1600, height: 900, alt: 'An image with Koel Labs written on it.', }, ], locale: 'en_US', type: 'website', }, robots: { index: true, follow: true, googleBot: { index: true, follow: true, 'max-video-preview': -1, 'max-image-preview': 'large', 'max-snippet': -1, }, }, twitter: { title: 'Koel Labs', card: 'summary_large_image', description: 'A research lab building out state-of-the-art models, tools, and datasets to make speech technologies more inclusive for all dialects. We are a team of researchers and engineers inspired by our own experiences with language.', images: ['/openGraph.png'], }, alternates: { canonical: 'https://koellabs.com', }, }; export default function RootLayout({ children }) { return ( {children} ); } ================================================ FILE: app/not-found.js ================================================ import React from 'react'; import { Button } from '@/components/ui/base/button'; import { ArrowLeftIcon, ArrowRightIcon, MailIcon } from 'lucide-react'; import Link from 'next/link'; export default function NotFound() { return (

404

Page Not Found

Sorry, we couldn’t find the page you’re looking for.

); } ================================================ FILE: app/page.js ================================================ 'use client'; import Header from '@/components/ui/header'; import CTA from '@/components/sections/cta'; import Footer from '@/components/sections/footer'; import HeroNew from '@/components/sections/hero'; import Research from '@/components/sections/research'; import { useRef } from 'react'; import Models from '@/components/sections/models'; import Image from 'next/image'; import Autoplay from 'embla-carousel-autoplay'; import { Carousel, CarouselContent, CarouselItem } from '@/components/ui/base/carousel'; import Previews from '@/components/sections/previews'; import MagicHighlighter from '@/components/magic-highlighter'; const sliderImages = [ '/images/frontpage/1-group.jpg', '/images/frontpage/2-aruna.jpg', '/images/frontpage/3-group.jpg', '/images/frontpage/4-alex.jpg', '/images/frontpage/5-group.jpg', '/images/frontpage/6-ruslan.jpg', ]; export default function Home() { const containerRef = useRef(null); return (
{Array(500) .fill(0) .map((_, i) => (
))}
{sliderImages.map((src, i) => (
{`Gallery
))}
{Array(500) .fill(0) .map((_, i) => (
))}
{Array(500) .fill(0) .map((_, i) => (
))}
{Array(500) .fill(0) .map((_, i) => (
))}
); } ================================================ FILE: app/about/page.tsx ================================================ import React from 'react'; import { Button } from '@/components/ui/base/button'; import Header from '@/components/ui/header'; import { ChevronRight, UserRoundPlus } from 'lucide-react'; import Link from 'next/link'; import { Card } from '@/components/ui/base/card'; import HeroVideoDialog from '@/components/ui/magicui/hero-video-dialog'; import CTA from '@/components/sections/cta'; import Footer from '@/components/sections/footer'; const people = [ { name: 'Alexander Metzger', role: 'Chief Executive Officer', imageUrl: '/images/alexShot.png', linkedinUrl: 'https://www.linkedin.com/in/alexander-le-metzger/', }, { name: 'Aruna Srivastava', role: 'Chief Technology Officer', imageUrl: '/images/arunaShot.png', linkedinUrl: 'https://www.linkedin.com/in/arunasr/', }, { name: 'Ruslan Mukhamedvaleev', role: 'Chief Product Officer', imageUrl: '/images/ruslanShot.png', linkedinUrl: 'https://www.linkedin.com/in/ruslan-muk/', }, ]; export default function About() { return (
{Array(165) .fill(0) .map((_, i) => (
))}
{Array(165) .fill(0) .map((_, i) => (
))}
{/*

We’re changing the way people connect

*/}

Building Inclusive Speech Technology

Your voice is unique and should be understood not just by those around you, but also by the technology you interact with every day.

Too often, voice assistants and smart speakers struggle with accents, speech differences, or non-standard dialects, leaving millions of people frustrated and unheard.

As a research lab, we collaborate with top universities, other researchers, and domain experts to create technologies that actually understand your voice.

Koel Labs Signature
A photo of the Koel Labs founders, Alexander Metzger, Aruna Srivastava, and Ruslan Mukhamedvaleev standing in front of the bay bridge in San Francisco.
{Array(120) .fill(0) .map((_, i) => (
))}

The Team

Our Co-Founders

Koel Labs began with three students sharing a common vision. As immigrants and children of immigrants, the experience of language barriers shaped our understanding of how pronunciation challenges impact confidence and opportunities.

    {people.map(person => (
  • {`A

    {person.name}

    {person.role}

  • ))}
{Array(500) .fill(0) .map((_, i) => (
))}
{Array(500) .fill(0) .map((_, i) => (
))}
); } ================================================ FILE: app/api/auth/[...all]/route.ts ================================================ import { auth } from '@/lib/auth'; import { toNextJsHandler } from 'better-auth/next-js'; export const { POST, GET } = toNextJsHandler(auth); ================================================ FILE: app/api/auth/update-user/route.ts ================================================ import { NextResponse } from 'next/server'; import { auth } from '@/lib/auth'; import { db, users } from '@/db/schema'; import { eq } from 'drizzle-orm'; export async function POST(request: Request) { try { // Get the current session const session = await auth.api.getSession({ headers: request.headers }); if (!session || !session.user) { return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }); } // Parse the request body const { name } = await request.json(); // Update the user record with the new name await db .update(users) .set({ name: name, updatedAt: new Date(), }) .where(eq(users.id, session.user.id)); return NextResponse.json({ success: true }); } catch (error) { console.error('Error updating user information:', error); return NextResponse.json({ error: 'Failed to update user information' }, { status: 500 }); } } ================================================ FILE: app/api/user/preferences/route.ts ================================================ import { NextResponse } from 'next/server'; import { auth } from '@/lib/auth'; import { db, users } from '@/db/schema'; import { eq } from 'drizzle-orm'; export async function POST(request: Request) { try { const session = await auth.api.getSession({ headers: request.headers }); if (!session || !session.user) { return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }); } const body = await request.json(); const { nativeLanguage, nativeLanguageCountry, birthday, targetLanguage, experienceLevel, learningCity, challengingWords, hasConsented, avatar, } = body; const metadata = { 
nativeLanguage, nativeLanguageCountry, birthday, targetLanguage, experienceLevel, learningCity, challengingWords, hasConsented, lastUpdated: new Date().toISOString(), }; await db .update(users) .set({ metadata: JSON.stringify(metadata), ...(avatar ? { image: avatar } : {}), onboardingCompleted: true, updatedAt: new Date(), }) .where(eq(users.id, session.user.id)); return NextResponse.json({ success: true }); } catch (error) { console.error('Error updating user preferences:', error); return NextResponse.json({ error: 'Failed to update preferences' }, { status: 500 }); } } ================================================ FILE: app/blog/bloglist.tsx ================================================ // Component to display the lists of blog posts by category. import React from 'react'; import { ArrowUpRight } from 'lucide-react'; import { Card } from '@/components/ui/base/card'; import HeroVideoDialog from '@/components/ui/magicui/hero-video-dialog'; import Link from 'next/link'; import { getPosts } from './posts'; import type { Post } from './posts'; import { ArrowUpRightIcon } from 'lucide-react'; import CTA from '@/components/sections/cta'; import Footer from '@/components/sections/footer'; export const metadata = { title: 'Blog | Koel Labs Research and Updates', description: 'Stay up to date with the latest research, technical reports, and announcements from Koel Labs about pronunciation learning and speech technology.', }; function formatDate(dateString: string): string { const date = new Date(dateString); return date.toLocaleDateString('en-US', { year: 'numeric', month: 'long', day: 'numeric', }); } function PostCard({ post }: { post: Post }) { return (
{`Cover

{post.title}

{/* {formatDate(dateISO)} {byline ? ` • ${byline}` : ''} */}
A photo of Alexander Metzger A photo of Aruna Srivastava A photo of Ruslan Mukhamedvaleev
By Koel Labs ·

{post.summary.slice(0, 350)} {post.summary.length > 350 && '...'} {post.summary.length > 350 && ' Read More →'}

{/*
{post.category}
*/}
{Array(90) .fill(0) .map((_, i) => (
))}
{Array(70) .fill(0) .map((_, i) => (
))}
{Array(40) .fill(0) .map((_, i) => (
))}
{post.category}
); } function BlogList({ posts }: { posts: Post[] }) { return (
{/*
*/} {/*
*/}

Most Recent

Newsroom

{/*
*/}
{/* Copy before sorting: Array.prototype.sort and splice mutate in place,
    and `posts` is the array memoized by getPosts(), so mutating it here
    reordered it and dropped the two newest posts from every other section
    that renders the same array. slice() on the copy selects the same two. */}
{[...posts]
  .sort((a, b) => new Date(b.date).getTime() - new Date(a.date).getTime())
  .slice(0, 2)
  .map(post => ( ))}
{/*
*/} {/*
{Array(500) .fill(0) .map((_, i) => (
))}
*/}
{Array(500) .fill(0) .map((_, i) => (
))}
); } export default async function BlogListWrapper() { const posts = await getPosts(); return (

Technical Reports

{/*
*/}
{/* Sort a copy: sort() mutates in place and `posts` is the array memoized
    by getPosts(), shared with the other sections on this page. */}
{[...posts]
  .sort((a, b) => new Date(b.date).getTime() - new Date(a.date).getTime())
  .filter(post => post.category === 'Technical Report')
  .map(post => ( ))}
{Array(500) .fill(0) .map((_, i) => (
))}

Announcements

{/*
*/}
{/* Sort a copy: sort() mutates in place and `posts` is the array memoized
    by getPosts(), shared with the other sections on this page. */}
{[...posts]
  .sort((a, b) => new Date(b.date).getTime() - new Date(a.date).getTime())
  .filter(post => post.category === 'Announcement')
  .map(post => ( ))}
{Array(500) .fill(0) .map((_, i) => (
))}
{Array(500) .fill(0) .map((_, i) => (
))}
); } ================================================ FILE: app/blog/page.tsx ================================================ // The main blog page where the user can see all the blog posts. // Checkout posts.ts for general utility functions, post loading/caching, and post type definitions. // Checkout [slug]/page.tsx for the individual blog post page (this is the general layout for all blog posts). // Checkout posts/*.tsx for individual blog post content. import Header from '@/components/ui/header'; import BlogList from './bloglist'; export default function BlogPage() { return (
); } ================================================ FILE: app/blog/posts.ts ================================================ import fs from 'fs/promises'; import path from 'path'; import { cache } from 'react'; export type Tag = { text: string; url: string; }; export type Metadata = { title: string; slug: string; date: string; published: boolean; image: string; summary: string; category: 'Technical Report' | 'Announcement' | 'Tutorial' | 'Language Learning' | 'Other'; tags: Tag[]; }; export type Post = Metadata & { content: () => JSX.Element; }; export const getPosts = cache(async () => { const files = await fs.readdir('./app/blog/posts/'); const posts = await Promise.all( files .filter(file => path.extname(file) === '.tsx') .map(async file => { const post = await import(`./posts/${file}`); const metadata = post.metadata; if (!metadata.published) { return null; } const content = post.default; return { ...metadata, content } as Post; }), ); return posts.filter(p => p !== null); }); export async function getPost(slug: string) { const posts = await getPosts(); return posts.find(post => post.slug === slug); } ================================================ FILE: app/blog/[slug]/page.tsx ================================================ // General layout that is shared between all blog posts goes here (e.g. footer, header, display author, etc). // The posts are defined in app/blog/posts/*.tsx. 
import { getPost } from '../posts'; import { notFound } from 'next/navigation'; import Header from '@/components/ui/header'; import CTA from '@/components/sections/cta'; import Footer from '@/components/sections/footer'; export async function generateMetadata({ params }: { params: Promise<{ slug: string }> }) { const { slug } = await params; const post = await getPost(slug); if (!post) return {}; return { title: `${post.title} | Koel Labs Blog`, description: post.summary, openGraph: { title: post.title, description: post.summary, type: 'article', publishedTime: post.date, authors: ['Koel Labs', 'Alexander Metzger', 'Aruna Srivastava', 'Ruslan Mukhamedvaleev'], images: [ { url: post.image, width: 1200, height: 630, alt: post.title, }, ], }, twitter: { card: 'summary_large_image', title: post.title, description: post.summary, images: [post.image], }, }; } export default async function PostPage({ params, }: { params: Promise<{ slug: string; }>; }) { const { slug } = await params; const post = await getPost(slug); if (!post) return notFound(); return (
{Array(500) .fill(0) .map((_, i) => (
))}
{Array(500) .fill(0) .map((_, i) => (
))}
); } ================================================ FILE: app/blog/blog-components/body.tsx ================================================ import React, { PropsWithChildren } from 'react'; export default function BlogBody({ children }: PropsWithChildren) { return (
{Array(500) .fill(0) .map((_, i) => (
))}
{Array(500) .fill(0) .map((_, i) => (
))}
{children}
); } ================================================ FILE: app/blog/blog-components/code.tsx ================================================ import React from 'react'; import { CodeBlock } from '@/components/ui/code-block'; export default function BlogCode({ code }: { code: string }) { return (
); } ================================================ FILE: app/blog/blog-components/decorations.tsx ================================================ import React from 'react'; export default function BlogDecorations() { return ( <> ); } ================================================ FILE: app/blog/blog-components/heading.tsx ================================================ import React from 'react'; import { ArrowLeft } from 'lucide-react'; import Link from 'next/link'; type MaxWidthOption = '5xl' | '4xl' | '3xl' | '2xl' | 'xl' | number | undefined; export type BlogHeadingProps = { category: string; categoryColorClass?: string; // e.g. 'text-sky-600' | 'text-purple-600' title: string; dateISO: string; byline?: string; // defaults to 'By Koel Labs' maxWidth?: MaxWidthOption; // default lg:max-w-4xl }; function formatDate(dateString: string): string { const date = new Date(dateString); return date.toLocaleDateString('en-US', { year: 'numeric', month: 'long', day: 'numeric', }); } function resolveMaxWidthClass(maxWidth: MaxWidthOption): string { switch (maxWidth) { case '5xl': return 'lg:max-w-5xl'; case '3xl': return 'lg:max-w-3xl'; case 'xl': return 'lg:max-w-xl'; case '2xl': return 'lg:max-w-2xl'; case '4xl': default: return 'lg:max-w-4xl'; } } export default function BlogHeading({ category, categoryColorClass = 'text-sky-600', title, dateISO, byline = 'By Koel Labs', maxWidth, }: BlogHeadingProps) { const wrapperMax = resolveMaxWidthClass(maxWidth); return (
Back to Blog

{category}

{title}

{/* {formatDate(dateISO)} {byline ? ` • ${byline}` : ''} */}
A photo of Alexander Metzger A photo of Aruna Srivastava A photo of Ruslan Mukhamedvaleev
By Koel Labs ·
); } ================================================ FILE: app/blog/blog-components/hero-image.tsx ================================================ import React from 'react'; export type BlogHeroImageProps = { src: string; alt: string; }; export default function BlogHeroImage({ src, alt }: BlogHeroImageProps) { return (
{alt}
); } ================================================ FILE: app/blog/blog-components/image.tsx ================================================ import React from 'react'; import Image from 'next/image'; import { cn } from '@/lib/styles'; export default function BlogImage({ src, alt, width, height, expanded = false, sizes, className, link, linkSide, }: { src: string; alt: string; width: number; height: number; expanded: boolean; sizes?: string; className: string; link?: string; linkSide?: 'left' | 'right'; }) { return ( ); } ================================================ FILE: app/blog/blog-components/subheading.tsx ================================================ import React, { PropsWithChildren } from 'react'; export default function BlogSubheading({ children }: PropsWithChildren) { return

{children}

; } ================================================ FILE: app/blog/blog-components/utils.ts ================================================ export function formatDate(dateString: string): string { const date = new Date(dateString); return date.toLocaleDateString('en-US', { year: 'numeric', month: 'long', day: 'numeric', }); } ================================================ FILE: app/blog/posts/building-open-source-leaderboards.tsx ================================================ // This is the individual blog post page content. It is rendered insides [slug]/page.tsx. // The metadata object is used to display the blog post on the main blog page (app/blog/page.tsx). // It will also be used to set meta tags for SEO once I get around to adding that. import type { Metadata } from '../posts'; import Link from 'next/link'; import { Source_Serif_4 } from 'next/font/google'; import BlogBody from '../blog-components/body'; import BlogHeading from '../blog-components/heading'; import BlogHeroImage from '../blog-components/hero-image'; import BlogSubheading from '../blog-components/subheading'; import BlogImage from '../blog-components/image'; import BlogDecorations from '../blog-components/decorations'; const sourceSerif = Source_Serif_4({ subsets: ['latin'], weight: ['400', '700'], }); export const metadata: Metadata = { title: 'Building Open Source Hugging Face Leaderboards', slug: 'building-open-source-leaderboards', date: '2025-01-11', published: true, image: '/images/blogLeaderboards.png', summary: "Sometimes, the best machine learning models are hidden in plain sight. During our work on phonemic transcription, we stumbled upon a specialized ginic model that had been finetuned on Facebook's XLSR-53 model using the Buckeye corpus. This discovery proved significant: Ginic performs 1.2x better than Facebook, and iterating on their approach, our model performs 2.2x better than ginic. 
However, finding this model was more a product of extensive searching than systematic discovery, highlighting a broader challenge in the phoneme transcription space.", category: 'Technical Report', tags: [ { text: 'Hugging Face', url: 'https://huggingface.co/KoelLabs', }, ], }; export default function PostBody() { return (


An Introduction

Sometimes, the best machine learning models are hidden in plain sight. During our work on phonemic transcription, we stumbled upon a specialized{' '} ginic model {' '} that had been finetuned on{' '} Facebook's XLSR-53 model {' '} using the Buckeye corpus. This discovery proved significant: Ginic performs 1.2x better than Facebook, and iterating on their approach, our model performs 2.2x better than ginic. However, finding this model was more a product of extensive searching than systematic discovery, highlighting a broader challenge in the phoneme transcription space that led us to build{' '} this open-source leaderboard .

The Need for Better Model Discovery and Standardized Evaluation

While leaderboards have become fundamental infrastructure in many areas of machine learning - from{' '} large language models {' '} to{' '} automatic speech recognition {' '} - the field of phonemic transcription notably lacks such standardized evaluation frameworks. This gap isn't just about missing leaderboards - it reflects a broader absence of unified evaluation standards and comprehensive survey papers that could allow researchers and practitioners to track progress and compare approaches effectively.

A Streamlined Architecture for Open Evaluation

To address this gap, we implemented a system that handles both the queue backend and the leaderboard frontend. This design prioritizes transparency and accessibility - crucial elements often missing in evaluation. Our architecture consists of two main components:

  • app.py: Handles the front-end logic using Gradio, providing a clean interface for viewing the leaderboard, checking model status, and submitting new models
  • tasks.py: Manages the back-end operations, interfacing with three JSON files in the queue directory:
    • leaderboard.json:{' '} Stores the final, processed rankings
    • tasks.json: Tracks newly submitted models
    • results.json:{' '} Contains detailed metadata for completed evaluations

Transparency was a key consideration in our design. Unlike some existing leaderboards like{' '} Open ASR {' '} that require users to request model evaluation and wait, our system automates the process. Most models can be evaluated on the whole test set within hours of submission.

Additionally, the front-end leaderboard and queue backend are visible to all Hugging Face users - a deliberate choice to promote transparency. The results file provides detailed metadata about evaluations and model outputs, allowing users to understand precisely how models perform and evaluations are conducted.

This openness and automation represent a step toward the kind of standardized evaluation infrastructure that has helped accelerate progress in other areas of machine learning but has been notably absent in phonemic transcription.

Technical Implementation Details

Our evaluation system measures model performance using two key metrics:

  1. PER (Phoneme Error Rate): Calculated using Levenshtein distance between predicted and actual phoneme sequences
  2. PWED (Phoneme Weighted Edit Distance): A more nuanced metric that considers phonemic feature distances, implemented using the panphon library

You can read more about these evaluation methods{' '} in our blog post here .

We use the TIMIT speech corpus as our evaluation dataset, providing a standardized benchmark widely recognized in the speech recognition community. The evaluation runs on a consistent compute environment (16GB RAM, 2vCPUs) to ensure reproducibility.

We Need More of These Projects

The success of platforms like the Open LLM Leaderboard, with nearly 3,000 submissions, demonstrates the community's appetite for transparent model comparison. While some argue that gamifying model development could lead to metric gaming, we've seen how leaderboards can transform competition into collaboration. They provide standardized benchmarks, foster innovation through transparency, and create an engaging entry point for newcomers to the field. Most importantly, they help surface promising but lesser-known models that might otherwise remain undiscovered.

Looking Forward

Creating leaderboards has historically been challenging, with many templates becoming quickly outdated. Hugging Face has recently streamlined this process through their Space SDK, which is ideal for evaluating models of varying sizes and computational requirements. To create a leaderboard using their template:

  1. Navigate to{' '} https://huggingface.co/new-space
  2. Select "Space SDK" as the template type
  3. Choose "Gradio" as the framework
  4. Select "Leaderboard" from the template options
  5. You will be asked for an access token in the UI before you create the space. This can be created in{' '} your settings {' '} and needs read access for the leaderboard to function.

For evaluating collections of smaller models that don't require extensive pre-testing, our lightweight implementation offers a practical working example. It demonstrates a complete end-to-end leaderboard system while maintaining simplicity in both setup and maintenance. We've made our codebase publicly available - feel free to duplicate it for your own specialized evaluation needs or use it as a reference implementation when building more complex systems. We are actively working on adding more evaluation datasets/metrics and support for more model architectures and{' '} welcome contributions !

The path to better model evaluation shouldn't be blocked by infrastructure complexity. Whether using Hugging Face's template or our simpler architecture, the goal remains the same: making model discovery and comparison more accessible to the community.

); } ================================================ FILE: app/blog/posts/dialect-sensitivity.tsx ================================================ // This is the individual blog post page content. It is rendered insides [slug]/page.tsx. // The metadata object is used to display the blog post on the main blog page (app/blog/page.tsx). // It will also be used to set meta tags for SEO once I get around to adding that. import type { Metadata } from '../posts'; import Image from 'next/image'; import { Source_Serif_4 } from 'next/font/google'; import BlogBody from '../blog-components/body'; import BlogHeading from '../blog-components/heading'; import BlogHeroImage from '../blog-components/hero-image'; import BlogSubheading from '../blog-components/subheading'; import BlogCode from '../blog-components/code'; const sourceSerif = Source_Serif_4({ subsets: ['latin'], weight: ['400', '700'], }); export const metadata: Metadata = { title: 'Dialect Sensitivity — Why it matters for the future of language acquisition', slug: 'dialect-sensitivity', date: '2024-12-23', published: false, image: '/images/blogDialect.png', summary: 'Dialect sensitivity is a crucial aspect of language learning. It refers to the ability of a language learning platform to adapt to the specific pronunciation and accent of a learner. This is important because it allows learners to better understand and speak the language, and it can also help them avoid making mistakes that could be embarrassing or even harmful in certain contexts.', category: 'Language Learning', tags: [], }; export default function PostBody() { return (


{metadata.summary}

At Koel Labs, we use two key metrics to evaluate phonemic transcription models:

  1. Phonemic Error Rate (PER): The classic "how many mistakes did you make?" metric
  2. Weighted Phonemic Edit Distance (WPED): A smarter approach that considers how similar sounds are to each other
Why Traditional Metrics Fall Short: A Tale of Three Words

Let's say we're trying to transcribe the word "Bop". Our model could make different types of mistakes, and this is where things get interesting.

Consider two models making different predictions:

  • Model 1 predicts: "Pop"
  • Model 2 predicts: "Sop"

From a linguistics perspective, these mistakes are not created equal:

  • 'B' and 'P' are like cousins—they're both plosive bilabial consonants, made by stopping airflow with your lips. The only difference is that 'B' is voiced (your vocal cords vibrate) and 'P' isn't.
  • 'B' and 'S', on the other hand, are more like distant relatives. 'S' is a fricative alveolar consonant, made by forcing air between your tongue and the ridge behind your upper teeth—a completely different sound!

This is where traditional PER falls short. It calculates errors based on simple substitutions, deletions, and insertions. In our example:

This is like saying someone who almost hit the bullseye did just as poorly as someone who hit the wall next to the dartboard. You can imagine that this would create very misleading evaluations.

Weighted Phonemic Edit Distance

This is where WPED comes to the rescue, powered by the{' '} Panphon library . Instead of treating each phoneme as completely different or identical, it represents them as a sequence of features—things like:

  • Is it voiced?
  • Where in the mouth is it made?
  • How is the air released?

Each phoneme becomes a feature vector, something like:

When we calculate the distance between these vectors, we get a much more nuanced view:

Diagram illustrating phonemic distances
Why This Matters

When you're teaching a model to transcribe speech, you want it to understand that predicting a similar sound is better than predicting a completely different one. This is especially important because different models might use different phoneme vocabularies—some might have 40 symbols, others up to 400.

Traditional PER might unfairly favor models that happen to use the exact same phoneme set as your ground truth data, even if other models are making more linguistically sensible predictions. WPED helps level the playing field by considering phonetic similarity.

The Takeaway

By using WPED alongside traditional metrics like PER, we can better understand how well our models are really performing at phonemic transcription. It's not just about getting the exact right symbol—it's about understanding the underlying sounds of language.

As we continue to develop better speech recognition models, metrics like WPED will be crucial in helping us measure progress in a way that actually reflects linguistic reality. After all, in the world of pronunciation, being close sometimes counts for a lot more than traditional metrics might suggest!

); } ================================================ FILE: app/blog/posts/feature-extraction-deep-dive.tsx ================================================ // This is the individual blog post page content. It is rendered insides [slug]/page.tsx. // The metadata object is used to display the blog post on the main blog page (app/blog/page.tsx). // It will also be used to set meta tags for SEO once I get around to adding that. import type { Metadata } from '../posts'; import BlogBody from '../blog-components/body'; import BlogHeading from '../blog-components/heading'; import BlogHeroImage from '../blog-components/hero-image'; import BlogSubheading from '../blog-components/subheading'; import BlogImage from '../blog-components/image'; import BlogDecorations from '../blog-components/decorations'; import BlogCode from '../blog-components/code'; import { Source_Serif_4 } from 'next/font/google'; const sourceSerif = Source_Serif_4({ subsets: ['latin'], weight: ['400', '700'], }); export const metadata: Metadata = { title: "The Underlying Intuition of Wav2Vec2's CNN", slug: 'wav2vec2-feature-extractor-intuition', date: '2025-10-19', published: true, image: '/blog/cnn/blogCNN.png', summary: "Typically, every explanation of the Wav2vec2 architecture begins with the iconic diagram, but without extensive background, it is hard to know what the cones labeled as the CNN are really doing. What does it actually mean to extract features from audio? Let's find a stronger visual intuition for this.", category: 'Technical Report', tags: [ { text: 'Audio Processing', url: '/tags/audio-processing' }, { text: 'Wav2Vec2', url: '/tags/wav2vec2' }, { text: 'CNN', url: '/tags/cnn' }, ], }; export default function PostBody() { return (


An Introduction

{metadata.summary}

Background

For those new to audio processing, audio is incredibly dense compared to other forms of data like text.

You can imagine sound as a continuous, smooth curve, but because we want to discretize this (represent audio numerically), we take snapshots of this wave at regular intervals. How frequently we take these snapshots in one second of sound is the sampling rate. This sampling rate decides how much information we have to store in audio.

The typical sampling rate of 16kHz represents processing 16 thousand values per second of audio. That's a lot of information! So how can the Wav2Vec2 architecture handle all of this information?

The Feature Extractor

The Feature Extractor, also called the Convolutional Neural Network (CNN), aims to extract high-level features while compressing a very dense temporal dimension . Think of it as when you want to do a quick portrait of someone: you want to capture their distinguishable facial features without spending too much time capturing every detail.

Kids asked to draw their fathers in 1949

The Feature Extractor will take 1/50th of a second (20ms) at a time and use the previous 1/200th of a second (5ms) to give itself some context. So let's take a closer look at how that single 25ms chunk gets processed.

Audio Stream to Representation

This first 25ms chunk of raw audio starts as a chunk of 400 values (seconds × sampling-rate = 0.025 × 16,000 = 400 samples) representing the audio waveform. Just a simple list like{' '}

Think of an audio waveform representing tiny vibrations of air molecules that result in changes in air pressure. While very cool, air pressure changes do not communicate any clear patterns in acoustic signals like pitch, timbre, and other audio characteristics. It would be much better to transform these 400 temporal samples into 512 higher-level features that capture these different acoustic properties across the entire 25ms window.

So that's what the feature extractor does, starting with the first layer. It takes this single monochannel audio input and projects to 512 dimensional space via 512 channels. And to load each of these channels, we look at a 1 × 10 window and take the dot product against a 10 × 1 kernel to compute a single value. We then slide over by a stride of 5 samples for the next 1 × 10 window. After the first window processes 10 unseen samples, the remaining 1 × 10 windows process 5 seen + 5 unseen samples at a time. So to find how many windows span the 400 samples, we consider just what are the unseen samples: 78 windows process 5 unseen samples at a time and the very first window processes 10 unseen samples, giving us 79 windows for this layer.

First layer convolution: kernel_size = 10, stride = 5

The output dimension should feel intuitive but if you don't want to use your brain to figure out your output dimension of a single channel you can use this nice formula:

Okay, now we have our first layer!

First Layer Convolution

Remember, we want to reduce the temporal dimension, so let's apply another convolution! To do this, the next layer will stride every second value of a 3-sample-window of the first layer. So basically look at every second value. Can you guess what the next block will look like?

Using our little formula…

39 values across 512 channels, nice! Layers 2–5 have the same kernel size and stride, so let's just repeat this…

19 -> 9 -> 4`} />

Awesome! Now the last 2 layers have stride and kernel size 2.

1`} />

Wow. We now just have a single value across 512 channels.

Throughout this process, activation functions like GeLU add non-linearity between each layer, allowing the network to learn complex patterns.

Single Chunk Processed by CNN

Why not just directly compress from 400 samples to 512 features in one step?

Jumping directly would require learning 204,800 parameters in one massive linear transformation, which is both hard to optimize and limited in what patterns it can capture. Multiple smaller layers with non-linear activations between them train more reliably and can build complex representations by combining simpler patterns. A chef that takes time to make components of a dish from scratch will produce a much better, complex dish than any microwave meal.

So that's how we process a single 25ms chunk of audio - transforming 400 raw samples into a rich 512-dimensional feature vector. Now we slide this entire process across the audio stream, moving 320 samples at a time...

All of that has been nicely wrapped in a few lines of code:

We can also grab the attention masks to

Feature Projection

Okay we are almost done, the Transformer (yellow block of the first Diagram) just requires a much larger dimension input. So we will just apply a linear projection to reach 1024 features. Lastly, we make sure to mask out the features we don't care about by using the attention masks.

We did it! 🎉 You just walked through exactly what the feature extractor (CNN) does in the Wav2Vec2 architecture!

Takeaways

After walking through each convolutional layer, you can probably understand how much computation and time this can consume. To be able to get these audio models to run in real time requires some careful optimization which is critical for many of our products.{' '} Learn more about streaming optimizations here!

Part 2:{' '} The Transformer!

); } ================================================ FILE: app/blog/posts/new-look-for-koel-labs.tsx ================================================ // This is the individual blog post page content. It is rendered insides [slug]/page.tsx. // The metadata object is used to display the blog post on the main blog page (app/blog/page.tsx). // It will also be used to set meta tags for SEO once I get around to adding that. import type { Metadata } from '../posts'; import { Source_Serif_4 } from 'next/font/google'; import BlogBody from '../blog-components/body'; import BlogHeading from '../blog-components/heading'; import BlogHeroImage from '../blog-components/hero-image'; import BlogSubheading from '../blog-components/subheading'; import BlogImage from '../blog-components/image'; import BlogDecorations from '../blog-components/decorations'; const sourceSerif = Source_Serif_4({ subsets: ['latin'], weight: ['400', '700'], }); export const metadata: Metadata = { title: 'A New Look for Koel Labs', slug: 'new-look-for-koel-labs', date: '2025-11-02', published: true, image: '/images/blog/new-look/blogNewLook.png', summary: 'You might have noticed that we’ve given Koel Labs a fresh new look. This change is intended to better align us with our mission of pioneering inclusive speech technology, with our goals as a research-focused startup, and with our belief in openly sharing our work.', category: 'Announcement', tags: [ { text: 'Website', url: 'https://koellabs.com', }, ], }; export default function PostBody() { return (


A Fresh New Look

You might have noticed that we’ve given Koel Labs a fresh new look. This change is intended to better align us with our mission of pioneering inclusive speech technology, with our goals as a research-focused startup, and with our belief in openly sharing our work.

Over the last few months, we’ve built a new state-of-the-art model for phonetic transcription, published multiple datasets, and developed internal exploratory tools, all while writing papers and showcasing our research.

Explore our website and sign up for the waitlist to be the first to experience our previews!

); } ================================================ FILE: app/blog/posts/open-source-announcement.tsx ================================================ // This is the individual blog post page content. It is rendered insides [slug]/page.tsx. // The metadata object is used to display the blog post on the main blog page (app/blog/page.tsx). // It will also be used to set meta tags for SEO once I get around to adding that. import type { Metadata } from '../posts'; import BlogBody from '../blog-components/body'; import BlogHeading from '../blog-components/heading'; import BlogHeroImage from '../blog-components/hero-image'; import BlogSubheading from '../blog-components/subheading'; import BlogDecorations from '../blog-components/decorations'; import { Source_Serif_4 } from 'next/font/google'; const sourceSerif = Source_Serif_4({ subsets: ['latin'], weight: ['400', '700'], }); export const metadata: Metadata = { title: 'Hello World! — Our Open Source Project Launch', slug: 'open-source-project', date: '2024-12-23', published: true, image: '/images/blogOpenSource.png', summary: ` At Koel Labs, our goal is to make pronunciation learning more accessible and inclusive. To represent the diversity of language and dialects, we're excited to announce that everything from model weights and training code to datasets, research papers, and the frontend UI is officially open source! `, category: 'Announcement', tags: [ { text: 'GitHub', url: 'https://github.com/KoelLabs', }, { text: 'Hugging Face', url: 'https://huggingface.co/KoelLabs', }, ], }; export default function PostBody() { return (


An Introduction

{metadata.summary}

The problem of pronunciation learning

Currently, 48% of foreign speakers are anxious about their accents [1]. Pronunciation is one of the most complex parts of learning a language. It's difficult to hear the difference between what you're saying and what you should be saying (sometimes, it's impossible without a teacher, which is not affordable for many). Once you hear the difference, it is also super hard to learn to make sounds you've never made in your native language.

Technology can bridge this gap; however, existing language learning tools do not value the diversity of languages and dialects. A good solution should be able to understand and teach any accent, not just define a "standard." Moreover, the feedback should be nuanced, actionable, and personalized based on your native language background, not just a human-void ASR system saying "yes" if it recognizes each word you're saying.

We want to collect datasets that represent the diversity of languages and dialects and make the entire process of training and evaluating models and then interpreting the results to surface explainable feedback to users reflect the diversity of backgrounds that language learners have.

For us, as immigrants and children of immigrants, pronunciation learning has a special meaning because we see not only the importance of fitting into our new communities but also of fitting into our extended families.

For others, pronunciation learning has other meanings, and we want to make sure that our tools can help everyone. This is why we're making the project open source — it allows for discussion and ideas from a worldwide audience.

Current progress and plans

We were fortunate to join the{' '} 2024 Mozilla Builders program . This has provided us with the resources to train state-of-the-art audio models for the first version of our tool targeting foreign English speakers. We are in the process of publishing an academic paper on our approach. We are planning on continuing to iterate on the pipeline to support more languages, dialects, and use cases such as speech pathology for speech-impaired children.

Our web application is not yet ready, but we are gearing up for a closed beta launch soon. In the meantime, check out our models on{' '} Hugging Face {' '} and training code on{' '} GitHub .

How do I get involved?

If your institution is interested in collaborating, please reach out to us at{' '} info@koellabs.com . We have already partnered with several leading HCI, Phonology, and Linguistics researchers from institutions like CMU, BCU, and UW.

If you are a developer, designer, or just interested in language learning, please partake in the discussion on our GitHub after consulting our{' '} contribution guidelines . Any feedback is welcome. User feedback is especially important to us, so if you are open to joining the beta testing program, please sign up{' '} here .


[1] Babbel Anxiety Study. Retrieved from{' '} https://www.babbel.com/en/magazine/accent-anxiety-study

); } ================================================ FILE: app/blog/posts/phonemic-transcription-metrics.tsx ================================================ // This is the individual blog post page content. It is rendered insides [slug]/page.tsx. // The metadata object is used to display the blog post on the main blog page (app/blog/page.tsx). // It will also be used to set meta tags for SEO once I get around to adding that. import type { Metadata } from '../posts'; import { Source_Serif_4 } from 'next/font/google'; import BlogBody from '../blog-components/body'; import BlogHeading from '../blog-components/heading'; import BlogHeroImage from '../blog-components/hero-image'; import BlogSubheading from '../blog-components/subheading'; import BlogCode from '../blog-components/code'; import BlogDecorations from '../blog-components/decorations'; import BlogImage from '../blog-components/image'; const sourceSerif = Source_Serif_4({ subsets: ['latin'], weight: ['400', '700'], }); // formatDate moved to blog primitives; using internal from heading export const metadata: Metadata = { title: 'A Deep Dive into Phonemic Transcription Metrics', slug: 'phonemic-transcription-metrics', date: '2024-12-30', published: true, image: '/images/blogPhonetic.png', summary: 'The International Phonetic Alphabet (IPA) is like the Swiss Army knife of pronunciation—it gives us precise symbols to represent every sound humans make in language. In recent years, predicting these phonemic transcriptions from audio has become a popular machine learning task. But how do we calculate the accuracy of these models?', category: 'Technical Report', tags: [ { text: 'Leaderboard', url: 'https://huggingface.co/spaces/KoelLabs/IPA-Transcription-EN', }, ], }; export default function PostBody() { return (

An Introduction

{metadata.summary}

At Koel Labs, we use two key metrics to evaluate phonemic transcription models:

  1. Phonemic Error Rate (PER): The classic "how many mistakes did you make?" metric
  2. Weighted Phonemic Edit Distance (WPED): A smarter approach that considers how similar sounds are to each other
Why Traditional Metrics Fall Short: A Tale of Three Words

Let's say we're trying to transcribe the word "Bop". Our model could make different types of mistakes, and this is where things get interesting.

Consider two models making different predictions:

  • Model 1 predicts: "Pop"
  • Model 2 predicts: "Sop"

From a linguistics perspective, these mistakes are not created equal:

  • 'B' and 'P' are like cousins—they're both plosive bilabial consonants, made by stopping airflow with your lips. The only difference is that 'B' is voiced (your vocal cords vibrate) and 'P' isn't.
  • 'B' and 'S', on the other hand, are more like distant relatives. 'S' is a fricative alveolar consonant, made by forcing air between your tongue and the ridge behind your upper teeth—a completely different sound!

This is where traditional PER falls short. It calculates errors based on simple substitutions, deletions, and insertions. In our example:

This is like saying someone who almost hit the bullseye did just as poorly as someone who hit the wall next to the dartboard. You can imagine that this would create very misleading evaluations.

Weighted Phonemic Edit Distance

This is where WPED comes to the rescue, powered by the{' '} Panphon library . Instead of treating each phoneme as completely different or identical, it represents them as a sequence of features—things like:

  • Is it voiced?
  • Where in the mouth is it made?
  • How is the air released?

Each phoneme becomes a feature vector, something like:

When we calculate the distance between these vectors, we get a much more nuanced view:

Why This Matters

When you're teaching a model to transcribe speech, you want it to understand that predicting a similar sound is better than predicting a completely different one. This is especially important because different models might use different phoneme vocabularies—some might have 40 symbols, others up to 400.

Traditional PER might unfairly favor models that happen to use the exact same phoneme set as your ground truth data, even if other models are making more linguistically sensible predictions. WPED helps level the playing field by considering phonetic similarity.

The Takeaway

By using WPED alongside traditional metrics like PER, we can better understand how well our models are really performing at phonemic transcription. It's not just about getting the exact right symbol—it's about understanding the underlying sounds of language.

As we continue to develop better speech recognition models, metrics like WPED will be crucial in helping us measure progress in a way that actually reflects linguistic reality. After all, in the world of pronunciation, being close sometimes counts for a lot more than traditional metrics might suggest!

); } ================================================ FILE: app/blog/posts/transformer-deep-dive.tsx ================================================ import type { Metadata } from '../posts'; import BlogBody from '../blog-components/body'; import BlogHeading from '../blog-components/heading'; import BlogHeroImage from '../blog-components/hero-image'; import BlogSubheading from '../blog-components/subheading'; import BlogDecorations from '../blog-components/decorations'; import BlogCode from '../blog-components/code'; import { Source_Serif_4 } from 'next/font/google'; import BlogImage from '../blog-components/image'; const sourceSerif = Source_Serif_4({ subsets: ['latin'], weight: ['400', '700'], }); export const metadata: Metadata = { title: 'The Underlying Intuition of Wav2Vec2’s Transformer', slug: 'transformer-deep-dive', date: '2025-10-25', published: true, image: '/blog/transformer/blogTransformer.png', summary: 'Wav2Vec2’s Transformer handles encoded audio features and aligns them to text. Building on our blog post about the feature extractor, this post dives into positional encodings tailored to audio and how CTC loss solves alignment without frame-level labels.', category: 'Technical Report', tags: [ { text: 'Audio Processing', url: '/tags/audio-processing' }, { text: 'Wav2Vec2', url: '/tags/wav2vec2' }, { text: 'Transformer', url: '/tags/transformer' }, ], }; export default function PostBody() { return (


Typically, every explanation of the{' '} Wav2vec2 architecture {' '} begins with this iconic diagram (Baevski et. al). But without extensive background, it is hard to know how this yellow block compares to the traditional Transformer.

The Wav2Vec2 architecture distinguishes itself from other Transformer-based architectures largely in processing audio input and aligning the output. In our last article, we discuss Wav2Vec2's Feature Extractor: turning raw audio into feature vectors. Now we'll trace how Wav2Vec2 encodes positional information of sound in the Transformer and how it aligns its predictions to text.

Wav2Vec2 Transformer: Processing Input

Also called the Context Network, the Transformer processes feature vectors using self-attention, which lets each feature attend to all other features in the sequence.

Visual glimpse into attention matrix computation of self-attention

But a critical challenge is that attention naturally ignores the order of the input sequence.

When we take the dot product of the query and key vectors, a different ordering of the input sequence can produce equivalent vectors. This goes back to the dot product being commutative.

We can look at another commutative operation as a toy example: addition. You can see that there is{' '} no way to distinguish a 1 in the very first position from a 1 in the very last position, {' '} the attention mechanism understands the sequences equivalently.

Why is this a problem?

The sounds we produce are often influenced by surrounding sounds. For example, in many dialects, a vowel gets inserted before the “L” in “bottle”, creating the syllabic “l̩”: “bottal”. Understanding each sound individually would likely mean that syllabic sounds like “l̩” would be poorly predicted by the model. Surrounding consonants and vowels influence our speech, making embeddings that encode these temporal relationships essential.

Positional Embeddings

Before we understand how Wav2Vec2 handles position, let's look at various attempts to develop positional embeddings. We will start with absolute positional embeddings.

The simplest approach is to give each position in the sequence a unique vector, almost like a name tag:

Position 0 gets vector A, Position 1 gets vector B, Position 2 gets vector C, …

But hardcoded positions are limiting. You can only have as many tags as the{' '} longest training example so generalizing to unseen, longer lengths during inference is difficult.

Sinusoidal Positional Embeddings

In an attempt to develop a method that could handle unseen sequence lengths, the authors of{' '} Attention Is All You Need {' '} introduced sinusoidal embeddings.

They use sine and cosine functions on even and odd positions, denoted as{' '} PE(pos, 2i) and PE(pos, 2i+1), respectively.

Break: Story Time

For an intuition of sinusoidal embeddings, let me tell you a story.

My friend Helen was attending school in Germany and told me that she did terribly in English class — she had gotten a 4! I laughed and said she was being dramatic: “A 4 isn’t bad at all!” To make her feel better, I told her: “I got a D in science”.

But then, she told me that a 5 was the worst grade you could get. Turns out, I was doing about as badly in science as she was in English…

Funny enough, our poor academics illustrate the sine function quite well. The sine function allows for strong local understanding. Within my American school system, my classmates all knew how grades compared: an A is better than a B, a B is better than a C. Within Helen’s German system, her peers also understood the relative order. So global distance is harder to understand but local relative distance can be well understood.

For a model that cares more about local distance, this function is very practical. Instead of giving every position its own unique number, which would quickly become unmanageable, they use a smaller set of values along a few smooth repeating functions. Note that the authors added cosine for additional expressiveness so more numbers could be represented, but it follows the same principle as the sine function.

Sine and cosine functions for arbitrarily chosen dimension 16

Funny enough,{' '} Attention Is All You Need {' '} spent time adding in sinusoidal embeddings only for them to perform identically to simple indexing.

The challenge appears not to be formulating positional understanding but preserving it.{' '} Yan et al. {' '} find that relative positional understanding in the input embedding gets destroyed during the attention mechanism when projected through the weight matrices (W_Q and W_K).

https://arxiv.org/pdf/1911.04474

In the Figure (Yan et al.), distance information is preserved in the raw positional encodings (blue line) as shown by the symmetrical peak where positions close to each other (in the center) have a higher dot product. However, after multiplication by the attention weight matrices, we get seemingly random patterns (orange/green lines) that no longer clearly encode distance.

Limitations of Absolute Positional Embeddings

Both methods of absolute positional embeddings are limited by the fact that you can only recover positional information by some global lookup table telling you the sinusoidal values and simple indices which every feature corresponds to.

Instead, a good way to find the positional information of a feature could be to bake it into the feature itself. Positional information would be innate to the feature like a puzzle piece. Each piece has grooves from the neighboring pieces that inform you where it should go. Even though the puzzle piece does not have a number indicating its position, it can be determined where the puzzle piece belongs using other neighboring pieces.

Wav2Vec2 positional encodings

Wav2Vec2 accomplishes positional understanding by capturing local dependencies through{' '} convolutions at the input level, before features reach the transformer (methods like RoPE achieve similar goals by modifying the attention mechanism itself, but that's a story for{' '} another post ).

As we covered in our{' '} previous post {' '} on the feature extractor (CNN), convolutions are used to naturally encode positions by using a sliding window over adjacent frames. They effectively represent relative local patterns like "a pattern across frames t-1, t, and t+1" rather than absolute global ones like "frame t with a position tag."

Likewise, convolutions can be applied to the output feature vectors after linear projection from the feature extractor for positional understanding.

The Wav2Vec2 architecture uses grouped convolutions, where different groups specialize in different temporal relationships. Some might focus on quick changes between sounds, while others capture longer patterns like rhythm and intonation.

So now the positional information survives as it is intrinsic to what the feature represents. If a feature encodes "a rising pitch across three frames," that relational pattern persists through linear transformations.

From here, every 25 millisecond frame processed has positional information that the Transformer processes, outputting a single token prediction.

But this creates obvious problems: people don’t speak one character every 20 milliseconds! For example, the “o” in “hello” probably takes ~1/10th of a second, which is 100 milliseconds, much more than 20 milliseconds.

Finding what parts of the audio correspond to the predicted transcription is quite challenging. The audio datasets the model is trained on will (most of the time) not include timing information that says which word or syllable occurs where in the audio file because annotating this is super labor intensive. How will we know how to align the sequence to text?

CTC Loss: Aligning The Output

Training with CTC Loss:

Goal: Train the model to assign high probability to paths that match the target sequence as shown below.

Note that we will “collapse” sequences by merging repeated characters and dropping the ε character:

The challenging part of this task is that there are many ways to predict this distribution such that you collapse to the correct target sequence. For example, “CAAB” and “εCAεB” both collapse to “CAB”. So how do we train a model with a multitude of possible sequences?

Your intuition may be to sum across the probabilities of paths that produce correct sequences.

Not a bad idea! But this will just give us some arbitrary number like 5.76, which makes it hard to know how well the model is performing.

In an ideal world where the model has 100% certainty of the path that produces a correct sequence, it should receive 0 penalty. Likewise, if the model is very uncertain but still produces a correct sequence, it should receive more penalty.

It is like a multiple choice exam: two students can score well, but one may have guessed more than the other. If we know each student’s own personal certainty, can we write a function that reflects this?

Easy! We use the function -log(x). For certainty of 1, we have penalty -log(1) = 0. Likewise for low certainty like 0.2, we have penalty -log(0.2) = 0.69

Nice! This is a good loss function for training.

Efficiency

But a brute force approach to sum path probabilities as illustrated above would be very slow. Dynamic programming can be used instead where we use memoization to store total probabilities at each timestep. Here is what the DP table could look like:

Inference with CTC Loss:

Goal: Given the trained model’s outputs, find the most likely text sequence.

We could simply use{' '} greedy search {' '} to grab the maximum probability at each timestep, but greedy makes locally optimal choices that can miss the globally best path. A slightly lower probability token now might enable much higher probabilities later. So, a modified beam search is used to optimally find the best sequence even when you have multiple possible alignments mapping to the same output.

So this modified beam search on the CTC head outputs allows us to find our final output!

Conclusion

To summarize, we started with a fundamental problem: attention mechanisms don’t naturally understand order. To fix this, Wav2Vec2 uses convolutional positional encodings that capture local context in sound rather than absolute positions (crucial for variable-length audio and how adjacent sounds influence each other).

Then we tackled the alignment challenge. Without knowing exactly when each character appears in the audio, CTC provides an elegant solution: consider all possible alignments, predict at regular 20ms intervals, use blank tokens for silence, and collapse duplicates during decoding.

By focusing on relationships rather than absolute positions, and probabilities rather than hard alignments, Wav2Vec2 can learn from audio at scale.

); } ================================================ FILE: app/contact/page.tsx ================================================ 'use client'; import CTA from '@/components/sections/cta'; import Footer from '@/components/sections/footer'; import Header from '@/components/ui/header'; import { Button } from '@/components/ui/base/button'; import { Input } from '@/components/ui/base/input'; import { Label } from '@/components/ui/base/label'; import { Textarea } from '@/components/ui/base/textarea'; import { useToast } from '@/hooks/use-toast'; import { useCharacterLimit } from '@/lib/use-character-limit'; import { AtSign, BookDashed, User } from 'lucide-react'; import { useState, useCallback } from 'react'; import { GoogleReCaptchaProvider, useGoogleReCaptcha } from 'react-google-recaptcha-v3'; function ContactForm() { const { toast } = useToast(); const maxLength = 500; const [value, setValue] = useState(''); const { characterCount, handleChange, maxLength: limit } = useCharacterLimit({ maxLength }); // setup reCaptcha v3 (background score) const { executeRecaptcha } = useGoogleReCaptcha(); // event handler for reCaptcha verification on form submission const submitFormWithReCaptchaVerification = useCallback( async (formData: FormData) => { if (!executeRecaptcha) { toast({ title: 'Error', description: 'ReCAPTCHA not yet loaded. Cannot verify that you are a human, please wait a bit and try again. Double check you are not using a VPN or adblocker if the problem persists and, if all else fails, reach out via email.', status: 'error', }); return; } const token = await executeRecaptcha('contactFormSubmit'); const res = await fetch( `/api/submitGoogleFormWithReCaptcha?formName=contact&token=${token}`, { method: 'POST', body: formData, }, ); if (res.ok) { toast({ title: 'Submitted', description: 'Thank you for contacting us!', }); return true; } else { res.text().then(console.error); toast({ title: 'Error', description: 'Failed to verify your humanity. 
Make sure to disable VPNs and adblockers, double check you are not a robot, and email us if the issue persists.', status: 'error', }); return false; } }, [executeRecaptcha], ); return (

Contact Form

{/* Ready to start learning? */} Let's get in touch

We're here to help you with any questions, concerns, or feedback you may have.

{ e.preventDefault(); const target = e.currentTarget; const formData = new FormData(target); submitFormWithReCaptchaVerification(formData).then(success => { if (success) { setValue(''); target.reset(); } }); }} className="space-y-2" >