{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2022-06-25T08:55:39.339410Z", "start_time": "2022-06-25T08:55:38.522277Z" } }, "outputs": [], "source": [ "import os\n", "import re\n", "from tqdm import tqdm\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2022-06-25T08:55:45.550205Z", "start_time": "2022-06-25T08:55:45.521302Z" } }, "outputs": [], "source": [ "# Load both tweet sets and attach the class label (0 = complaint, 1 = non-complaint)\n", "data_complaint = pd.read_csv('data/complaint1700.csv')\n", "data_complaint['label'] = 0\n", "data_non_complaint = pd.read_csv('data/noncomplaint1700.csv')\n", "data_non_complaint['label'] = 1\n", "\n", "# Stack the two frames, renumber the rows from 0, and drop the 'airline' column.\n", "# Chaining (rather than inplace=True) builds `data` in one re-runnable assignment.\n", "data = (\n", "    pd.concat([data_complaint, data_non_complaint], axis=0)\n", "    .reset_index(drop=True)\n", "    .drop(columns=['airline'])\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2022-06-25T08:55:53.310710Z", "start_time": "2022-06-25T08:55:53.295841Z" } }, "outputs": [ { "data": { "text/html": [ "
| \n", " | id | \n", "tweet | \n", "label | \n", "
|---|---|---|---|
| 2579 | \n", "82091 | \n", "@AlaskaAir @RSherman_25 Thank you so much!! Ca... | \n", "1 | \n", "
| 657 | \n", "147575 | \n", "@DeltaAssist hi. I lost my sunglasses on a fli... | \n", "0 | \n", "
| 1971 | \n", "23890 | \n", "Flights to #PuertoRico booked on @JetBlue! Can... | \n", "1 | \n", "
| 3312 | \n", "160070 | \n", "@united Do you offer open-ended tickets? CLT-B... | \n", "1 | \n", "
| 1034 | \n", "63946 | \n", "So @AmericanAir I'm going to need you all to g... | \n", "0 | \n", "