summaryrefslogtreecommitdiff
path: root/ui_security_inventory_23_parsing.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'ui_security_inventory_23_parsing.ipynb')
-rw-r--r--ui_security_inventory_23_parsing.ipynb109
1 files changed, 77 insertions, 32 deletions
diff --git a/ui_security_inventory_23_parsing.ipynb b/ui_security_inventory_23_parsing.ipynb
index aef55ec..297203f 100644
--- a/ui_security_inventory_23_parsing.ipynb
+++ b/ui_security_inventory_23_parsing.ipynb
@@ -10,7 +10,9 @@
"import numpy as np\n",
"import pandas as pd\n",
"import yfinance as yf\n",
- "import requests"
+ "import requests\n",
+ "import concurrent.futures\n",
+ "import json"
]
},
{
@@ -49,6 +51,7 @@
"op_df.insert(9, 'Industry', pd.NA)\n",
"op_df.insert(10, 'Private Placement', False)\n",
"op_df.insert(11, 'Ticker', pd.NA)\n",
+ "op_df.insert(11, 'Info', pd.NA)  # empty placeholder like the sibling columns; the builtin type object would be stored as the cell value\n",
"# op_df = op_df.insert(7, 'Bank', pd.NA)\n",
"op_df.head()"
]
@@ -305,13 +308,12 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "f2e232f5",
+ "id": "bc68a7ab",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
- "company_name_to_ticker = dict()\n",
- "for name in company_names:\n",
+ "def get_ticker(name):\n",
" try:\n",
" # try to get the ticker\n",
" ticker = getTicker(name)\n",
@@ -321,9 +323,16 @@
" short_name = name.split(' ')[0]\n",
" ticker = getTicker(short_name)\n",
" except:\n",
- " # no ticker could be found, probably a private company, check to make sure\n",
+ " # no ticker could be found, probably a private company, check by hand to make sure\n",
" ticker = 'NO_TICKER_FOUND'\n",
- " company_name_to_ticker[name] = ticker"
+ " return (name, ticker)\n",
+ "\n",
+ "company_name_to_ticker = dict()\n",
+ "with concurrent.futures.ThreadPoolExecutor() as executor:\n",
+ " futures = [executor.submit(get_ticker, name) for name in company_names]\n",
+ " for future in concurrent.futures.as_completed(futures):\n",
+ " name, ticker = future.result()\n",
+ " company_name_to_ticker[name] = ticker"
]
},
{
@@ -337,76 +346,112 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "d80dca43",
+ "id": "e8f64024",
"metadata": {},
"outputs": [],
"source": [
- "company_name_to_ticker"
+ "for i in cb_df.index:\n",
+ "    try:\n",
+ "        cb_df.at[i,'Ticker'] = company_name_to_ticker[cb_df.at[i,'Company']]\n",
+ "    except KeyError:  # only a name missing from the lookup table is expected here; other errors should surface\n",
+ "        assert cb_df.at[i,'Company'] == 'Corporate Bonds', f\"Expected Corporate Bonds, got {cb_df.at[i,'Company']}\""
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "921c344d",
+ "id": "04e49491",
"metadata": {},
"outputs": [],
"source": [
- "company_names"
+ "cb_df.head()"
]
},
{
- "cell_type": "code",
- "execution_count": null,
- "id": "7abeab84",
+ "cell_type": "markdown",
+ "id": "2342f360",
"metadata": {},
- "outputs": [],
"source": [
- "company_name_to_ticker[cb_df.at[104,'Company']]"
+ "## Get info from ticker "
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "e8f64024",
+ "id": "6fe3d0df",
"metadata": {},
"outputs": [],
"source": [
- "for i in cb_df.index:\n",
- " try:\n",
- " cb_df.at[i,'Ticker'] = company_name_to_ticker[cb_df.at[i,'Company']]\n",
- " except:\n",
- " assert cb_df.at[i,'Company'] == 'Corporate Bonds'"
+ "def get_info_from_ticker(ticker):\n",
+ "    \"\"\"Return the Yahoo Finance info mapping for a single ticker symbol.\"\"\"\n",
+ "    # yf.Ticker takes the symbol as-is; yf.Tickers keys its .tickers dict by the upper-cased symbol, so indexing it with the raw string can raise KeyError\n",
+ "    return yf.Ticker(ticker).info"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "04e49491",
- "metadata": {},
+ "id": "9ed7c317",
+ "metadata": {
+ "scrolled": true
+ },
"outputs": [],
"source": [
- "cb_df.head()"
+ "%%time\n",
+ "def get_info(name):\n",
+ "    \"\"\"Return (name, info) for a company; info is the string 'No Info Found' when the lookup fails.\"\"\"\n",
+ "    try:\n",
+ "        info = get_info_from_ticker(company_name_to_ticker[name])\n",
+ "    except Exception:  # best-effort lookup, but let KeyboardInterrupt/SystemExit propagate\n",
+ "        info = 'No Info Found'\n",
+ "    return (name, info)\n",
+ "\n",
+ "# Run the lookups concurrently to speed this up; results are collected as they complete.\n",
+ "company_info_dict = dict()\n",
+ "with concurrent.futures.ThreadPoolExecutor() as executor:\n",
+ "    futures = [executor.submit(get_info, name) for name in company_names]\n",
+ "    for future in concurrent.futures.as_completed(futures):\n",
+ "        name, info = future.result()\n",
+ "        company_info_dict[name] = info"
]
},
{
"cell_type": "markdown",
- "id": "2342f360",
+ "id": "180e9785",
"metadata": {},
"source": [
- "## Get info from ticker "
+ "##### Link info to each company, saved as a JSON string in the DataFrame"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "6fe3d0df",
+ "id": "570e70b3",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# Store each company's info in the 'Info' column as a JSON string.\n",
+ "for i in cb_df.index:\n",
+ "    company = cb_df.at[i,'Company']\n",
+ "    if company == 'Corporate Bonds':\n",
+ "        continue  # rows labelled 'Corporate Bonds' are skipped\n",
+ "    info = company_info_dict[company]\n",
+ "    # get_info stores the string 'No Info Found' on failure, so only real mappings are serialized\n",
+ "    if not isinstance(info, dict):\n",
+ "        continue\n",
+ "    cb_df.at[i,'Info'] = json.dumps(info)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3bd1606d",
"metadata": {},
"outputs": [],
"source": [
- "def get_info_from_ticker(ticker):\n",
- " # Search for the company on Yahoo Finance\n",
- " search_results = yf.Tickers(company_name)\n",
- " return search_results.tickers['T'].info"
+ "cb_df.head()"
]
}
],
@@ -426,7 +471,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.9"
+ "version": "3.10.6"
},
"vscode": {
"interpreter": {