sec-data-fetcher
Advanced tools
Comparing version 1.0.1 to 1.0.2
@@ -36,3 +36,7 @@ "use strict"; | ||
} | ||
// Extract tables from a URL | ||
const filingUrl = 'https://www.sec.gov/Archives/edgar/data/0000320193/0000320193-21-000010.txt'; | ||
const tablesFromUrl = yield secClient.extractTablesFromFilingUrl(filingUrl); | ||
//console.log('Extracted Tables from URL:', tablesFromUrl); | ||
}))(); | ||
//# sourceMappingURL=example.js.map |
@@ -60,2 +60,20 @@ export interface CompanyTicker { | ||
/** | ||
* Fetches the SEC filing content from a given URL. | ||
* @param url - The URL of the SEC filing. | ||
* @returns The raw HTML content of the filing. | ||
*/ | ||
fetchFiling(url: string): Promise<string>; | ||
/** | ||
* Extracts tables from a given SEC filing URL. | ||
* @param url - The URL of the SEC filing. | ||
* @returns An array of tables extracted from the filing. | ||
*/ | ||
extractTablesFromFilingUrl(url: string): Promise<Array<Array<Array<string>>>>; | ||
/** | ||
* Extracts tables from the provided HTML content of an SEC filing. | ||
* @param filingContent - The raw HTML content of the SEC filing. | ||
* @returns An array of tables extracted from the content. | ||
*/ | ||
extractTablesFromContent(filingContent: string): Array<Array<Array<string>>>; | ||
/** | ||
* Parses SEC filing content from a string into an object. | ||
@@ -62,0 +80,0 @@ * @param content - The SEC filing content as a string. |
@@ -17,2 +17,3 @@ "use strict"; | ||
const utils_1 = require("./utils"); | ||
const tableParser_1 = require("./parsers/tableParser"); | ||
class SECClient { | ||
@@ -122,2 +123,44 @@ /** | ||
/** | ||
* Fetches the SEC filing content from a given URL. | ||
* @param url - The URL of the SEC filing. | ||
* @returns The raw HTML content of the filing. | ||
*/ | ||
fetchFiling(url) { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
try { | ||
const response = yield this.http.get(url, { | ||
headers: { | ||
'User-Agent': this.userAgent, | ||
'Accept-Encoding': 'gzip, deflate', | ||
Host: 'www.sec.gov', | ||
}, | ||
}); | ||
return response.data; // The raw HTML of the SEC filing | ||
} | ||
catch (error) { | ||
throw new Error(`Failed to fetch SEC filing from URL: ${url}. Error: ${error}`); | ||
} | ||
}); | ||
} | ||
/** | ||
* Extracts tables from a given SEC filing URL. | ||
* @param url - The URL of the SEC filing. | ||
* @returns An array of tables extracted from the filing. | ||
*/ | ||
extractTablesFromFilingUrl(url) { | ||
return __awaiter(this, void 0, void 0, function* () { | ||
const filingContent = yield this.fetchFiling(url); // Fetch the raw HTML content | ||
const tables = (0, tableParser_1.extractTables)(filingContent); // Extract tables using the parsing function | ||
return tables; | ||
}); | ||
} | ||
/** | ||
* Extracts tables from the provided HTML content of an SEC filing. | ||
* @param filingContent - The raw HTML content of the SEC filing. | ||
* @returns An array of tables extracted from the content. | ||
*/ | ||
extractTablesFromContent(filingContent) { | ||
return (0, tableParser_1.extractTables)(filingContent); // Extract tables from the provided HTML content | ||
} | ||
/** | ||
* Parses SEC filing content from a string into an object. | ||
@@ -124,0 +167,0 @@ * @param content - The SEC filing content as a string. |
{ | ||
"name": "sec-data-fetcher", | ||
"version": "1.0.1", | ||
"version": "1.0.2", | ||
"description": "An npm library to fetch SEC data from all supported endpoints with rate limiting.", | ||
@@ -13,2 +13,3 @@ "main": "dist/index.js", | ||
"lint:fix": "eslint 'src/**/*.ts' --fix", | ||
"example": "node dist/example.js", | ||
"clean": "rm -rf dist", | ||
@@ -23,2 +24,4 @@ "prepare": "npm run build" | ||
"finance", | ||
"parsing", | ||
"formatting", | ||
"api", | ||
@@ -33,2 +36,3 @@ "data", | ||
"axios-rate-limit": "^1.3.0", | ||
"cheerio": "^1.0.0", | ||
"fast-xml-parser": "^4.2.4" | ||
@@ -38,2 +42,3 @@ }, | ||
"@types/axios": "^0.14.0", | ||
"@types/cheerio": "^0.22.35", | ||
"@types/jest": "^29.5.13", | ||
@@ -40,0 +45,0 @@ "@types/node": "^20.4.2", |
@@ -79,3 +79,33 @@ # SEC Data Fetcher | ||
- **getObjectFromUrl(url: string):** Promise<FilingObject> | ||
- **fetchFiling(url: string):** Promise<string> | ||
Fetches the raw HTML content of an SEC filing from the provided URL. | ||
- **extractTablesFromFilingUrl(url: string):** Promise<Array<Array<Array<string>>>> | ||
Fetches the filing content from a URL and extracts tables from the filing. | ||
- **extractTablesFromContent(filingContent: string):** Array<Array<Array<string>>> | ||
Extracts tables from the provided raw HTML content of an SEC filing. | ||
### Extract Tables from SEC Filings | ||
The package provides two ways to extract tables from SEC filings: | ||
1. **Extract from a URL**: Fetch and extract tables directly from an SEC filing URL. | ||
2. **Extract from provided HTML content**: Extract tables from the provided HTML content when you already have the filing data. | ||
```typescript | ||
import { SECClient } from 'sec-data-fetcher'; | ||
const secClient = new SECClient({ | ||
userAgent: 'Your Company <your-email@example.com>', | ||
}); | ||
// Extract tables from an SEC filing URL | ||
const tablesFromUrl = await secClient.extractTablesFromFilingUrl( | ||
'https://www.sec.gov/...', | ||
); | ||
// Extract tables from provided HTML content | ||
const filingContent = '<html>...</html>'; | ||
const tablesFromContent = secClient.extractTablesFromContent(filingContent); | ||
``` | ||
## Notes | ||
@@ -82,0 +112,0 @@ |
@@ -35,2 +35,8 @@ import { SECClient } from './secAPI'; // Adjust the import path as needed | ||
} | ||
// Extract tables from a URL | ||
const filingUrl = | ||
'https://www.sec.gov/Archives/edgar/data/0000320193/0000320193-21-000010.txt'; | ||
const tablesFromUrl = await secClient.extractTablesFromFilingUrl(filingUrl); | ||
//console.log('Extracted Tables from URL:', tablesFromUrl); | ||
})(); |
@@ -7,2 +7,3 @@ // src/secAPI.ts | ||
import { padCik } from './utils'; | ||
import { extractTables } from './parsers/tableParser'; | ||
@@ -169,2 +170,48 @@ export interface CompanyTicker { | ||
/**
 * Fetches the SEC filing content from a given URL.
 *
 * The explicit `Host` header is taken from `url` itself so that it always
 * agrees with the server being contacted. For `https://www.sec.gov/...`
 * URLs it is still `www.sec.gov`; URLs that cannot be parsed as absolute
 * keep that same value as a fallback.
 *
 * @param url - The URL of the SEC filing.
 * @returns The response body (`response.data`) returned for that URL.
 * @throws Error when the request fails; the message contains the URL and
 * the stringified underlying error.
 */
public async fetchFiling(url: string): Promise<string> {
  // Fallback used when `url` is not an absolute URL.
  let host = 'www.sec.gov';
  try {
    host = new URL(url).host;
  } catch {
    // Not parseable on its own; keep the fallback host.
  }

  try {
    const response = await this.http.get<string>(url, {
      headers: {
        'User-Agent': this.userAgent,
        'Accept-Encoding': 'gzip, deflate',
        Host: host,
      },
    });
    return response.data;
  } catch (error) {
    throw new Error(
      `Failed to fetch SEC filing from URL: ${url}. Error: ${error}`,
    );
  }
}
/**
 * Downloads the filing at `url` and runs the table parser over it.
 * @param url - The URL of the SEC filing.
 * @returns The tables that `extractTables` finds in the downloaded content,
 * as nested string arrays.
 */
public async extractTablesFromFilingUrl(
  url: string,
): Promise<Array<Array<Array<string>>>> {
  // Download first, then hand the body straight to the parser.
  return extractTables(await this.fetchFiling(url));
}
/**
 * Runs the table parser over filing content the caller already holds.
 * @param filingContent - The content of the SEC filing to parse.
 * @returns The tables that `extractTables` finds in `filingContent`,
 * as nested string arrays.
 */
public extractTablesFromContent(
  filingContent: string,
): Array<Array<Array<string>>> {
  // Pure delegation: no fetching happens here.
  const tables = extractTables(filingContent);
  return tables;
}
/** | ||
* Parses SEC filing content from a string into an object. | ||
@@ -171,0 +218,0 @@ * @param content - The SEC filing content as a string. |
@@ -29,2 +29,31 @@ import { SECClient } from '../src/secAPI'; | ||
}); | ||
it('should fetch and extract tables from a filing URL', async () => {
  const filingUrl =
    'https://www.sec.gov/Archives/edgar/data/0000320193/0000320193-21-000010.txt';

  // Stub the download so the test is deterministic and never contacts sec.gov.
  const fetchSpy = jest
    .spyOn(secClient, 'fetchFiling')
    .mockResolvedValue(
      '<html><body><table><tr><td>Cell</td></tr></table></body></html>',
    );

  try {
    const tables = await secClient.extractTablesFromFilingUrl(filingUrl);

    // The URL must be passed through unchanged to the fetch step.
    expect(fetchSpy).toHaveBeenCalledWith(filingUrl);
    expect(tables.length).toBeGreaterThan(0); // Ensure at least one table is extracted
  } finally {
    // Restore the real method so other tests are unaffected.
    fetchSpy.mockRestore();
  }
});
it('should extract tables from provided filing content', () => {
  // Rows expected from the single table in the fixture below, in order.
  const expectedRows = [
    ['Header 1', 'Header 2'],
    ['Row 1 Col 1', 'Row 1 Col 2'],
    ['Row 2 Col 1', 'Row 2 Col 2'],
  ];
  const filingContent = `
<html>
<body>
<table>
<tr><th>Header 1</th><th>Header 2</th></tr>
<tr><td>Row 1 Col 1</td><td>Row 1 Col 2</td></tr>
<tr><td>Row 2 Col 1</td><td>Row 2 Col 2</td></tr>
</table>
</body>
</html>
`;

  const tables = secClient.extractTablesFromContent(filingContent);

  expect(tables.length).toBe(1);
  expectedRows.forEach((row, index) => {
    expect(tables[0][index]).toEqual(row);
  });
});
}); |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
48077
33
876
127
4
13
+ Added cheerio@^1.0.0
+ Added boolbase@1.0.0 (transitive)
+ Added cheerio@1.0.0 (transitive)
+ Added cheerio-select@2.1.0 (transitive)
+ Added css-select@5.1.0 (transitive)
+ Added css-what@6.1.0 (transitive)
+ Added dom-serializer@2.0.0 (transitive)
+ Added domelementtype@2.3.0 (transitive)
+ Added domhandler@5.0.3 (transitive)
+ Added domutils@3.1.0 (transitive)
+ Added encoding-sniffer@0.2.0 (transitive)
+ Added entities@4.5.0 (transitive)
+ Added htmlparser2@9.1.0 (transitive)
+ Added iconv-lite@0.6.3 (transitive)
+ Added nth-check@2.1.1 (transitive)
+ Added parse5@7.2.1 (transitive)
+ Added parse5-htmlparser2-tree-adapter@7.1.0 (transitive)
+ Added parse5-parser-stream@7.1.2 (transitive)
+ Added safer-buffer@2.1.2 (transitive)
+ Added undici@6.21.0 (transitive)
+ Added whatwg-encoding@3.1.1 (transitive)
+ Added whatwg-mimetype@4.0.0 (transitive)