import requests
import openpyxl

input_file = "firms.xlsx"
output_file = "Scrapped_website.xlsx"

wb_in = openpyxl.load_workbook(input_file)
ws_in = wb_in.active

wb_out = openpyxl.Workbook()
ws_out = wb_out.active
ws_out.append(["FRN", "Firm Name", "Website"])

headers = {
    "User-Agent": "Mozilla/5.0",
    "Accept": "application/json",
    "Referer": "https://register.fca.org.uk/",
}

for row in range(2, ws_in.max_row + 1):
    frn = str(ws_in[f"A{row}"].value).strip()
    firm_name = ws_in[f"B{row}"].value

    url = f"https://register.fca.org.uk/services/apexrest/FRNDetails?frn={frn}"

    try:
        r = requests.get(url, headers=headers, timeout=10)
        data = r.json()

        website = data.get("FirmWebsite", "No website listed")

    except Exception as e:
        website = "Error"

    ws_out.append([frn, firm_name, website])

wb_out.save(output_file)
print("Done.")
