# check all the links - get a list of all internal links
# find all the files that are not used
import os

def get_all_files(base):
    """Return the set of all file paths below ``base``, relative to ``base``.

    Directories named ``.git``, ``_site`` or ``.quarto`` are skipped at any
    depth, together with everything beneath them.

    Args:
        base: directory to walk. Any directory works, not just ``"."``.

    Returns:
        A set of path strings relative to ``base`` (no leading ``./``),
        using the platform's path separator.
    """
    skips = {'.git', '_site', '.quarto'}
    all_files = set()
    for root, dirs, files in os.walk(base):
        # Prune in place: os.walk only skips a subtree when the very list it
        # handed out is modified.
        dirs[:] = [d for d in dirs if d not in skips]
        for file in files:
            # relpath is correct for every base; slicing off two characters
            # was only right when base was exactly ".".
            all_files.add(os.path.relpath(os.path.join(root, file), base))
    return all_files


# Snapshot of every file under the current directory. This module-level set
# is shared state: extIntLinks() removes a .qmd entry each time it finds a
# link to the matching .html page, so whatever remains after the crawl was
# not matched by any link.
extra_files = get_all_files(".")

print(extra_files)
{'lectures/images/StaticSingleAssignment_Part48.jpg', 'lectures/images/StaticSingleAssignment_Part73.jpg', 'issues', 'runfirst.py', 'lectures/images/StaticSingleAssignment_Part71.jpg', 'lectures/12_memory.qmd', 'Class_Overview/sylibus.qmd', 'lectures/03b_local_value_numbering.qmd', 'lectures/02b_bril.qmd.saved', 'lectures/100_mlir.qmd', 'lectures/03_local.qmd', 'lectures/05_global.qmd', 'description.txt', 'Class_Overview/about.qmd', 'lectures/04_data_flow.qmd', 'lectures/images/StaticSingleAssignment_Part69.jpg', 'index.qmd', 'notebooks/representation.ipynb', 'lectures/13_dynamic_compilers.qmd', 'lectures/images/my_ast', 'lectures/images/StaticSingleAssignment_Part40.jpg', 'lectures/images/add.json', 'lectures/images/Grace_Hopper_and_UNIVAC.jpg', 'lectures/08_classic_loop_ops.qmd', 'weekly.qmd', 'found_links.csv', 'lectures/02b_bril.qmd', 'lectures/14_gpu_compilers.qmd', 'Links-21-6-2024 83848.csv', 'lectures/junk.qmd', 'lectures/images/StaticSingleAssignment_Part40.pdf', 'Links-21-6-2024 84630.csv', 'notebooks/possible_papers.qmd', 'about.qmd', 'notebooks/02aa_reps.ipynb', 'lectures/images/Digraph.gv', 'Links-21-6-2024 84311.csv', 'lectures/images/StaticSingleAssignment_Part41.jpg', 'lectures/bril.qmd', 'lectures/images/StaticSingleAssignment_Part70.jpg', 'notebooks/llvm.ipynb', '.github/workflows/publish.yml', 'lectures/110_whole_program.qmd', 'lectures/images/Digraph.gv.png', 'requirements.txt', 'lectures/02a_representation.qmd.saved', 'lectures/02a_representation.qmd', 'lectures/06_ssa.qmd', 'lectures/images/StaticSingleAssignment_Part46.jpg', 'lectures/01a_performance_measurement.qmd', '.gitignore', 'lectures/images/toy.ts', '.vscode/settings.json', 'setenv.sh', 'lectures/images/StaticSingleAssignment_Part43.jpg', 'lectures/images/my_ast.png', 'styles.css', 'notebooks/02_reps.ipynb', 'Links-21-6-2024 84544.csv', 'check-links.ipynb', 'Links-21-6-2024 8400.csv', 'Class_Overview/schedule.qmd', 'lectures/02b_bril.ipynb', 'lectures/05b_licm.qmd', 'Links-21-6-2024 
84235.csv', '_quarto.yml', 'lectures/09_poly.qmd', 'Class_Overview/What_to_do.qmd', 'lectures/07_llvm.ipynb', 'lectures/images/cfg.png', 'lectures/010_compiler_overview.qmd', 'notebooks/bril.ipynb', 'lectures/02a_representation.ipynb'}
import requests
from bs4 import BeautifulSoup
import csv

class ctx:
    """Mutable state carried through one crawl of a site.

    A single instance is created per crawl and passed to every function so
    they all read and update the same sets.
    """

    def __init__(self, top):
        # Root URL of the site being crawled.
        self.top = top

        # De-duplication sets: URLs already requested, and relative links
        # already processed.
        self.seen_urls = set()
        self.seen_links = set()

        # Links recognised as pointing outside the site.
        self.external_links = set()

        # Initialised empty here; not read or written by the functions
        # visible in this file.
        self.missing_files = set()
        self.extra_files = set()

def parseLinks(pageHtml, pageUrl, ctx):
    """Pull every ``<a>`` element out of a page and pass them to ``extIntLinks``.

    Args:
        pageHtml: HTML text of the page.
        pageUrl: URL the HTML was fetched from; forwarded unchanged.
        ctx: crawl state; forwarded unchanged.
    """
    # Parse with the built-in html.parser backend and collect all anchors.
    anchors = BeautifulSoup(pageHtml, 'html.parser').find_all('a')
    extIntLinks(anchors, pageUrl, ctx)
def requestMaker(url, ctx):
    """Fetch ``url`` at most once and pass its HTML to ``parseLinks``.

    Args:
        url: absolute URL to request.
        ctx: crawl state; ``ctx.seen_urls`` records every URL already
            attempted so that no page is requested twice.

    Returns:
        None. Failures are printed rather than raised, so one bad page does
        not stop the rest of the crawl.
    """
    if url in ctx.seen_urls:
        return
    # Record the URL before fetching so a failing page is not retried.
    ctx.seen_urls.add(url)
    try:
        # Without a timeout requests can wait forever on a silent server.
        response = requests.get(url, timeout=30)

        # Only 2xx responses carry a page worth parsing.
        if 200 <= response.status_code < 300:
            # response.url is the final URL after any redirects.
            parseLinks(response.text, response.url, ctx)
        else:
            print(f"Sorry Could not fetch the result status code {response.status_code}!")

    except Exception as e:
        # Deliberately broad: this is a best-effort crawl, and the handler
        # also covers errors raised while parsing the page.
        print(f"{e} Could Not Connect to url {url}")
from sympy import I


def extIntLinks(allLinks, pageUrl, ctx):
    """Classify each anchor found on one page and follow new internal pages.

    Relative links (those starting with ``.``) are resolved against
    ``pageUrl``, so both ``./x.html`` and ``../x.html`` map to the right
    place on the site. When the resolved page corresponds to a ``.qmd``
    source still listed in the module-level ``extra_files`` set, that entry
    is removed and the page is crawled via ``requestMaker``.

    Args:
        allLinks: iterable of anchor elements; only ``.get("href")`` is used.
        pageUrl: URL of the page the anchors were taken from.
        ctx: crawl state. ``ctx.top`` is the site root,
            ``ctx.seen_links`` collects the resolved URL of every relative
            link already handled, and ``ctx.external_links`` collects
            recognised external links.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urldefrag, urljoin

    reveal_prefix = "revealjs-"
    external_prefixes = (
        "https://capra",
        "https://github.com",
        "https://quarto.org",
    )
    # Exactly one trailing slash, whether or not ctx.top already has one.
    site_root = ctx.top.rstrip("/") + "/"

    # Go through all the <a> elements.
    for anchor in allLinks:
        link = anchor.get("href")
        if link is None:
            continue
        print(f"link {link}")

        if link.startswith("."):
            if link == "./":
                continue

            # Resolve against the page the link appears on and drop any
            # #fragment, so the same target reached through different
            # spellings is handled only once.
            target = urldefrag(urljoin(pageUrl, link)).url
            if target in ctx.seen_links:
                continue
            ctx.seen_links.add(target)

            # Ignore anything that resolves outside the crawled site.
            if not target.startswith(site_root):
                continue
            file = target[len(site_root):]

            # Deal with the possible endings: a revealjs rendering is
            # matched against the same source as the plain page.
            if file.startswith(reveal_prefix):
                file = file[len(reveal_prefix):]

            if file.endswith(".html"):
                file_qmd = file[:-len("html")] + "qmd"
                if file_qmd in extra_files:
                    # The source file is linked, so it is not "extra".
                    extra_files.remove(file_qmd)
                    requestMaker(site_root + file, ctx)

        elif link.startswith(ctx.top):
            print("starts with top")

        elif link.startswith("#"):
            print("ref link")

        elif link.startswith(external_prefixes):
            ctx.external_links.add(link)

        else:
            print("else ", link, link)

     
# Start the crawl at the site root, with a fresh ctx to hold the crawl state.
url = "https://normrubin.github.io"
requestMaker(url, ctx(url))
link ./
link ./weekly.html
link ./weekly.html
link ./weekly.html
link ./
link ./weekly.html
link ./Class_Overview/about.html
link ../weekly.html
link ../Class_Overview/about.html
link ../Class_Overview/about.html
link ../
link ../weekly.html
link ../Class_Overview/about.html
link ../Class_Overview/schedule.html
link ../Class_Overview/sylibus.html
link ../Class_Overview/What_to_do.html
link ../lectures/010_compiler_overview.html
link ../lectures/01a_performance_measurement.html
link ../lectures/02a_representation.html
link ../lectures/02b_bril.html
link ../lectures/03_local.html
link ../lectures/03b_local_value_numbering.html
link ../lectures/04_data_flow.html
link ../lectures/05_global.html
link ../lectures/05b_licm.html
link ../lectures/06_ssa.html
link ../lectures/07_llvm.html
link ../lectures/08_classic_loop_ops.html
link ../lectures/09_poly.html
link ../lectures/100_mlir.html
link ../lectures/110_whole_program.html
link ../lectures/12_memory.html
link ../lectures/13_dynamic_compilers.html
link ../lectures/14_gpu_compilers.html
link ../lectures/bril.html
link ../lectures/junk.html
link https://capra.cs.cornell.edu/bril/
link https://github.com/normrubin/bril
link ../weekly.html
link ../Class_Overview/about.html
link ../Class_Overview/about.html
link ../weekly.html
link ../Class_Overview/schedule.html
link https://normrubin.github.io/
starts with top
link https://github.com/normrubin/normrubin.github.io/edit/main/Class_Overview/about.qmd
link https://github.com/normrubin/normrubin.github.io/issues/new
link https://quarto.org/
link ./Class_Overview/schedule.html
link ../weekly.html
link ../Class_Overview/about.html
link ../Class_Overview/schedule.html
link ../
link ../weekly.html
link ../Class_Overview/about.html
link ../Class_Overview/schedule.html
link ../Class_Overview/sylibus.html
link ../Class_Overview/What_to_do.html
link ../lectures/010_compiler_overview.html
link ../lectures/01a_performance_measurement.html
link ../lectures/02a_representation.html
link ../lectures/02b_bril.html
link ../lectures/03_local.html
link ../lectures/03b_local_value_numbering.html
link ../lectures/04_data_flow.html
link ../lectures/05_global.html
link ../lectures/05b_licm.html
link ../lectures/06_ssa.html
link ../lectures/07_llvm.html
link ../lectures/08_classic_loop_ops.html
link ../lectures/09_poly.html
link ../lectures/100_mlir.html
link ../lectures/110_whole_program.html
link ../lectures/12_memory.html
link ../lectures/13_dynamic_compilers.html
link ../lectures/14_gpu_compilers.html
link ../lectures/bril.html
link ../lectures/junk.html
link https://capra.cs.cornell.edu/bril/
link https://github.com/normrubin/bril
link ../weekly.html
link ../Class_Overview/about.html
link ../Class_Overview/schedule.html
link ../lectures/010_compiler_overview.html
link ../lectures/01a_performance_measurement.html
link ../lectures/02a_representation.ipynb
link ../lectures/02b_bril.ipynb
link ../lectures/03_local.html
link ../lectures/04_data_flow.html
link ../lectures/05_global.html
link ../lectures/06_ssa.html
link ../lectures/07_llvm.html
link ../lectures/08_classic_loop_ops.html
link ../lectures/09_poly.html
link ../lectures/100_mlir.html
link ../lectures/110_whole_program.html
link ../lectures/12_memory.html
link ../lectures/13_dynamic_compilers.html
link ../lectures/14_gpu_compilers.html
link ../Class_Overview/about.html
link ../Class_Overview/sylibus.html
link https://normrubin.github.io/
starts with top
link https://github.com/normrubin/normrubin.github.io/edit/main/Class_Overview/schedule.qmd
link https://github.com/normrubin/normrubin.github.io/issues/new
link https://quarto.org/
link ./Class_Overview/sylibus.html
link ../weekly.html
link ../Class_Overview/about.html
link ../Class_Overview/sylibus.html
link ../
link ../weekly.html
link ../Class_Overview/about.html
link ../Class_Overview/schedule.html
link ../Class_Overview/sylibus.html
link ../Class_Overview/What_to_do.html
link ../lectures/010_compiler_overview.html
link ../lectures/01a_performance_measurement.html
link ../lectures/02a_representation.html
link ../lectures/02b_bril.html
link ../lectures/03_local.html
link ../lectures/03b_local_value_numbering.html
link ../lectures/04_data_flow.html
link ../lectures/05_global.html
link ../lectures/05b_licm.html
link ../lectures/06_ssa.html
link ../lectures/07_llvm.html
link ../lectures/08_classic_loop_ops.html
link ../lectures/09_poly.html
link ../lectures/100_mlir.html
link ../lectures/110_whole_program.html
link ../lectures/12_memory.html
link ../lectures/13_dynamic_compilers.html
link ../lectures/14_gpu_compilers.html
link ../lectures/bril.html
link ../lectures/junk.html
link https://capra.cs.cornell.edu/bril/
link https://github.com/normrubin/bril
link ../weekly.html
link ../Class_Overview/about.html
link ../Class_Overview/sylibus.html
link ../lectures/010_compiler_overview.html
link ../lectures/01a_performance_measurement.html
link ../lectures/02a_representation.ipynb
link ../lectures/02b_bril.ipynb
link ../lectures/03_local.html
link ../lectures/04_data_flow.html
link ../lectures/05_global.html
link ../lectures/06_ssa.html
link ../lectures/07_llvm.html
link ../lectures/08_classic_loop_ops.html
link ../lectures/09_poly.html
link ../lectures/100_mlir.html
link ../lectures/110_whole_program.html
link ../lectures/12_memory.html
link ../lectures/13_dynamic_compilers.html
link ../lectures/14_gpu_compilers.html
link ../Class_Overview/schedule.html
link ../Class_Overview/What_to_do.html
link https://normrubin.github.io/
starts with top
link https://github.com/normrubin/normrubin.github.io/edit/main/Class_Overview/sylibus.qmd
link https://github.com/normrubin/normrubin.github.io/issues/new
link https://quarto.org/
link ./Class_Overview/What_to_do.html
link ../weekly.html
link ../Class_Overview/about.html
link ../Class_Overview/What_to_do.html
link ../
link ../weekly.html
link ../Class_Overview/about.html
link ../Class_Overview/schedule.html
link ../Class_Overview/sylibus.html
link ../Class_Overview/What_to_do.html
link ../lectures/010_compiler_overview.html
link ../lectures/01a_performance_measurement.html
link ../lectures/02a_representation.html
link ../lectures/02b_bril.html
link ../lectures/03_local.html
link ../lectures/03b_local_value_numbering.html
link ../lectures/04_data_flow.html
link ../lectures/05_global.html
link ../lectures/05b_licm.html
link ../lectures/06_ssa.html
link ../lectures/07_llvm.html
link ../lectures/08_classic_loop_ops.html
link ../lectures/09_poly.html
link ../lectures/100_mlir.html
link ../lectures/110_whole_program.html
link ../lectures/12_memory.html
link ../lectures/13_dynamic_compilers.html
link ../lectures/14_gpu_compilers.html
link ../lectures/bril.html
link ../lectures/junk.html
link https://capra.cs.cornell.edu/bril/
link https://github.com/normrubin/bril
link ../weekly.html
link ../Class_Overview/about.html
link ../Class_Overview/What_to_do.html
link https://quarto.org/
link ../Class_Overview/sylibus.html
link ../lectures/010_compiler_overview.html
link https://normrubin.github.io/
starts with top
link https://github.com/normrubin/normrubin.github.io/edit/main/Class_Overview/What_to_do.qmd
link https://github.com/normrubin/normrubin.github.io/issues/new
link https://quarto.org/
link ./lectures/010_compiler_overview.html
link ../weekly.html
link ../lectures/010_compiler_overview.html
link ../lectures/010_compiler_overview.html
link ../
link ../weekly.html
link ../Class_Overview/about.html
link ../Class_Overview/schedule.html
link ../Class_Overview/sylibus.html
link ../Class_Overview/What_to_do.html
link ../lectures/010_compiler_overview.html
link ../lectures/01a_performance_measurement.html
link ../lectures/02a_representation.html
link ../lectures/02b_bril.html
link ../lectures/03_local.html
link ../lectures/03b_local_value_numbering.html
link ../lectures/04_data_flow.html
link ../lectures/05_global.html
link ../lectures/05b_licm.html
link ../lectures/06_ssa.html
link ../lectures/07_llvm.html
link ../lectures/08_classic_loop_ops.html
link ../lectures/09_poly.html
link ../lectures/100_mlir.html
link ../lectures/110_whole_program.html
link ../lectures/12_memory.html
link ../lectures/13_dynamic_compilers.html
link ../lectures/14_gpu_compilers.html
link ../lectures/bril.html
link ../lectures/junk.html
link https://capra.cs.cornell.edu/bril/
link https://github.com/normrubin/bril
link revealjs-compiler_overview.html
Back to top