diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 25079d45b53d488132212ed36af9e488b929df20..04a587ba3bd6c8dfceb236bfe38a084f27959a48 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -45,3 +45,17 @@ test_mm:
     script:
       - cd hw/fpga/simulation/modelsim/
       - vsim -do "do mm_testbench.do"
+
+test_crc7:    # runs the crc7 testbench in ModelSim
+    stage: test
+    image: bslathi19/modelsim_18.1:lite
+    script:
+      - cd hw/fpga/simulation/modelsim/    # crc7_testbench.do uses paths relative to this directory
+      - vsim -do "do crc7_testbench.do"
+
+test_sd_cmd:    # runs the sd_controller command-transmit testbench in ModelSim
+    stage: test
+    image: bslathi19/modelsim_18.1:lite
+    script:
+      - cd hw/fpga/simulation/modelsim/    # sd_cmd_testbench.do uses paths relative to this directory
+      - vsim -do "do sd_cmd_testbench.do"  # same "do <file>" form as the other test jobs
diff --git a/hw/fpga/addr_decode.sv b/hw/fpga/addr_decode.sv
index a145fccdfad9058502c918a661d2fce8d2423b4a..bcfde85c208f609db8ba0a76a39d7a9a45b3cbe1 100644
--- a/hw/fpga/addr_decode.sv
+++ b/hw/fpga/addr_decode.sv
@@ -6,8 +6,9 @@ module addr_decode(
     output logic uart_cs,
     output logic irq_cs,
     output logic board_io_cs,
-	output logic mm_cs1,
-	output logic mm_cs2
+    output logic mm_cs1,
+    output logic mm_cs2,
+    output logic sd_cs
 );
 
 assign rom_cs = addr >= 24'h008000 && addr < 24'h010000;
@@ -17,6 +18,7 @@ assign hex_cs = addr >= 24'h007ff0 && addr < 24'h007ff4;
 assign uart_cs = addr >= 24'h007ff4 && addr < 24'h007ff6;
 assign board_io_cs = addr == 24'h007ff6;
 assign mm_cs2 = addr == 24'h007ff7;
+assign sd_cs = addr >= 24'h007ff8 && addr < 24'h007ffe;
 assign irq_cs  = addr == 24'h007fff;
 
 endmodule
diff --git a/hw/fpga/crc7.sv b/hw/fpga/crc7.sv
new file mode 100644
index 0000000000000000000000000000000000000000..e009826b820fe7444ad4f139c57edcd5ee241761
--- /dev/null
+++ b/hw/fpga/crc7.sv
@@ -0,0 +1,106 @@
+module crc7 #(parameter POLYNOMIAL = 8'h89)
+(
+    input clk,
+    input rst,
+
+    input load,
+    input [39:0] data_in,
+
+    output logic [6:0] crc_out,
+    output logic valid
+);
+
+logic [46:0] data;
+logic [46:0] next_data;
+logic [46:0] polyshift;
+
+typedef enum bit [1:0] {IDLE, WORKING, VALID} macro_t;
+struct packed {
+    macro_t macro;
+    logic [5:0] count;
+} state, next_state;
+
+always_ff @(posedge clk) begin      // registers: dividend (data), aligned divisor (polyshift), FSM state
+    if (rst) begin
+        polyshift <= {POLYNOMIAL, 39'b0};    //start all the way at the left
+        data <= '0;
+        state.macro <= IDLE;
+        state.count <= '0;
+    end else begin
+        if (load && state.macro != WORKING) begin   // a load while WORKING would overwrite the running remainder
+            data <= {data_in, 7'b0};                // 7 zero bits appended: room for the CRC7 remainder
+        end else begin
+            data <= next_data;
+        end
+        state <= next_state;
+
+        if (state.macro == WORKING) begin
+            polyshift <= polyshift >> 1;            // divisor follows the bit examined in the next step
+        end
+
+        if (state.macro == VALID) begin
+            polyshift <= {POLYNOMIAL, 39'b0};       // re-arm the divisor for the next computation
+        end
+    end
+end
+
+always_comb begin
+    // Next-state logic: hold the current state unless a transition
+    // below fires.
+    next_state = state;
+
+    case (state.macro)
+        // IDLE (reached only through reset) and VALID (result on
+        // crc_out) both wait for load to start a new computation.
+        IDLE, VALID: begin
+            if (load) begin
+                next_state.count = '0;
+                next_state.macro = WORKING;
+            end
+        end
+
+        // One step per data_in bit: count runs 0..39, after which the
+        // machine moves on to VALID.
+        WORKING: begin
+            if (state.count < 39) begin
+                next_state.count = state.count + 6'b1;
+            end else begin
+                next_state.count = '0;
+                next_state.macro = VALID;
+            end
+        end
+
+        // Unnamed encoding: stay put.
+        default: ;
+    endcase
+end
+
+always_comb begin
+    // Datapath and outputs. The defaults cover IDLE and any unnamed
+    // state: not valid, data register cleared, crc_out zero.
+    valid = 0;
+    next_data = '0;
+    crc_out = '0;
+
+    case (state.macro)
+        WORKING: begin
+            // Division step: when the data bit under the divisor's top
+            // bit is set, XOR the aligned divisor in; else keep data.
+            next_data = data;
+            if (data[6'd46 - state.count]) begin
+                next_data = data ^ polyshift;
+            end
+        end
+
+        VALID: begin
+            // data[6:0] is the remainder; valid drops while load is high.
+            crc_out = data[6:0];
+            next_data = data;
+            valid = ~load;
+        end
+
+        default: ;
+    endcase
+end
+
+endmodule
diff --git a/hw/fpga/hvl/crc7_testbench.sv b/hw/fpga/hvl/crc7_testbench.sv
new file mode 100644
index 0000000000000000000000000000000000000000..935dd850c4a774809ac6c83ba57ceb08914e509a
--- /dev/null
+++ b/hw/fpga/hvl/crc7_testbench.sv
@@ -0,0 +1,65 @@
+module testbench();
+
+timeunit 10ns;
+
+timeprecision 1ns;
+
+logic clk;
+logic rst;
+
+logic load;
+logic [39:0] data_in;
+
+logic [6:0] crc_out;
+logic valid;
+
+crc7 dut(.*);
+
+always #1 clk = clk === 1'b0;
+
+task create_sd_packet(logic [5:0] cmd, logic [31:0] data, output logic [47:0] _packet);
+    // Pulse load for one clock with the 40-bit frame head on data_in.
+    @(posedge clk);
+    load <= '1;
+    data_in <= {2'b01, cmd, data};
+    @(posedge clk);
+    load <= '0;
+
+    // Poll on clock edges until the DUT flags its result.
+    while (~valid) @(posedge clk);
+
+    // Frame = head, CRC7 taken from the DUT, and a closing 1 bit.
+    _packet = {2'b01, cmd, data, crc_out, 1'b1};
+endtask
+
+logic [47:0] packet;
+
+initial begin
+    rst <= '1;
+    load <= '0;                 // keep the DUT's load input defined (not X) until the first packet
+    repeat(5) @(posedge clk);
+    rst <= '0;
+
+    create_sd_packet(6'h0, 32'h0, packet);              // command 0, argument 0
+    $display("Result: %x", packet);
+    assert(packet == 48'h400000000095) else
+        $error("Bad crc7. Got %x expected %x", packet, 48'h400000000095);
+
+    create_sd_packet(6'd8, 32'h1aa, packet);            // command 8
+    $display("Result: %x", packet);
+    assert(packet == 48'h48000001aa87) else
+        $error("Bad crc7. Got %x expected %x", packet, 48'h48000001aa87);
+
+    create_sd_packet(6'd55, 32'h0, packet);             // command 55
+    $display("Result: %x", packet);
+    assert(packet == 48'h770000000065) else
+        $error("Bad crc7. Got %x expected %x", packet, 48'h770000000065);
+
+    create_sd_packet(6'd41, 32'h40180000, packet);      // command 41
+    $display("Result: %x", packet);
+    assert(packet == 48'h694018000019) else
+        $error("Bad crc7. Got %x expected %x", packet, 48'h694018000019);
+    $finish();
+end
+
+endmodule
diff --git a/hw/fpga/hvl/cs_testbench.sv b/hw/fpga/hvl/cs_testbench.sv
index a6e10729320faba518e995c17951c0caa59f4814..3e69518e1ad7d62ee52d6db1a7bb4cfe463976e8 100644
--- a/hw/fpga/hvl/cs_testbench.sv
+++ b/hw/fpga/hvl/cs_testbench.sv
@@ -13,8 +13,9 @@ logic uart_cs;
 logic irq_cs;
 logic mm_cs2;
 logic mm_cs1;
+logic sd_cs;
 
-int cs_count = sdram_cs + rom_cs + hex_cs + uart_cs + board_io_cs + mm_cs2 + mm_cs1;
+int cs_count = sdram_cs + rom_cs + hex_cs + uart_cs + board_io_cs + mm_cs2 + mm_cs1 + sd_cs;
 
 addr_decode dut(.*);
 
@@ -56,6 +57,11 @@ initial begin : TEST_VECTORS
             else
                 $error("Bad CS! addr=%4x should have mm_cs1!", addr);
         end
+        if (i >= 24'h007ff8 && i < 24'h007ffe) begin
+            assert(sd_cs == '1)
+            else
+            $error("Bad CS! addr=%4x should have sd_cs!", addr);
+        end
         if (i == 16'h7fff) begin
             assert(irq_cs == '1)
             else
diff --git a/hw/fpga/hvl/mm_testbench.sv b/hw/fpga/hvl/mm_testbench.sv
index b4983376ee4d60728139273fb03d7d1462f41597..1b18ee346b59d896a1ec29e6c87b0c806d4fa1da 100644
--- a/hw/fpga/hvl/mm_testbench.sv
+++ b/hw/fpga/hvl/mm_testbench.sv
@@ -25,61 +25,61 @@ assign MA = cpu_addr[15:12];
 assign mm_address = {MO, cpu_addr[11:0]};
 
 memory_mapper dut(
-	.data_in(_data_in),
-	.data_out(_data_out),
-	.*
+    .data_in(_data_in),
+    .data_out(_data_out),
+    .*
 );
 
 always #1 clk_50 = clk_50 === 1'b0;
 always #100 clk = clk === 1'b0;
 
 task write_reg(logic [3:0] addr, logic [7:0] data);
-	@(negedge clk);
-	cs <= '1;
-	RS <= addr;
-	data_in <= data;
-	rw <= '0;
-	@(posedge clk);
-	cs <= '0;
-	rw <= '1;
-	@(negedge clk);
+    @(negedge clk);
+    cs <= '1;
+    RS <= addr;
+    data_in <= data;
+    rw <= '0;
+    @(posedge clk);
+    cs <= '0;
+    rw <= '1;
+    @(negedge clk);
 endtask
 
 task enable(logic [7:0] data);
-	@(negedge clk);
-	MM_cs <= '1;
-	rw <= '0;
-	data_in <= data;
-	@(posedge clk);
-	rw <= '1;
-	MM_cs <= '0;
-	@(negedge clk);
+    @(negedge clk);
+    MM_cs <= '1;
+    rw <= '0;
+    data_in <= data;
+    @(posedge clk);
+    rw <= '1;
+    MM_cs <= '0;
+    @(negedge clk);
 endtask
 
 initial begin
-	rst <= '1;
-	repeat(5) @(posedge clk);
-	rst <= '0;
-
-	cpu_addr <= 16'h0abc;
-	write_reg(4'h0, 8'hcc);
-	$display("Address: %x", mm_address);
-	assert(mm_address == 24'h000abc) else begin
-		$error("Bad address before enable!");
-	end
-
-	enable(1);
-	$display("Address: %x", mm_address);
-	assert(mm_address == 24'h0ccabc) else begin
-		$error("Bad address after enable!");
-	end
-
-	enable(0);
-	$display("Address: %x", mm_address);
-	assert(mm_address == 24'h000abc) else begin
-		$error("Bad address after enable!");
-	end
-	$finish();
+    rst <= '1;
+    repeat(5) @(posedge clk);
+    rst <= '0;
+
+    cpu_addr <= 16'h0abc;
+    write_reg(4'h0, 8'hcc);
+    $display("Address: %x", mm_address);
+    assert(mm_address == 24'h000abc) else begin
+        $error("Bad address before enable!");
+    end
+
+    enable(1);
+    $display("Address: %x", mm_address);
+    assert(mm_address == 24'h0ccabc) else begin
+        $error("Bad address after enable!");
+    end
+
+    enable(0);
+    $display("Address: %x", mm_address);
+    assert(mm_address == 24'h000abc) else begin
+        $error("Bad address after enable!");
+    end
+    $finish();
 end
 
 endmodule
diff --git a/hw/fpga/hvl/sd_cmd_testbench.sv b/hw/fpga/hvl/sd_cmd_testbench.sv
new file mode 100644
index 0000000000000000000000000000000000000000..b90b7bce683798f24a16009f72cc83abf3fc4b40
--- /dev/null
+++ b/hw/fpga/hvl/sd_cmd_testbench.sv
@@ -0,0 +1,89 @@
+module testbench();
+
+timeunit 10ns;
+
+timeprecision 1ns;
+
+logic clk;
+logic rw;
+logic clk_50;
+logic rst;
+
+logic [2:0] addr;
+logic [7:0] data;
+logic cs;
+
+logic i_sd_cmd;
+logic o_sd_cmd;
+
+logic i_sd_data;
+logic o_sd_data;
+
+logic cpu_phi2;
+
+always @(posedge clk) begin         // cpu_phi2 toggles on every clk edge (clk / 2); it feeds the DUT's sd_clk
+    cpu_phi2 <= cpu_phi2 === '0;    // === makes the initial X compare false, so the first edge yields 0
+end
+
+sd_controller dut(
+    .sd_clk(cpu_phi2),
+    .*);
+
+always #1 clk_50 = clk_50 === 1'b0;
+always #100 clk = clk === 1'b0;
+
+task write_reg(logic [3:0] _addr, logic [7:0] _data);   // one CPU-style register write, three clk edges long
+    @(posedge clk);
+    cs <= '1;           // nonblocking: the DUT samples these at the same edge, blocking writes would race
+    addr <= _addr;
+    rw <= '0;
+    data <= '1;         // bus shows all ones for the first clock of the access
+    @(posedge clk);
+    data <= _data;      // the value that is left in the register
+    @(posedge clk);
+    cs <= '0;
+    rw <= '1;
+endtask
+
+task verify_cmd(logic [5:0] cmd, logic [31:0] arg, logic [47:0] verify);    // send one command, compare each transmitted bit with verify
+    write_reg(0, arg[7:0]);         // argument bytes, least significant first
+    write_reg(1, arg[15:8]);
+    write_reg(2, arg[23:16]);
+    write_reg(3, arg[31:24]);
+    write_reg(4, cmd);              // in the DUT a write to address 4 starts transmission
+
+    $display("arg: %x", dut.arg);
+    $display("dut.cmd: %x", dut.cmd);
+
+    @(posedge clk);
+    @(posedge clk);
+
+    while (dut.state.macro == dut.TXCMD) begin      // NOTE(review): if the DUT is not in TXCMD yet, nothing is checked — confirm timing
+        assert(o_sd_cmd == verify[47-dut.state.count]) else begin   // frame bit 47 is expected first
+            $error("cmd output error: Expected %h:%b, got %h:%b", 
+                47-dut.state.count, verify[47-dut.state.count],
+                47-dut.state.count, o_sd_cmd);
+        end
+        @(negedge clk);             // compare away from the edge on which the DUT updates
+    end
+endtask
+
+localparam cmd0 = 48'h400000000095;
+localparam cmd8 = 48'h48000001aa87;
+localparam cmd55 = 48'h770000000065;
+localparam cmd41 = 48'h694018000019;
+
+initial begin
+    rst <= '1;
+    i_sd_cmd <= '1; i_sd_data <= '1;    // hold both SD inputs at their idle level instead of leaving them X
+    repeat(5) @(posedge clk);
+    rst <= '0;
+
+    verify_cmd(0, 0, cmd0);
+    verify_cmd(8, 'h1aa, cmd8);
+    verify_cmd('d55, 0, cmd55);
+    verify_cmd('d41, 'h40180000, cmd41);
+    $finish();
+end
+
+endmodule
\ No newline at end of file
diff --git a/hw/fpga/sd_controller.sv b/hw/fpga/sd_controller.sv
new file mode 100644
index 0000000000000000000000000000000000000000..b8f864f16f56a6193bbc118dfc449fee18171589
--- /dev/null
+++ b/hw/fpga/sd_controller.sv
@@ -0,0 +1,233 @@
+module sd_controller(
+    input clk,
+    input sd_clk,
+    input rst,
+
+    input [2:0] addr,
+    input [7:0] data,
+    input cs,
+    input rw,
+
+    input i_sd_cmd,
+    output logic o_sd_cmd,
+
+    input i_sd_data,
+    output logic o_sd_data,
+
+    output logic [7:0] data_out
+);
+
+logic [31:0] arg;
+logic [5:0] cmd;
+
+logic [47:0] rxcmd_buf;
+logic [31:0] rx_val;
+
+logic [7:0] rxdata_buf [512];
+logic [9:0] data_count;
+
+logic [15:0] data_crc;
+
+
+assign rx_val = rxcmd_buf[39:8];
+
+logic read_flag, next_read_flag;    // status bit 0; declared ahead of the read mux that uses it
+logic data_flag, next_data_flag;    // status bit 1
+
+always_comb begin                   // CPU read mux, selected by addr
+    data_out = 'x;
+
+    if (addr < 4'h4) begin
+        data_out = rx_val[8 * addr +: 8];           // addr 0-3: byte <addr> of rx_val, low byte at 0
+    end else if (addr == 4'h4) begin
+        data_out = {data_flag, read_flag};          // addr 4: status byte
+    end else if (addr == 4'h5) begin
+        data_out = rxdata_buf[data_count[8:0]];     // addr 5: data byte; index wraps inside the 512-entry buffer
+    end
+end
+
+typedef enum bit [2:0] {IDLE, LOAD, CRC, TXCMD, RXCMD, TXDATA, RXDATA, RXDCRC} macro_t;   // controller phases
+struct packed {
+    macro_t macro;              // current phase
+    logic [8:0] count;          // bit index in TXCMD/RXCMD/RXDCRC, byte index in RXDATA
+    logic [2:0] d_bit_count;    // bit position inside the byte being received in RXDATA
+} state, next_state;
+
+always_ff @(posedge clk) begin      // FSM state, status flags, CPU-written registers and receive buffers
+    if (rst) begin
+        state.macro <= IDLE;
+        state.count <= '0;
+        state.d_bit_count <= '1;
+        read_flag <= '0;
+        data_flag <= '0;
+        data_count <= '0;
+    end else begin
+        if (state.macro == TXCMD || state.macro == CRC) begin
+            if (sd_clk) begin       // transmit-side phases step while sd_clk is high
+                state <= next_state;
+            end
+        end else if (state.macro == RXCMD || state.macro == RXDATA || state.macro == RXDCRC) begin
+            if (~sd_clk) begin      // receive-side phases step while sd_clk is low
+                state <= next_state;
+            end
+        end else begin
+            state <= next_state;
+        end
+    end
+
+    if (~rst && sd_clk) begin       // ~rst: a later assignment must not override the reset values above
+        read_flag <= next_read_flag;
+        data_flag <= next_data_flag;
+    end
+
+    if (cs & ~rw) begin             // CPU write: addr 0-3 are the argument bytes, addr 4 the command index
+        if (addr < 4'h4) begin
+            arg[8 * addr +: 8] <= data;
+        end else if (addr == 4'h4) begin
+            cmd <= data[5:0];       // cmd is 6 bits wide; take exactly that many bits
+        end
+    end
+
+    if (~rst && cs && (addr == 4'h5) && sd_clk) begin   // an access to addr 5 advances the read index
+        data_count <= data_count + 9'b1;
+    end
+
+    if (state.macro == RXCMD) begin
+        rxcmd_buf[6'd46-state.count] <= i_sd_cmd;   //we probably missed bit 47
+    end
+
+    if (state.macro == RXDATA && ~sd_clk) begin
+        rxdata_buf[state.count][state.d_bit_count] <= i_sd_data;    // byte = count, bit = d_bit_count
+    end
+
+    if (state.macro == RXDCRC && ~sd_clk) begin
+        data_crc[4'd15-state.count] <= i_sd_data;   // stored only; nothing in this module reads data_crc
+    end
+
+end
+
+logic [6:0] crc;                    // CRC7 result from u_crc7
+logic load_crc;                     // raised in the CRC phase to start a computation
+logic crc_valid;                    // driven by u_crc7; not read anywhere in this module
+logic [39:0] _packet;
+assign _packet = {1'b0, 1'b1, cmd, arg};    // first 40 frame bits: 0, 1, command index, argument
+logic [47:0] packet_crc;
+assign packet_crc = {_packet, crc, 1'b1};   // full 48-bit frame: head, CRC7, closing 1 bit
+
+crc7 u_crc7(
+    .clk(clk),
+    .rst(rst),
+    .load(load_crc),
+    .data_in(_packet),
+    .crc_out(crc),
+    .valid(crc_valid)               // NOTE(review): TXCMD seems to rely on timing, not this flag, for a finished CRC — confirm
+);
+
+always_comb begin                   // next-state and status-flag logic
+    next_state = state;
+    next_read_flag = read_flag;
+    next_data_flag = data_flag;
+
+    case (state.macro)
+        IDLE: begin                 // later assignments win: LOAD over RXDATA over RXCMD
+            if (~i_sd_cmd) begin        // receive data if sd pulls cmd low
+                next_state.macro = RXCMD;
+            end
+            if (~i_sd_data) begin       // data line low: a data block follows
+                next_state.macro = RXDATA;
+                next_state.d_bit_count = '1;    // every block starts at bit 7; the previous block left this at 0
+            end
+
+            if (addr == 4'h4 & cs & ~rw) begin     // transmit if cpu writes to cmd
+                next_state.macro = LOAD;
+            end
+
+            if (addr == 4'h4 & cs & rw) begin       // reading the status byte clears read_flag
+                next_read_flag = '0;
+            end
+
+            if (addr == 4'h5 & cs) begin            // any access to the data register clears data_flag
+                next_data_flag = '0;
+            end
+        end
+
+        LOAD: begin
+            next_state.macro = CRC;
+        end
+
+        CRC: begin
+            next_state.macro = TXCMD;
+        end
+
+        TXCMD: begin                // count 0..47: one value per transmitted frame bit
+            if (state.count < 47) begin
+                next_state.count = state.count + 6'b1;
+            end else begin
+                next_state.macro = IDLE;
+                next_state.count = '0;
+            end
+        end
+
+        RXCMD: begin                // count 0..47, then flag the response as ready
+            if (state.count < 47) begin
+                next_state.count = state.count + 6'b1;
+            end else begin
+                next_read_flag = '1;
+                next_state.macro = IDLE;
+                next_state.count = '0;
+            end
+        end
+
+        RXDATA: begin               // 512 bytes; d_bit_count runs 7..0 inside each byte
+            if (state.count < 511 || (state.count == 511 && state.d_bit_count > 0)) begin
+                if (state.d_bit_count == 8'h0) begin
+                    next_state.count = state.count + 9'b1;
+                end
+                next_state.d_bit_count = state.d_bit_count - 3'h1;
+            end else begin
+                next_data_flag = '1;
+                next_state.macro = RXDCRC;
+                next_state.count = '0;
+            end
+        end
+
+        RXDCRC: begin               // NOTE(review): count 0..16 is 17 steps for a 16-bit data_crc — confirm the extra step is intended
+            if (state.count < 16) begin
+                next_state.count = state.count + 9'b1;
+            end else begin
+                next_state.macro = IDLE;
+                next_state.count = '0;
+            end
+        end
+
+        default: begin              // includes TXDATA, which has no handler here
+                next_state.macro = IDLE;
+                next_state.count = '0;
+        end
+    endcase
+end
+
+always_comb begin
+    // Output decode. Both SD lines idle at 1 and the CRC unit is not
+    // loaded unless the current phase says otherwise.
+    o_sd_data = '1;
+    o_sd_cmd = '1;
+    load_crc = '0;
+
+    // CRC phase: have u_crc7 load the assembled packet.
+    if (state.macro == CRC) begin
+        load_crc = '1;
+    end
+
+    // TXCMD phase: present packet_crc bit (47 - count), i.e. the frame
+    // goes out most significant bit first.
+    if (state.macro == TXCMD) begin
+        o_sd_cmd = packet_crc[6'd47 - state.count];
+    end
+
+    // IDLE, RXCMD and every remaining phase keep the defaults set at
+    // the top of this block; in particular nothing here ever drives
+    // o_sd_data low.
+end
+
+endmodule
diff --git a/hw/fpga/simulation/modelsim/crc7_testbench.do b/hw/fpga/simulation/modelsim/crc7_testbench.do
new file mode 100644
index 0000000000000000000000000000000000000000..a79c2f447c1531a37ff883ac644b8e51e2be90ca
--- /dev/null
+++ b/hw/fpga/simulation/modelsim/crc7_testbench.do
@@ -0,0 +1,23 @@
+transcript on
+if {[file exists rtl_work]} {
+	vdel -lib rtl_work -all
+}
+vlib rtl_work
+vmap work rtl_work
+# Compile the DUT and its testbench into the freshly created library.
+vlog -sv -work work  {../../crc7.sv}
+vlog -sv -work work  {../../hvl/crc7_testbench.sv}
+# Load the top-level module named testbench.
+vsim -t 1ps -L altera_ver -L lpm_ver -L sgate_ver -L altera_mf_ver -L altera_lnsim_ver -L stratixv_ver -L stratixv_hssi_ver -L stratixv_pcie_hip_ver -L rtl_work -L work -voptargs="+acc"  testbench
+# Add every signal of the dut instance to the wave view, shown in hex.
+add wave -group {dut} -radix hexadecimal sim:/testbench/dut/*
+# Presumably keeps the simulator alive at $finish so the status check below can run - confirm.
+onfinish stop
+run -all
+# A TESTSTATUS of "1" is echoed as a warning and still exits with code 0.
+if { [coverage attribute -name TESTSTATUS -concise] == "1"} {
+    echo Warning
+    quit -f -code 0
+}
+# Otherwise the TESTSTATUS value itself becomes the exit code.
+quit -code [coverage attribute -name TESTSTATUS -concise]
diff --git a/hw/fpga/simulation/modelsim/sd_cmd_testbench.do b/hw/fpga/simulation/modelsim/sd_cmd_testbench.do
new file mode 100644
index 0000000000000000000000000000000000000000..ee0bc8ffbe335c42d9bc2c934c592721149744d0
--- /dev/null
+++ b/hw/fpga/simulation/modelsim/sd_cmd_testbench.do
@@ -0,0 +1,24 @@
+transcript on
+if {[file exists rtl_work]} {
+	vdel -lib rtl_work -all
+}
+vlib rtl_work
+vmap work rtl_work
+# Compile the controller, the crc7 module it instantiates, and the testbench.
+vlog -sv -work work  {../../sd_controller.sv}
+vlog -sv -work work  {../../crc7.sv}
+vlog -sv -work work  {../../hvl/sd_cmd_testbench.sv}
+# Load the top-level module named testbench.
+vsim -t 1ps -L altera_ver -L lpm_ver -L sgate_ver -L altera_mf_ver -L altera_lnsim_ver -L stratixv_ver -L stratixv_hssi_ver -L stratixv_pcie_hip_ver -L rtl_work -L work -voptargs="+acc"  testbench
+# Add every signal of the dut instance to the wave view, shown in hex.
+add wave -group {dut} -radix hexadecimal sim:/testbench/dut/*
+# Presumably keeps the simulator alive at $finish so the status check below can run - confirm.
+onfinish stop
+run -all
+# A TESTSTATUS of "1" is echoed as a warning and still exits with code 0.
+if { [coverage attribute -name TESTSTATUS -concise] == "1"} {
+    echo Warning
+    quit -f -code 0
+}
+# Otherwise the TESTSTATUS value itself becomes the exit code.
+quit -code [coverage attribute -name TESTSTATUS -concise]
diff --git a/hw/fpga/super6502.qsf b/hw/fpga/super6502.qsf
index fdb728991730b0d8498d3348b73f00b230641c90..191ee02a409ae91db28fb6f5ada58d62e929ab54 100644
--- a/hw/fpga/super6502.qsf
+++ b/hw/fpga/super6502.qsf
@@ -188,7 +188,7 @@ set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to cpu_sob
 set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to cpu_sync
 set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to clk_50
 set_global_assignment -name ENABLE_SIGNALTAP OFF
-set_global_assignment -name USE_SIGNALTAP_FILE output_files/stp2.stp
+set_global_assignment -name USE_SIGNALTAP_FILE output_files/sd.stp
 set_location_assignment PIN_F20 -to HEX4[6]
 set_location_assignment PIN_F19 -to HEX4[5]
 set_location_assignment PIN_H19 -to HEX4[4]
@@ -205,6 +205,20 @@ set_location_assignment PIN_F18 -to HEX4[0]
 set_location_assignment PIN_E20 -to HEX4[1]
 set_location_assignment PIN_AB5 -to UART_RXD
 set_location_assignment PIN_AB6 -to UART_TXD
+set_location_assignment PIN_AB7 -to ARDUINO_IO[2]
+set_location_assignment PIN_AB8 -to ARDUINO_IO[3]
+set_location_assignment PIN_AB9 -to ARDUINO_IO[4]
+set_location_assignment PIN_Y10 -to ARDUINO_IO[5]
+set_location_assignment PIN_AA11 -to ARDUINO_IO[6]
+set_location_assignment PIN_AA12 -to ARDUINO_IO[7]
+set_location_assignment PIN_AB17 -to ARDUINO_IO[8]
+set_location_assignment PIN_AA17 -to ARDUINO_IO[9]
+set_location_assignment PIN_AB19 -to ARDUINO_IO[10]
+set_location_assignment PIN_AA19 -to ARDUINO_IO[11]
+set_location_assignment PIN_Y19 -to ARDUINO_IO[12]
+set_location_assignment PIN_AB20 -to ARDUINO_IO[13]
+set_location_assignment PIN_AB21 -to ARDUINO_IO[14]
+set_location_assignment PIN_AA20 -to ARDUINO_IO[15]
 set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to UART_RXD
 set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to UART_TXD
 set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to HEX0[6]
@@ -350,6 +364,8 @@ set_location_assignment PIN_V22 -to DRAM_LDQM
 set_location_assignment PIN_U22 -to DRAM_RAS_N
 set_location_assignment PIN_J21 -to DRAM_UDQM
 set_location_assignment PIN_V20 -to DRAM_WE_N
+set_global_assignment -name SYSTEMVERILOG_FILE sd_controller.sv
+set_global_assignment -name SYSTEMVERILOG_FILE crc7.sv
 set_global_assignment -name SYSTEMVERILOG_FILE memory_mapper.sv
 set_global_assignment -name SYSTEMVERILOG_FILE board_io.sv
 set_global_assignment -name SYSTEMVERILOG_FILE sdram.sv
@@ -365,4 +381,23 @@ set_global_assignment -name SYSTEMVERILOG_FILE SevenSeg.sv
 set_global_assignment -name QIP_FILE cpu_clk.qip
 set_global_assignment -name SIGNALTAP_FILE output_files/stp1.stp
 set_global_assignment -name SIGNALTAP_FILE output_files/stp2.stp
+set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to ARDUINO_IO[15]
+set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to ARDUINO_IO[14]
+set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to ARDUINO_IO[13]
+set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to ARDUINO_IO[12]
+set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to ARDUINO_IO[11]
+set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to ARDUINO_IO[10]
+set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to ARDUINO_IO[9]
+set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to ARDUINO_IO[8]
+set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to ARDUINO_IO[7]
+set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to ARDUINO_IO[6]
+set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to ARDUINO_IO[5]
+set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to ARDUINO_IO[4]
+set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to ARDUINO_IO[3]
+set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to ARDUINO_IO[2]
+set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to ARDUINO_IO[1]
+set_instance_assignment -name IO_STANDARD "3.3-V LVTTL" -to ARDUINO_IO[0]
+set_instance_assignment -name WEAK_PULL_UP_RESISTOR ON -to ARDUINO_IO[11]
+set_instance_assignment -name WEAK_PULL_UP_RESISTOR ON -to ARDUINO_IO[12]
+set_global_assignment -name SIGNALTAP_FILE output_files/sd.stp
 set_instance_assignment -name PARTITION_HIERARCHY root_partition -to | -section_id Top
\ No newline at end of file
diff --git a/hw/fpga/super6502.sv b/hw/fpga/super6502.sv
index fdfdafa1ba66304c831739e971b75a3e0eb6591f..6cb2502f4fb39e53abed2f9f33f30cf417e750bb 100644
--- a/hw/fpga/super6502.sv
+++ b/hw/fpga/super6502.sv
@@ -26,8 +26,10 @@ module super6502(
     input   logic           UART_RXD,
     output  logic           UART_TXD,
 
-    input [7:0] SW,
-    output [7:0] LED,
+    input                   [7:0] SW,
+    output  logic           [7:0] LED,
+
+    inout   logic [15: 2]   ARDUINO_IO,
 
     ///////// SDRAM /////////
     output             DRAM_CLK,
@@ -54,6 +56,16 @@ assign cpu_data_in = cpu_data;
 logic [7:0] cpu_data_out;
 assign cpu_data = cpu_rwb ? cpu_data_out : 'z;
 
+logic o_sd_cmd, i_sd_cmd;
+logic o_sd_data, i_sd_data;
+
+assign ARDUINO_IO[11] = o_sd_cmd ? 1'bz : 1'b0;
+assign ARDUINO_IO[12] = o_sd_data ? 1'bz : 1'b0;
+assign ARDUINO_IO[13] = cpu_phi2;
+assign ARDUINO_IO[6] = 1'b1;
+
+assign i_sd_cmd = ARDUINO_IO[11];
+assign i_sd_data = ARDUINO_IO[12];
 
 logic [7:0] rom_data_out;
 logic [7:0] sdram_data_out;
@@ -61,6 +73,7 @@ logic [7:0] uart_data_out;
 logic [7:0] irq_data_out;
 logic [7:0] board_io_data_out;
 logic [7:0] mm_data_out;
+logic [7:0] sd_data_out;
 
 logic sdram_cs;
 logic rom_cs;
@@ -70,10 +83,11 @@ logic irq_cs;
 logic board_io_cs;
 logic mm_cs1;
 logic mm_cs2;
+logic sd_cs;
 
 cpu_clk cpu_clk(
-	.inclk0(clk_50),
-	.c0(clk)
+    .inclk0(clk_50),
+    .c0(clk)
 );
 
 always @(posedge clk) begin
@@ -93,16 +107,16 @@ logic [23:0] mm_addr;
 assign mm_addr = {mm_MO, cpu_addr[11:0]};
 
 memory_mapper memory_mapper(
-	.clk(clk),
+    .clk(clk),
     .rst(rst),
-	.rw(cpu_rwb),
-	.cs(mm_cs1),
-	.MM_cs(mm_cs2),
-	.RS(cpu_addr[3:0]),
-	.MA(cpu_addr[15:12]),
-	.data_in(cpu_data_in),
-	.data_out(mm_data_out),
-	.MO(mm_MO)
+    .rw(cpu_rwb),
+    .cs(mm_cs1),
+    .MM_cs(mm_cs2),
+    .RS(cpu_addr[3:0]),
+    .MA(cpu_addr[15:12]),
+    .data_in(cpu_data_in),
+    .data_out(mm_data_out),
+    .MO(mm_MO)
 );
 
 addr_decode decode(
@@ -113,8 +127,9 @@ addr_decode decode(
     .uart_cs(uart_cs),
     .irq_cs(irq_cs),
     .board_io_cs(board_io_cs),
-	.mm_cs1(mm_cs1),
-	.mm_cs2(mm_cs2)
+    .mm_cs1(mm_cs1),
+    .mm_cs2(mm_cs2),
+    .sd_cs(sd_cs)
 );
 
 
@@ -129,8 +144,10 @@ always_comb begin
         cpu_data_out = irq_data_out;
     else if (board_io_cs)
         cpu_data_out = board_io_data_out;
-	else if (mm_cs1)
-		cpu_data_out = mm_data_out;
+    else if (mm_cs1)
+        cpu_data_out = mm_data_out;
+    else if (sd_cs)
+        cpu_data_out = sd_data_out;
     else
         cpu_data_out = 'x;
 end
@@ -204,6 +221,24 @@ uart uart(
     .data_out(uart_data_out)
 );
 
+sd_controller sd_controller(
+    .clk(clk),
+    .sd_clk(cpu_phi2),
+    .rst(rst),
+    .addr(cpu_addr[2:0]),
+    .data(cpu_data_in),
+    .cs(sd_cs),
+    .rw(cpu_rwb),
+
+    .i_sd_cmd(i_sd_cmd),
+    .o_sd_cmd(o_sd_cmd),
+
+    .i_sd_data(i_sd_data),
+    .o_sd_data(o_sd_data),
+
+    .data_out(sd_data_out)
+);
+
 always_ff @(posedge clk_50) begin
     if (rst)
         irq_data_out <= '0;
diff --git a/sw/io.inc65 b/sw/io.inc65
index 5e87bc20afdbf83dd4da432cc3b26a9bd2dd5c02..bad732c293127fd7baf0ddae9f5b8c3cbfa85241 100644
--- a/sw/io.inc65
+++ b/sw/io.inc65
@@ -11,4 +11,8 @@ SW          = LED
 MM_CTRL		= $7ff7
 MM_DATA		= $7fe0
 
+SD_ARG      = $7ff8
+SD_CMD      = $7ffc
+SD_DATA     = $7ffd
+
 IRQ_STATUS  = $7fff
diff --git a/sw/main.c b/sw/main.c
index 4f894d84a807584a560860b95bd34d37102987e6..dc7182ba82ade5305fbf7b85ac715ae19e694e56 100644
--- a/sw/main.c
+++ b/sw/main.c
@@ -4,15 +4,17 @@
 #include "board_io.h"
 #include "uart.h"
 #include "mapper.h"
+#include "sd_card.h"
 
 int main() {
-    int i;
-    uint8_t sw;
-    char s[16];
-    s[15] = 0;
+	int i;
+	uint8_t sw;
+	uint32_t resp;
+	char s[16];
+	s[15] = 0;
 
-    clrscr();
-    cprintf("Hello, world!\n");
+	clrscr();
+	cprintf("Hello, world!\n");
 
 	for (i = 0; i < 16; i++){
 		cprintf("Mapping %1xxxx to %2xxxx\n", i, i);
@@ -46,18 +48,67 @@ int main() {
 	cprintf("Reading from 0x4000: %x\n", *(unsigned int*)(0x4000));
 	cprintf("Reading from 0x5000: %x\n", *(unsigned int*)(0x5000));
 
-    while (1) {
+	// This will read a 512-byte block from the sd card.
+	// The RCA is hard coded for the one that I have on hand as responses
+	// are not implemented yet.
+	sd_card_command(0, 0);
 
-        sw = sw_read();
-        led_set(sw);
+	sd_card_command(0x000001aa, 8);
+	sd_card_resp(&resp);
+	cprintf("CMD8: %lx\n", resp);
 
-        cscanf("%15s", s);
-        cprintf("\n");
-        for (i = 0; i < 16; i++)
-            cprintf("s[%d]=%c ", i, s[i]);
-        cprintf("\n");
-        cprintf("Read string: %s\n", s);
-    }
+	sd_card_command(0, 55);
+	sd_card_command(0x40180000, 41);
+	sd_card_resp(&resp);
+	cprintf("CMD41: %lx\n", resp);
 
-    return 0;
+	sd_card_command(0, 55);
+	sd_card_command(0x40180000, 41);
+	sd_card_resp(&resp);
+	cprintf("CMD41: %lx\n", resp);
+
+	sd_card_command(0, 2);
+	sd_card_resp(&resp);
+	cprintf("CMD2: %lx\n", resp);
+
+	sd_card_command(0, 3);
+	sd_card_resp(&resp);
+	cprintf("CMD3: %lx\n", resp);
+
+	sd_card_command(0x59b40000, 7);
+	sd_card_resp(&resp);
+	cprintf("CMD7: %lx\n", resp);
+
+	sd_card_command(0x59b41000, 13);
+	sd_card_resp(&resp);
+	cprintf("CMD13: %lx\n", resp);
+
+	sd_card_command(0, 17);
+	sd_card_resp(&resp);
+	cprintf("CMD17: %lx\n", resp);
+
+
+	while(sw_read());
+
+	sd_card_wait_for_data();
+
+	cprintf("Read data: \n");
+	for (i = 0; i < 512; i++){
+		cprintf("%c", sd_card_read_byte());
+	}
+
+	while (1) {
+
+		sw = sw_read();
+		led_set(sw);
+
+		cscanf("%15s", s);
+		cprintf("\n");
+		for (i = 0; i < 16; i++)
+			cprintf("s[%d]=%c ", i, s[i]);
+		cprintf("\n");
+		cprintf("Read string: %s\n", s);
+	}
+
+	return 0;
 }
diff --git a/sw/sd_card.h b/sw/sd_card.h
new file mode 100644
index 0000000000000000000000000000000000000000..8dbe972472d7cb6093cdcaeda98d23ac0ba9c46b
--- /dev/null
+++ b/sw/sd_card.h
@@ -0,0 +1,12 @@
+#ifndef _SD_CARD_H
+#define _SD_CARD_H
+
+#include <stdint.h>
+
+void sd_card_command(uint32_t arg, uint8_t cmd);   /* write arg, then cmd, to the SD controller registers */
+/* Waits for the response flag, then copies the 32-bit response to *resp. */
+void sd_card_resp(uint32_t* resp);
+uint8_t sd_card_read_byte(void);                   /* read one byte from the SD data register */
+void sd_card_wait_for_data(void);                  /* wait until the data flag is set */
+
+#endif
\ No newline at end of file
diff --git a/sw/sd_card.s b/sw/sd_card.s
new file mode 100644
index 0000000000000000000000000000000000000000..fe4f4e2b972a289bd931816eba70eb68ba732785
--- /dev/null
+++ b/sw/sd_card.s
@@ -0,0 +1,66 @@
+.include "io.inc65"
+
+.importzp sp, sreg, ptr1
+
+.export _sd_card_command
+.export _sd_card_resp
+.export _sd_card_read_byte
+.export _sd_card_wait_for_data
+
+.autoimport on
+
+.code
+
+; void sd_card_command(uint32_t arg, uint8_t cmd);
+; cmd arrives in A; arg is fetched from the C stack with popeax.
+; arg goes out low byte first (per cc65's popeax): A, X, sreg, sreg+1.
+_sd_card_command:
+    pha                 ; park cmd; A is overwritten below
+
+    jsr popeax          ; arg -> A / X / sreg / sreg+1
+    sta SD_ARG
+    stx SD_ARG+1
+    lda sreg
+    sta SD_ARG+2
+    lda sreg+1
+    sta SD_ARG+3
+
+    pla                 ; cmd again
+    sta SD_CMD          ; written last, after all four argument bytes
+    rts
+
+; void sd_card_resp(uint32_t* resp);
+_sd_card_resp:
+        phy                     ; caller's Y is restored before returning
+        stx ptr1+1              ; ptr1 = resp (A = low byte, X = high byte)
+        sta ptr1
+@poll:  lda SD_CMD              ; spin until status bit 0 is set
+        and #$01
+        beq @poll
+        ldy #$00                ; Y = byte offset into *resp
+        lda SD_ARG
+        sta (ptr1),y
+        iny
+        lda SD_ARG+1
+        sta (ptr1),y
+        iny
+        lda SD_ARG+2
+        sta (ptr1),y
+        iny
+        lda SD_ARG+3
+        sta (ptr1),y
+        ply
+        rts
+
+_sd_card_read_byte:             ; uint8_t sd_card_read_byte();
+        lda SD_DATA             ; A = byte from the SD data register
+        ldx #$00                ; X = 0 (presumably the high byte of the return value under cc65 - confirm)
+        rts
+
+_sd_card_wait_for_data:         ; void sd_card_wait_for_data();
+        pha                     ; A is handed back unchanged
+@wait:  lda SD_CMD              ; spin until status bit 1 is set
+        and #$02
+        beq @wait
+        pla
+        rts
\ No newline at end of file