;*******************************************************************************
;**                                                                           **
;**                  BlinkenPlus Software, (C) Arne Rossius                   **
;**                                                                           **
;*******************************************************************************
;
;This software is the main firmware for the following projects:
;  * BlinkenLEDs Plus (Prototype, Rev. 1 and Rev. 3)
;  * BlinkstroemAdvanced Plus
;  * ArcadeMicro
;The following projects are *NOT* (yet) supported:
;  * BlinkstroemAdvanced (by Kai Gossner, 2004)
;  * BlinkstroemAdvanced-Stream (by Kai Gossner & Arne Rossius, 2008)
;
;144 pixels: ATmega8515 @ 14.7456 MHz XTAL, Fuses: low=0x9F, high=0xC9
;            or ATmega16 @ 14.7456 MHz XTAL, Fuses: low=0x1F, high=0xC9
;520 pixels: ATmega162 @ 14.7456 MHz XTAL, Fuses: low=0xDF, high=0xD9, ext=0xFB
;USE "AVRA" VERSION >= 1.3.0 FOR ASSEMBLY, OTHER ASSEMBLERS MIGHT FAIL!
;
;PORTS (ATmega8515 or ATmega16, 144 pixels):
;  PA0-7 = LED Col 8..1 (MSB left) [BSA-Plus: LED Col 1..8 (MSB right)]
;  PB0-7 = LED Row 1..8 (MSB bottom)
;  PC0-7 = LED Col 16..9 (MSB left)
;  PD0/1 = RS232 Rx/Tx
;  PD2   = SD-Card #CS (voltage divider 1k/2k2)
;  PD3   = SD-Card D_in (MOSI) (voltage divider 1k/2k2)
;  PD4   = SD-Card D_out (MISO) (resistor 1k)
;  PD5   = SD-Card Clock (voltage divider 1k/2k2)
;  PD6/7 = LED Col 18/17 (MSB left) [Mega16: + Baud Rate jumpers (1k to +5V)]
;  PE0   = SD-Card Detect Switch [not on Mega16]
;  PE1/2 = Baud Rate jumpers (to ground) [not on Mega16 or BSA-Plus PCB Rev.1]
;
;PORTS (ATmega162, 520 pixels):
;  PA0-7 = LED Col 5..12
;  PB0-3 = LED Col 4..1
;  PB4   = SD-Card #CS (voltage divider 1k/2k2)
;  PB5   = SD-Card D_in (MOSI) (voltage divider 1k/2k2)
;  PB6   = SD-Card D_out (MISO) (resistor 1k)
;  PB7   = SD-Card Clock (voltage divider 1k/2k2)
;  PC0-7 = LED Col 20..13
;  PD0/1 = RS232 Rx/Tx
;  PD2-7 = LED Col 26..21
;  PE0   = SD-Card Detect Switch
;  PE1   = LED Row shift register data (Q1 = row 20 ... Q20 = row 1)
;  PE2   = LED Row shift register clock
;
;VERSIONS:
;  2011-12-19  Arne Rossius
;   * first version for BlinkenLEDs Plus Rev. 3
;   * added support for BlinkstroemAdvanced Plus
;   * switched from Mega16 to Mega8515
;   * reduced frame rate from 112.5 Hz to 100 Hz and sync'ed with 100 Hz ticks
;   * changed multiplexing to once per PWM step to increase refresh rate
;   * removed BlinkstroemAdvanced compatible mode (~124 Hz instead of 100 Hz)
;   * added support for SD card detect switch
;   * removed SD card sector buffer because of RAM limitations in Mega8515
;   * FAT bugfixes
;   * code clean-up
;   * more debugging output
;  2011-12-23  Arne Rossius
;   * Arcade (520 pixels) support for "ArcadeMicro" project
;   * changed name from "BlinkenLEDs Plus" to "BlinkenPlus Software"
;   * switched to packed framebuffer (2 pixels per byte)
;   * FAT and file parsing bugfixes & code clean-up
;  2011-12-27  Arne Rossius
;   * RAM buffer for stream input (fixes display for very fast streams)
;  2012-04-13  Arne Rossius
;   * BUGFIX: not all outputs correct for COLUMNS_INVERTED = 0 with 144 pixels
;   * BUGFIX: program crashed (stack messed up) on SD card error with DEBUG = 0
;   * corrected BS2BIN option for ARCADEMICRO (now set to 0)
;   * support for ATmega16 (BLplus Prototype & Rev.1)
;   * support for BLplus Rev.1 kit layout error correction
;  2012-04-14  Arne Rossius
;   * BUGFIX: program hangs after playing BS2.BIN (missing sd_finish_block)
;   * BUGFIX: program tried to play empty BS2.BIN (flag set before size check)

;TODO: SD Card timeouts during sd_init and sd_start_block

;select project (or set all to 0 to set options manually [see below]):
;(the presets are evaluated by an .if/.elseif chain in the order listed here,
;so if more than one project is set to 1, the first one in this list wins)
.equ	ARCADEMICRO = 0 ;ArcadeMicro
.equ	BSAPLUS = 0 ;BlinkstroemAdvanced Plus
.equ	BLPLUS0 = 0 ;BlinkenLEDs Plus Prototype
.equ	BLPLUS1 = 1 ;BlinkenLEDs Plus Rev. 1
.equ	BLPLUS3 = 0 ;BlinkenLEDs Plus Rev. 3

;generic options:

;Duration (in seconds) after which to return to flash animation if no valid data
;was received for the entire duration (set to 0 to disable timeout)
.equ	STREAM_TIMEOUT_VAL = 30

;Receive and display MCUF stream (when no SD Card is inserted)
.equ	STREAM_INPUT = 1

;Output an MCUF stream of the display contents (regardless of the source of the
;display contents)
.equ	STREAM_OUTPUT = 1

;Enable SD card functionality (disable to save program memory)
.equ	USE_SDCARD = 1

;Output debug messages to RS232 (no stream output possible). Debugging modes 2
;and above may affect display quality and animation playback speed!
;  0 = no debugging
;  1 = normal debugging
;  2 = heavy FAT debugging
;  3 = heavy animation playback debugging
;  4 = SD card: display <address> & {dumped}, Stream: display ? for call to getc
;Any non-zero value requires STREAM_OUTPUT = 0 (the debugging section aborts
;assembly with an .error otherwise).
.equ	DEBUG = 0

;Disable scrolling messages (for faster debugging)
.equ	MESSAGE_DISABLE = 0

;===============================================================================

;Project presets: derive the hardware options (ARCADE, MEGA16,
;BLPLUS_REV1_CORRECTION, COLUMNS_INVERTED, REVERSE_PORTA, JUMPERS, BS2BIN) and
;the animation directory name (macro _DIRNAME) from the project selected above.
;The meaning of each option is described in the manual section (.else branch).
.if (ARCADEMICRO) ;ArcadeMicro
	.equ	ARCADE = 1
	.equ	MEGA16 = 0
	.equ	BLPLUS_REV1_CORRECTION = 0
	.equ	COLUMNS_INVERTED = 1
	.equ	REVERSE_PORTA = 0
	.equ	JUMPERS = 0
	.equ	BS2BIN = 0
	.macro	_DIRNAME
	.db "ARCADE     ",0
	.endmacro
.elseif (BSAPLUS) ;BlinkstroemAdvanced Plus
	.equ	ARCADE = 0
	.equ	MEGA16 = 0
	.equ	BLPLUS_REV1_CORRECTION = 0
	.equ	COLUMNS_INVERTED = 1
	.equ	REVERSE_PORTA = 1
	.equ	JUMPERS = 0
	.equ	BS2BIN = 1
	.macro	_DIRNAME
	.db "BLPLUS     ",0
	.endmacro
.elseif (BLPLUS0) ;BlinkenLEDs Plus Prototype
	.equ	ARCADE = 0
	.equ	MEGA16 = 1
	.equ	BLPLUS_REV1_CORRECTION = 0
	.equ	COLUMNS_INVERTED = 0
	.equ	REVERSE_PORTA = 0
	.equ	JUMPERS = 0
	.equ	BS2BIN = 1
	.macro	_DIRNAME
	.db "BLPLUS     ",0
	.endmacro
.elseif (BLPLUS1) ;BlinkenLEDs Plus Rev. 1
	.equ	ARCADE = 0
	.equ	MEGA16 = 1
	.equ	BLPLUS_REV1_CORRECTION = 1
	.equ	COLUMNS_INVERTED = 1
	.equ	REVERSE_PORTA = 0
	.equ	JUMPERS = 1
	.equ	BS2BIN = 1
	.macro	_DIRNAME
	.db "BLPLUS     ",0
	.endmacro
.elseif (BLPLUS3) ;BlinkenLEDs Plus Rev. 3
	.equ	ARCADE = 0
	.equ	MEGA16 = 0
	.equ	BLPLUS_REV1_CORRECTION = 0
	.equ	COLUMNS_INVERTED = 1
	.equ	REVERSE_PORTA = 0
	.equ	JUMPERS = 1
	.equ	BS2BIN = 1
	.macro	_DIRNAME
	.db "BLPLUS     ",0
	.endmacro
.else
	;manual configuration (used when no project is selected above)
	.warning "No known project selected."
	
	;set to 1 to assemble for Arcade (520 pixel) projects,
	;set to 0 to assemble for 144-pixel projects
	.equ	ARCADE = 0
	
	;use ATmega16 controller (144-pixel projects only)
	.equ	MEGA16 = 1
	
	;correction for BlinkenLEDs Plus Kit Rev.1 (Mega16) layout error
	.equ	BLPLUS_REV1_CORRECTION = 0
	
	;Column Output Polarity: active-low column outputs when set to 1
	.equ	COLUMNS_INVERTED = 1
	
	;reverse PORT A bit order (144-pixel projects only)
	.equ	REVERSE_PORTA = 0
	
	;enable baud rate jumpers on PORT E (144-pixel projects only)
	;(with MEGA16 = 1 the jumpers are read from PIND instead, see UART init)
	.equ	JUMPERS = 0
	
	;play BS2.BIN file in root dir if animation directory not present
	.equ	BS2BIN = 0
	
	;directory name in which to look for animations (exactly 11 characters,
	;last three are the filename extension, padded with spaces, zero-
	;terminated, examples:
	;  ANIMS      = "ANIMS      ",0  (standard directory name)
	;  ANIM.DIR   = "ANIM    DIR",0  (directory name with extension)
	;  Animations = "ANIMAT~1   ",0  (long name, no similar names exist)
	;NOTE(review): the default below is "ARCADE" although ARCADE = 0 above;
	;the 144-pixel presets all use "BLPLUS" - confirm which one is intended.
	.macro _DIRNAME
	.db "ARCADE     ",0
	.endmacro

.endif
	
;===============================================================================

;Display geometry and controller definitions, depending on ARCADE / MEGA16.
;WIDTH * HEIGHT = PIXELS; the frame buffers hold 2 pixels per byte (PIXELS/2).
.if (ARCADE)
	.message "Assembling for 520-pixel (Arcade) projects."
	.equ	WIDTH = 26
	.equ	HEIGHT = 20
	.equ	PIXELS = 520
	.include "../include/m162def.inc"
	;map the Mega162's USART0 register names to the un-numbered names that
	;the rest of this file uses
	;NOTE(review): UDR and the UART bit names (RXEN, TXEN, UDRIE, UDRE, RXC)
	;are used by the code but not aliased here - presumably provided by the
	;include file or elsewhere; confirm.
	.equ	UCSRA = UCSR0A
	.equ	UCSRB = UCSR0B
	.equ	UBRRL = UBRR0L
	.equ	UBRRH = UBRR0H
.else
	.message "Assembling for 144-pixel (Blinkenlights) projects."
	.equ	WIDTH = 18
	.equ	HEIGHT = 8
	.equ	PIXELS = 144
  .if (MEGA16)
	.include "../include/m16def.inc"
  .else
	.include "../include/m8515def.inc"
  .endif
.endif

;Register allocation (global, fixed for the whole program).
;Note: the display interrupt (oc0) and putstr use R0/R1 as scratch; the
;520-pixel oc0 additionally uses R2/R3 (saved and restored on the stack).
;R1:R0 = 'mul' result
.def	size1 = R2
.def	size2 = R3
.def	size3 = R4
.def	size4 = R5
.def	sectorL = R6
.def	sectorH = R7
.def	framelen = R8
.def	rowlen = R9
;WH:WL and W4:W3:W2:W1 share R10/R11 (the word is the low half of the dword)
.def	WL = R10 ;WH:WL = general purpose 16 bit register
.def	WH = R11
.def	W1 = R10 ;W4:W3:W2:W1 = general purpose 32 bit register
.def	W2 = R11
.def	W3 = R12
.def	W4 = R13

.def	zero = R15 ;constant 0x00 (cleared once in reset, used for cp/cpc/adc)

.def	temp = R16
.def	temp2 = R17
.def	count = R18
.def	pwm = R19 ;current PWM step, kept in the HIGH nibble (0x10 .. 0x70)
.def	timeL = R20 ;timeH:timeL = remaining display time of the current frame,
.def	timeH = R21 ;decremented by oc0 once per complete PWM sequence
.def	row = R22 ;144 pixels: one-hot row mask for PORTB; 520 pixels: row counter
.def	flags = R23
	.equ	fFlashAnimation = 0 ;flash animation enabled
	.equ	fActiveFrame = 1 ;active frame (currently being displayed), 0 or 1
	.equ	fBS2BIN = 2 ;BS2.BIN file found
	.equ	fSDHC = 3 ;SDHC card (uses sector address, not byte address)
	.equ	fFAT32 = 4 ;FAT32 file system
	.equ	fNewFrame = 5 ;new frame ready for display
	.equ	fReadError = 6 ;SD card read error (usually [premature] EOF)
	.equ	fCardRejected = 7 ;SD card rejected, don't try again until removed
.def	Z3 = R24 ;additional bytes for 32 bit Z pointer (SD card address)
.def	Z4 = R25
;R27:R26 = X pointer (general purpose word register, RAM pointer for copy/compare)
;R29:R28 = Y pointer (sector RAM pointer, RAM pointer for copy/compare)
;R31:R30 = Z pointer (flash pointer)
.def	Z1 = R30 ;Z1/Z2 alias ZL/ZH so that Z can be used with the dword macros
.def	Z2 = R31

;SD card pins
.if (ARCADE)
	;520-pixel projects use hardware SPI on PORTB
	;(only the chip select is driven by software, so no other pins are named)
	.equ	SD_PORT = PORTB
	.equ	SD_CS = 4
.else
	;144-pixel projects use software SPI on PORTD
	.equ	SD_PORT = PORTD
	.equ	SD_PIN = PIND
	.equ	SD_CS = 2
	.equ	SD_MOSI = 3
	.equ	SD_MISO = 4
	.equ	SD_CK = 5
.endif
.equ	SD_DETECT = 0 ;Card Detect switch pin on PORTE

;===============================================================================

;Long-distance conditional branches.
;  @0 = target label
;Each macro uses the inverse short branch to skip over an rjmp, so the target
;may be anywhere within rjmp range instead of the short range of the
;conditional branch instructions. The status flags are not modified.

.macro _breq
	;branch if equal, long distance
	brne	PC+2 ;not equal: skip the rjmp
	rjmp	@0
.endmacro

.macro _brne
	;branch if not equal, long distance
	breq	PC+2 ;equal: skip the rjmp
	rjmp	@0

.endmacro

.macro _brlo
	;branch if lower, long distance
	brsh	PC+2 ;same or higher: skip the rjmp
	rjmp	@0
.endmacro

.macro _brsh
	;branch if same or higher, long distance
	brlo	PC+2 ;lower: skip the rjmp
	rjmp	@0
.endmacro

;16 bit ("_w") and 32 bit ("_d") helper macros.
;Register operands are given as a name PREFIX; the macro appends the suffix:
;  word:        <prefix>L, <prefix>H       (e.g. Z -> ZL/ZH, time -> timeL/timeH)
;  double-word: <prefix>1 .. <prefix>4     (e.g. W -> W1..W4, Z -> Z1..Z4,
;                                           size -> size1..size4), 1 = LSB
;Macros built on ldi/subi/sbci only work with the high registers (R16-R31).

.macro _clr_w
	;clear word
	clr	@0L
	clr	@0H
.endmacro

.macro _clr_d
	;clear double-word
	clr	@01
	clr	@02
	clr	@03
	clr	@04
.endmacro

.macro _ldi_w
	;load immediate word (@0 = high register pair, @1 = 16 bit constant)
	ldi	@0L, LOW(@1)
	ldi	@0H, HIGH(@1)
.endmacro

.macro _ldi_d
	;load immediate double-word (@0 = high registers, @1 = 32 bit constant)
	ldi	@01, BYTE1(@1)
	ldi	@02, BYTE2(@1)
	ldi	@03, BYTE3(@1)
	ldi	@04, BYTE4(@1)
.endmacro

.macro _mov_d
	;move double-word (@0 = destination, @1 = source)
	;uses movw, so bytes 1/2 and bytes 3/4 must each form an even-aligned
	;register pair
	movw	@02:@01, @12:@11
	movw	@04:@03, @14:@13
.endmacro

.macro _lsr_w
	;shift right word (bit 0 ends up in carry)
	lsr	@0H
	ror	@0L
.endmacro

.macro _lsl_w
	;shift left word (bit 15 ends up in carry)
	lsl	@0L
	rol	@0H
.endmacro

.macro _lsl_d
	;shift left double-word (bit 31 ends up in carry)
	lsl	@01
	rol	@02
	rol	@03
	rol	@04
.endmacro

.macro _sub_w
	;subtract word (@0 = @0 - @1)
	sub	@0L, @1L
	sbc	@0H, @1H
.endmacro

.macro _subi_w
	;subtract immediate word from high registers
	;(zero flag is valid for the whole word after the sbci)
	subi	@0L, LOW(@1)
	sbci	@0H, HIGH(@1)
.endmacro

.macro _subi_d
	;subtract immediate double-word from high registers
	subi	@01, BYTE1(@1)
	sbci	@02, BYTE2(@1)
	sbci	@03, BYTE3(@1)
	sbci	@04, BYTE4(@1)
.endmacro

.macro _addi_w
	;add immediate word to high registers
	;(implemented as subtraction of the negated constant, so the resulting
	;flags are those of the subtraction, not of an addition)
	subi	@0L, LOW(-(@1))
	sbci	@0H, HIGH(-(@1))
.endmacro

.macro _addi_d
	;add immediate double-word to high registers
	;(implemented as subtraction of the negated constant, see _addi_w)
	subi	@01, BYTE1(-(@1))
	sbci	@02, BYTE2(-(@1))
	sbci	@03, BYTE3(-(@1))
	sbci	@04, BYTE4(-(@1))
.endmacro

.macro _lds_w
	;load from immediate RAM address word (@0 = register, @1 = address;
	;low byte is stored at the lower address)
	lds	@0L, @1
	lds	@0H, @1+1
.endmacro

.macro _lds_d
	;load from immediate RAM address double-word (LSB at the lowest address)
	lds	@01, @1
	lds	@02, @1+1
	lds	@03, @1+2
	lds	@04, @1+3
.endmacro

.macro _ldd_w
	;load from indexed RAM pointer word (@1 = pointer+displacement, e.g. Y+4)
	ldd	@0L, @1
	ldd	@0H, @1+1
.endmacro

.macro _ldd_d
	;load from indexed RAM pointer double-word
	ldd	@01, @1
	ldd	@02, @1+1
	ldd	@03, @1+2
	ldd	@04, @1+3
.endmacro

.macro _sts_w
	;store to immediate RAM address word (@0 = address, @1 = register)
	sts	@0, @1L
	sts	@0+1, @1H
.endmacro

.macro _sts_d
	;store to immediate RAM address double-word (@0 = address, @1 = register)
	sts	@0, @11
	sts	@0+1, @12
	sts	@0+2, @13
	sts	@0+3, @14
.endmacro

.macro _push_w
	;push word (low byte first; must be paired with _pop_w)
	push	@0L
	push	@0H
.endmacro

.macro _push_d
	;push double-word (LSB first; must be paired with _pop_d)
	push	@01
	push	@02
	push	@03
	push	@04
.endmacro

.macro _pop_w
	;pop word (reverse order of _push_w; this is also the byte order of a
	;return address on the stack, see putstr)
	pop	@0H
	pop	@0L
.endmacro

.macro _pop_d
	;pop double-word (reverse order of _push_d)
	pop	@04
	pop	@03
	pop	@02
	pop	@01
.endmacro

.macro _tst_w
	;compare word to zero
	;Only the ZERO flag is meaningful afterwards (use with breq/brne): the
	;high byte is compared against the low byte, which equals zero exactly
	;in the case where the zero flag is still set from the first compare.
	cp	@0L, zero
	cpc	@0H, @0L
.endmacro

.macro _tst_d
	;compare double-word to zero
	;Only the ZERO flag is meaningful afterwards (see _tst_w).
	cp	@01, zero
	cpc	@02, @01
	cpc	@03, @01
	cpc	@04, @01
.endmacro

;Frame buffer selection macros.
;  @0 = high register pair prefix (e.g. Y or Z) that receives the RAM address
;The frame is selected by flag fActiveFrame (0 = RAM_Frame0 is active,
;1 = RAM_Frame1 is active). Only ldi/sbrs/rjmp are used, so the status flags
;are not modified.
;The labels inside the macros rely on the assembler treating labels within a
;macro as local to each expansion (the macros are expanded more than once).

.macro activeframe
	;load active frame address
	_ldi_w	@0, RAM_Frame0
	sbrs	flags, fActiveFrame
	rjmp	activeframe_end ;flag clear: keep RAM_Frame0
	_ldi_w	@0, RAM_Frame1
activeframe_end:
.endmacro

.macro inactiveframe
	;load inactive (buffer) frame address
	_ldi_w	@0, RAM_Frame1
	sbrs	flags, fActiveFrame
	rjmp	inactiveframe_end ;flag clear: Frame0 is active, keep RAM_Frame1
	_ldi_w	@0, RAM_Frame0
inactiveframe_end:
.endmacro

;===============================================================================

;SRAM layout. Frames are stored packed, 2 pixels per byte (first pixel of a
;pair in the low nibble, second pixel in the high nibble), hence PIXELS/2.
.dseg
	RAM_Frame0: .byte PIXELS/2 ;one frame
	RAM_Frame1: .byte PIXELS/2 ;another frame
	RAM_MuxAddress: .byte 2 ;current address within active frame
	RAM_Duration: .byte 2 ;duration of next frame
	
	RAM_Line: .byte 80 ;one line of text
	RAM_String: .byte 10 ;one short line of text (for string comparison)
	RAM_BitsPerPixel: .byte 1 ;bits per pixel in BML file
	
	RAM_FAT_Start: .byte 4 ;start address of first FAT
	RAM_FAT_Base: .byte 4 ;start address of first cluster (cluster #2)
	RAM_FAT_Clustersize: .byte 1 ;sectors per cluster
	RAM_FAT_Cluster: .byte 4 ;current cluster number
	RAM_FAT_RemainingSectors: .byte 1 ;remaining sectors in current cluster
	RAM_FAT_Filestart: .byte 4 ;first cluster of file (for rewinding file/dir)
	RAM_FAT_Filesize: .byte 4 ;backup of file size (for rewinding file)
	
	RAM_RxBuffer: .byte PIXELS/2 ;buffer for stream reception
	RAM_Timeout_Stream: .byte 1 ;timeout counter for Stream display
	RAM_Timeout_RS232: .byte 1 ;timeout counter for RS232 reception (decremented by oc0)
	RAM_TxPos: .byte 2 ;position in frame for stream output (0..11 = header)

	;space reserved for the stack (the stack pointer itself is initialized
	;to RAMEND in reset and grows downwards)
	RAM_Stack: .byte 96

;===============================================================================

;Interrupt vector table. Only the vectors that are actually used are filled
;in: reset, timer 0 output compare (display multiplexing) and, if stream
;output is enabled, UART data register empty. The UDRE vector is placed
;before or after the OC0 vector depending on the controller so that the .org
;addresses are always ascending.
.cseg
.org 0x000
	rjmp	reset
	
.if (STREAM_OUTPUT && (ARCADE == 0))
  .org UDREaddr
	rjmp	uart_tx
.endif

.org OC0addr
	rjmp	oc0
	
.if (STREAM_OUTPUT && ARCADE)
  .org UDRE0addr
	rjmp	uart_tx
.endif

;===============================================================================

;character set table in flash (only assembled when SD card support is enabled)
.if (USE_SDCARD)
charset:
	;8x8 pixel charset, chars 0x20 to 0x7E, 8 bytes per char:
	;bytes = columns (left to right), MSB = bottom pixel
	.include "charset8x8.asm"
.endif

;===============================================================================

;PWM brightness curve.
;PWMVAL_n is the accumulated number of timer steps for which a pixel of
;brightness n is lit during one PWM sequence. The table pwm_duration below is
;derived from these values and is read by the oc0 interrupt.

.if (ARCADE)

;Example PWM durations (for 520-pixel projects):
;0, 4, 8, 13, 20, 34, 63, 115  ;equivalent to old optimized exponential
;0, 1, 6, 13, 24, 37, 53, 72 ;Gamma 2.0 (y = x^2 * 72/49, x = [0..7])
;0, 2, 6, 13, 24, 37, 53, 72 ;Safe Gamma 2.0 (delta >= 2)
;0, 1, 3, 9, 18, 31, 49, 72  ;Gamma 2.5
;absolute minimum step between two values: 4

	.equ	PWMVAL_0 = 0 ;don't change this value
	.equ	PWMVAL_1 = 4
	.equ	PWMVAL_2 = 8
	.equ	PWMVAL_3 = 13
	.equ	PWMVAL_4 = 20
	.equ	PWMVAL_5 = 34
	.equ	PWMVAL_6 = 63
	.equ	PWMVAL_7 = 115 ;don't change this value (timing depends on it)
	
.else

;Example PWM durations (for 144-pixel projects):
;0, 1, 3, 6, 11, 21, 39, 72  ;exponential, base 1.8
;0, 2, 5, 8, 12, 21, 39, 72  ;equivalent to old optimized safe exponential
;0, 1, 6, 13, 24, 37, 53, 72 ;Gamma 2.0 (y = x^2 * 72/49, x = [0..7])
;0, 2, 6, 13, 24, 37, 53, 72 ;Safe Gamma 2.0 (delta >= 2)
;0, 1, 3, 9, 18, 31, 49, 72  ;Gamma 2.5
;absolute minimum step between two values:    1
;recommended minimum step between two values: 2 ("safe")

	.equ	PWMVAL_0 = 0 ;don't change this value
	.equ	PWMVAL_1 = 2
	.equ	PWMVAL_2 = 5
	.equ	PWMVAL_3 = 8
	.equ	PWMVAL_4 = 12
	.equ	PWMVAL_5 = 21
	.equ	PWMVAL_6 = 39
	.equ	PWMVAL_7 = 72 ;don't change this value (timing depends on it)
	
.endif
	
;Timer compare values, indexed by PWM value (0..7):
;  entry n = PWMVAL_n - PWMVAL_(n-1) - 1
;oc0 writes the entry for the current PWM value to OCR0. The "-1" is there
;because the values are used as compare values of the timer in CTC mode.
pwm_duration:
	;(PWM value 0 doesn't exist [LED off])
	.db 0,                   PWMVAL_1-PWMVAL_0-1
	.db PWMVAL_2-PWMVAL_1-1, PWMVAL_3-PWMVAL_2-1
	.db PWMVAL_4-PWMVAL_3-1, PWMVAL_5-PWMVAL_4-1
	.db PWMVAL_6-PWMVAL_5-1, PWMVAL_7-PWMVAL_6-1

;===============================================================================

.if (STREAM_INPUT || STREAM_OUTPUT)

;12-byte MCUF frame header, 16 bit fields stored high byte first.
;Sent in front of every transmitted frame (uart_tx) and used as the reference
;when checking the header of received frames (stream).
mcuf_header:
	.db 0x23, 0x54, 0x26, 0x66, 0x00, HEIGHT ;magic (4 bytes), height
	.db 0x00, WIDTH, 0x00, 0x01, 0x00, 0x07 ;width, 1 channel, maxval 0x07 (8 grayscales)

.endif

;--------------------

.if (STREAM_OUTPUT)

;uart_tx: UART data register empty interrupt (MCUF stream output).
;Sends exactly one byte of the outgoing stream per invocation, selected by
;RAM_TxPos:
;  0 .. 11             byte RAM_TxPos of mcuf_header (read from flash)
;  12 .. PIXELS+11     pixel number (RAM_TxPos - 12) of the active frame; the
;                      frame is packed 2 pixels per byte (even pixel = low
;                      nibble, odd pixel = high nibble), sent as value 0..7
;Afterwards RAM_TxPos is incremented. While it is below PIXELS+12 the
;interrupt is re-enabled; after the last pixel RAM_TxPos is reset to 0 and the
;interrupt stays disabled until oc0 enables it again for the next frame.
;All registers used (Y, Z) and SREG are saved and restored.
uart_tx: ;UART data register empty interrupt => send next byte
	cbi	UCSRB, UDRIE ;disable UDR empty interrupt
	;sei <== TODO: does this work?
	_push_w	Y
	_push_w	Z
	in	YL, SREG
	push	YL
	
	_lds_w	Z, RAM_TxPos
	cpi	ZL, 12 ;header length
	cpc	ZH, zero
	brsh	uart_tx_frame
	
	;send header
	_addi_w	Z, mcuf_header*2 ;Z = flash byte address of header byte
	lpm	YL, Z
	out	UDR, YL
	rjmp	uart_tx_end
	
uart_tx_frame:
	;send frame data
	activeframe Y
	sbiw	ZH:ZL, 12 ;frame data starts at offset 12
	bst	ZL, 0 ;T flag = pixel number is odd
	_lsr_w	Z ;Z = byte offset within frame (2 pixels per byte)
	add	YL, ZL
	adc	YH, ZH
	ld	ZL, Y
	brtc	PC+2 ;even pixel: value is already in the low nibble
	swap	ZL ;odd pixel: move high nibble to low nibble
	andi	ZL, 0x07
	out	UDR, ZL
	
uart_tx_end:
	_lds_w	Z, RAM_TxPos
	adiw	ZH:ZL, 1
	_ldi_w	Y, PIXELS + 12 ;total stream length (header + pixels)
	cp	ZL, YL
	cpc	ZH, YH
	;(sbi does not modify the flags, so both branches below test the
	;result of the compare above)
	brsh	PC+2
	sbi	UCSRB, UDRIE ;re-enable UDR empty interrupt
	brlo	PC+3
	clr	ZL ;end of frame reached: restart with header next time
	clr	ZH
	_sts_w	RAM_TxPos, Z
	
	pop	ZL
	out	SREG, ZL
	_pop_w	Z
	_pop_w	Y
	reti
	
.endif ;if (STREAM_OUTPUT)

;===============================================================================

.if (ARCADE)
	;========================================
	;==                                    ==
	;==  520 pixel output interrupt        ==
	;==                                    ==
	;========================================

;bit numbers of the row shift register lines on PORTE
.equ	SHIFT_DATA = 1
.equ	SHIFT_CLOCK = 2

;Column data macros for the 520-pixel oc0 interrupt.
;Each expansion reads one frame buffer byte (= 2 pixels) with PRE-DECREMENT of
;Z, i.e. the row is processed from its end towards its start. 'pwm' holds the
;current PWM value in the high nibble, so "cp byte, pwm" sets the carry flag
;when the pixel in the high nibble of the byte is darker than the current PWM
;value (LED off); the swap then brings the other pixel into the high nibble.
;The carry flag is shifted into the port image register.

.macro _set_led_A ;LEDs 26 to 21
	;shifts 2 bits into R0 (image of PORTD) from the MSB side
	ld	temp, -Z
	cp	temp, pwm
	ror	R0
	swap	temp
	cp	temp, pwm
	ror	R0
.endmacro

.macro _set_led_B ;LEDs 20 to 13
	;shifts 2 bits into R1 (image of PORTC) from the MSB side
	ld	temp, -Z
	cp	temp, pwm
	ror	R1
	swap	temp
	cp	temp, pwm
	ror	R1
.endmacro

.macro _set_led_C ;LEDs 12 to 5
	;shifts 2 bits into R2 (image of PORTA) from the LSB side
	ld	temp, -Z
	cp	temp, pwm
	rol	R2
	swap	temp
	cp	temp, pwm
	rol	R2
.endmacro

.macro _set_led_D ;LEDs 4 to 1
	;@0 = PORTB bit mask for the pixel in the high nibble of the byte
	;@1 = PORTB bit mask for the pixel in the low nibble of the byte
	;'temp' holds the image of PORTB with all four LED columns switched
	;off; the bit of each pixel that is lit is switched on here (the brlo
	;skips the instruction when the pixel is darker than the PWM value).
	ld	temp2, -Z
	cp	temp2, pwm
	brlo	PC+2
  .if (COLUMNS_INVERTED)
	andi	temp, ~(@0)
  .else
	ori	temp, @0
  .endif
	swap	temp2
	cp	temp2, pwm
	brlo	PC+2
  .if (COLUMNS_INVERTED)
	andi	temp, ~(@1)
  .else
	ori	temp, @1
  .endif
.endmacro

;oc0 (520 pixels): timer 0 output compare interrupt, display multiplexing.
;Each invocation outputs ONE row for the current PWM value:
;  1. advance the row counter (20 .. 1); when it wraps, feed a new '1' into
;     the row shift register and advance the PWM value
;  2. once per complete PWM sequence: decrement the RS232 timeout and the
;     frame display time; when the time has elapsed and a new frame is ready
;     (fNewFrame), swap the frame buffers and start the stream output
;  3. program the timer interval for the current PWM value
;  4. compute the column bits of the row, switch all columns off, clock the
;     row shift register and switch the columns on with the new data
;The frame buffer is read backwards, starting at the end of the active frame;
;RAM_MuxAddress carries the read position from row to row.
;Registers temp, temp2, Z, R0-R3 and SREG are saved and restored; the global
;registers row, pwm, flags and time are updated on purpose.
oc0:	;Timer 0 output compare interrupt (PWM steps)
	push	temp
	push	temp2
	_push_w	Z
	push	R0 ;TODO: use W1~W4 instead of R0~R3 (?)
	push	R1
	push	R2
	push	R3
	in	temp, SREG
	push	temp
	
	;decrement row
	cbi	PORTE, SHIFT_DATA ;default: shift a '0' into the row register
	dec	row
	brne	oc0_rowinc_end
	ldi	row, 20
	sbi	PORTE, SHIFT_DATA ;start of a new pass: shift in a '1'
	;increment PWM value ('pwm' register stores pwm value in high nibble!)
	subi	pwm, -0x30 ;PWM value sequence: 3, 6, 1, 4, 7, 2, 5, (0)
	andi	pwm, 0x70
	brne	oc0_pwminc_end ;zero: end of PWM sequence
	ldi	pwm, 0x30 ;start with first value of PWM sequence
.if (STREAM_INPUT)
	;decrement RS232 timeout counter
	lds	temp, RAM_Timeout_RS232
	tst	temp
	breq	oc0_timeout_end
	dec	temp
	sts	RAM_Timeout_RS232, temp
oc0_timeout_end:
.endif
	;decrement frame display duration if > 0
	_tst_w	time
	breq	oc0_time_zero
	_subi_w	time, 1
	brne	oc0_time_end
oc0_time_zero:
	sbrs	flags, fNewFrame
	rjmp	oc0_time_end
	;display new frame and send new frame to UART
	;swap active/inactive frame, clear fNewFrame:
	ldi	temp, 1<<fActiveFrame | 1<<fNewFrame
	eor	flags, temp
	_lds_w	time, RAM_Duration ;load duration for new frame
.if (STREAM_OUTPUT)
	sbi	UCSRB, UDRIE ;enable UDR empty interrupt (start transmitting)
.endif
oc0_time_end:
oc0_pwminc_end:
	;load start address
	activeframe Z ;max. 6 cycles
	_addi_w	Z, 520/2 ;end of frame (rows are read with pre-decrement)
	_sts_w	RAM_MuxAddress, Z
oc0_rowinc_end:
	
	;determine next interval
	mov	temp, pwm
	swap	temp ;temp = PWM value 1..7 = index into pwm_duration
	_ldi_w	Z, pwm_duration*2
	add	ZL, temp
	adc	ZH, zero
	lpm	temp, Z
	out	OCR0, temp
	
	;load framebuffer address for current row
	_lds_w	Z, RAM_MuxAddress
	
	;set LEDs 26 to 21
	clr	R0 ;bits 0 and 1 = '0'
.if (COLUMNS_INVERTED == 0)
	dec	R0 ;bits 0 and 1 = '1'  =>  '0' after complement
.endif
	_set_led_A ;7 cycles each
	_set_led_A
	_set_led_A
	
	;set LEDs 20 to 13
	_set_led_B ;7 cycles each
	_set_led_B
	_set_led_B
	_set_led_B
	
	;set LEDs 12 to 5
	_set_led_C ;7 cycles each
	_set_led_C
	_set_led_C
	_set_led_C
	
	;LEDs off
	;(PORTB is read back so that the SD card pins on PB4-7 keep their state;
	;only the LED column bits PB0-3 are changed)
	in	temp, PORTB
.if (COLUMNS_INVERTED)
	ldi	temp2, 0xFF
	mov	R3, temp2
	ldi	temp2, 0xFC
	ori	temp, 0x0F
.else
	com	R0
	com	R1
	com	R2
	clr	R3
	ldi	temp2, 0x00
	andi	temp, 0xF0
.endif
	out	PORTD, temp2
	out	PORTC, R3
	out	PORTA, R3
	out	PORTB, temp
	
	;set LEDs 4 to 1 [delay until transistors have turned off]
	_set_led_D 0x01, 0x02 ;9 cycles each
	_set_led_D 0x04, 0x08
	
	;select next row
	sbi	PORTE, SHIFT_CLOCK
	cbi	PORTE, SHIFT_CLOCK
	
	;delay until FETs have switched
	_sts_w	RAM_MuxAddress, Z ;store framebuffer address for next row
	
	;LEDs on with new data
	out	PORTD, R0
	out	PORTC, R1
	out	PORTA, R2
	out	PORTB, temp
	
	;return
	pop	temp
	out	SREG, temp
	pop	R3
	pop	R2
	pop	R1
	pop	R0
	_pop_w	Z
	pop	temp2
	pop	temp
	reti

.else ;if (ARCADE)
	;========================================
	;==                                    ==
	;==  144 pixel output interrupt        ==
	;==                                    ==
	;========================================

;Column data macros for the 144-pixel oc0 interrupt.
;Each expansion reads one frame buffer byte (= 2 pixels) with post-increment
;of Z. 'pwm' holds the current PWM value in the high nibble, so the compare
;always tests the pixel that is currently in the high nibble of 'temp'; the
;resulting carry flag (set = LED off) is shifted into the port image register.

.macro _set_led_1 ;LEDs 1 to 8
	;shifts 2 bits into R0 (image of PORTA); low-nibble pixel first.
	;The shift direction selects the bit order of PORTA (REVERSE_PORTA).
	ld	temp, Z+
	swap	temp
	cp	temp, pwm ;carry set if pwm > temp
  .if (REVERSE_PORTA)
	ror	R0
  .else
	rol	R0
  .endif
	swap	temp
	cp	temp, pwm ;carry set if pwm > temp
  .if (REVERSE_PORTA)
	ror	R0
  .else
	rol	R0
  .endif
.endmacro
	
.macro _set_led_2 ;LEDs 9 to 16
	;shifts 2 bits into R1 (image of PORTC); low-nibble pixel first
	ld	temp, Z+
	swap	temp
	cp	temp, pwm ;carry set if pwm > temp
	rol	R1
	swap	temp
	cp	temp, pwm ;carry set if pwm > temp
	rol	R1
.endmacro

.macro _set_led_3 ;LEDs 17 and 18
	;@0 = PORTD bit mask for the pixel in the low nibble (LED 17)
	;@1 = PORTD bit mask for the pixel in the high nibble (LED 18)
	;'temp2' holds the image of PORTD with both LED columns switched off;
	;the bit of each pixel that is lit is switched on here (the brlo skips
	;the instruction when the pixel is darker than the PWM value).
	ld	temp, Z+
	cp	temp, pwm
	brlo	PC+2
  .if (COLUMNS_INVERTED)
	andi	temp2, ~(@1)
  .else
	ori	temp2, @1
  .endif
	swap	temp
	cp	temp, pwm
	brlo	PC+2
  .if (COLUMNS_INVERTED)
	andi	temp2, ~(@0)
  .else
	ori	temp2, @0
  .endif
.endmacro
	
;oc0 (144 pixels): timer 0 output compare interrupt, display multiplexing.
;Each invocation outputs ONE row for the current PWM value:
;  1. advance the row ('row' is a one-hot mask for PORTB, shifted left); when
;     the bit is shifted out, restart with row 1 and advance the PWM value
;  2. once per complete PWM sequence: decrement the RS232 timeout and the
;     frame display time; when the time has elapsed and a new frame is ready
;     (fNewFrame), swap the frame buffers and start the stream output
;  3. program the timer interval for the current PWM value
;  4. compute the column bits of the row, switch the old row off, output the
;     new column data and switch the new row on
;RAM_MuxAddress carries the frame buffer read position from row to row.
;Registers temp, temp2, Z, R0, R1 and SREG are saved and restored; the global
;registers row, pwm, flags and time are updated on purpose.
oc0:	;Timer 0 output compare interrupt (PWM steps)
	push	temp
	push	temp2
	_push_w	Z
	push	R0
	push	R1
	in	temp, SREG
	push	temp
	
	;increment row
	lsl	row
	brne	oc0_rowinc_end
	ldi	row, 0x01
	;increment PWM value ('pwm' register stores pwm value in high nibble!)
	subi	pwm, -0x30 ;PWM value sequence: 3, 6, 1, 4, 7, 2, 5, (0)
	andi	pwm, 0x70
	brne	oc0_pwminc_end ;zero: end of PWM sequence
	ldi	pwm, 0x30 ;start with first value of PWM sequence
.if (STREAM_INPUT)
	;decrement RS232 timeout counter
	lds	temp, RAM_Timeout_RS232
	tst	temp
	breq	oc0_timeout_end
	dec	temp
	sts	RAM_Timeout_RS232, temp
oc0_timeout_end:
.endif
	;decrement frame display duration if > 0
	_tst_w	time
	breq	oc0_time_zero
	_subi_w	time, 1
	brne	oc0_time_end
oc0_time_zero:
	sbrs	flags, fNewFrame
	rjmp	oc0_time_end
	;display new frame and send new frame to UART
	;swap active/inactive frame, clear fNewFrame:
	ldi	temp, 1<<fActiveFrame | 1<<fNewFrame
	eor	flags, temp
	_lds_w	time, RAM_Duration ;load duration for new frame
.if (STREAM_OUTPUT)
	sbi	UCSRB, UDRIE ;enable UDR empty interrupt (start transmitting)
.endif
oc0_time_end:
oc0_pwminc_end:
	;load start address
	activeframe Z ;max. 6 cycles
	_sts_w	RAM_MuxAddress, Z
oc0_rowinc_end:
	
	;determine next interval
	mov	temp, pwm
	swap	temp ;temp = PWM value 1..7 = index into pwm_duration
	_ldi_w	Z, pwm_duration*2
	add	ZL, temp
	adc	ZH, zero
	lpm	temp, Z
	out	OCR0, temp
	
	;load framebuffer address for current row
	_lds_w	Z, RAM_MuxAddress
	
	;set LEDs 1 to 8
	_set_led_1 ;8 cycles each
	_set_led_1
	_set_led_1
	_set_led_1
	
	;set LEDs 9 to 16
	_set_led_2 ;8 cycles each
	_set_led_2
	_set_led_2
	_set_led_2
	
	;set LEDs 17, 18
	;(PORTD is read back so that the other pins on PD0-5 keep their state;
	;only the LED column bits PD6/7 are changed)
	in	temp2, PORTD
.if (COLUMNS_INVERTED)
	ori	temp2, 0xC0
.else
	andi	temp2, 0x3F
	com	R0
	com	R1
.endif
	_set_led_3 0x80, 0x40 ;9 cycles
	
.if (MEGA16)
  .if (BLPLUS_REV1_CORRECTION)
	;correction for PCB layout error in BlinkenLEDs Plus Kit Rev. 1
	;map bits [76543210] => [23456710]
	;(bits 1 and 0 are moved to the top of R0 first, then bits 7..2 of the
	;copy in 'temp' are shifted in from the top one by one, which reverses
	;their order and moves bits 1 and 0 back to their original position)
	mov	temp, R0
	swap	R0
	lsl	R0
	lsl	R0
	lsl	temp
	ror	R0
	lsl	temp
	ror	R0
	lsl	temp
	ror	R0
	lsl	temp
	ror	R0
	lsl	temp
	ror	R0
	lsl	temp
	ror	R0
  .endif
.endif
	
	;old row off
	out	PORTB, zero
	
	;delay until FET is off
	_sts_w	RAM_MuxAddress, Z ;store framebuffer address for next row
	
	;output new LED data
	out	PORTA, R0
	out	PORTC, R1
	out	PORTD, temp2
	
	;delay until transistors have switched
	;(the register restore doubles as the delay)
	pop	temp
	out	SREG, temp
	pop	R1
	pop	R0
	_pop_w	Z
	pop	temp2
	pop	temp
	
	;new row on
	out	PORTB, row
	;('out' does not modify SREG, so it is safe after SREG was restored)

	;return
	reti

.endif ;if (ARCADE)

;===============================================================================

.if (DEBUG)
	;========================================
	;==                                    ==
	;==  debugging output routines         ==
	;==                                    ==
	;========================================

;debug output and stream output both use the UART transmitter
.if (STREAM_OUTPUT)
	.error "STREAM_OUTPUT and DEBUG can't be set at the same time!"
.endif

;Debug output macros. All of them modify 'temp'; the string macros also
;modify R1:R0 (see putstr).
;NOTE(review): the macros use 'call', which is not available on every AVR
;(presumably not on the ATmega8515) - confirm that DEBUG builds assemble for
;all supported controllers.

.macro _puthex_cluster
	;send cluster number (16 or 32 bit)
	;@0 = double-word register prefix; the upper 16 bits are only sent for
	;FAT32 file systems (flag fFAT32)
	sbrs	flags, fFAT32
	rjmp	puthex_cluster_16bit
	mov	temp, @04
	call	puthex
	mov	temp, @03
	call	puthex
puthex_cluster_16bit:
	mov	temp, @02
	call	puthex
	mov	temp, @01
	call	puthex
.endmacro

.macro _puthex32
	;send 32 bit hex value (@0 = double-word register prefix, MSB first)
	mov	temp, @04
	call	puthex
	mov	temp, @03
	call	puthex
	mov	temp, @02
	call	puthex
	mov	temp, @01
	call	puthex
.endmacro

.macro _puthex16
	;send 16 bit hex value (@0 = word register prefix, high byte first)
	mov	temp, @0H
	call	puthex
	mov	temp, @0L
	call	puthex
.endmacro

.macro _putc
	;send single character (@0 = character constant)
	ldi	temp, @0
	call	putc
.endmacro

.macro _putstr
	;send debug message (modifies R1:R0 and temp)
	;@0 = string constant; it is stored in flash directly behind the call
	;and putstr returns to the instruction following the string
	call	putstr
	.db @0, 0
.endmacro

.macro _putstr_lf
	;send debug message, followed by line feed (modifies R1:R0 and temp)
	call	putstr
	.db @0, 0x0a, 0
.endmacro

;puthex: send the value of 'temp' as 2 hex digits (upper case) to the UART.
;  IN:     temp = value
;  MODIFY: temp, SREG
puthex:
	;send 2-digit hex value to UART
	push	temp
	swap	temp
	rcall	puthex_sub ;send high nibble
	pop	temp
	;(fall through to puthex_sub to send the low nibble)
puthex_sub:
	;send the low nibble of 'temp' as one hex digit
	andi	temp, 0x0F
	ori	temp, 0x30 ;'0' .. '9' (and 0x3A .. 0x3F for values 10 .. 15)
	cpi	temp, 0x3A
	brlo	PC+2
	subi	temp, -7 ;0x3A .. 0x3F => 'A' .. 'F'
	;(fall through to putc)

;putc: send the character in 'temp' to the UART (busy-waits until the UART
;data register is empty).
putc:
	;send char to UART
	sbis	UCSRA, UDRE
	rjmp	PC-1
	out	UDR, temp
	ret
	
;putstr: send an inline string to the UART.
;The zero-terminated string is stored in flash directly behind the call
;instruction (see macros _putstr / _putstr_lf). The return address on the
;stack is therefore the (word) address of the string; it is replaced by the
;address of the first word behind the string before returning.
;Z is preserved (saved in R1:R0).
putstr:
	;send a string to UART (modifies R1:R0 and temp)
	;  PARAMS: 0x00-terminated string
	movw	R1:R0, ZH:ZL
	_pop_w	Z ;return address = word address of string
	_lsl_w	Z ;convert to byte address for lpm
putstr_loop:
	lpm	temp, Z+
	cpi	temp, 0
	breq	putstr_end
	rcall	putc
	rjmp	putstr_loop
putstr_end:
	;return
	adiw	ZH:ZL, 1 ;round up to the next word boundary (padding byte)
	_lsr_w	Z ;convert back to word address
	_push_w	Z
	movw	ZH:ZL, R1:R0
	ret

.endif ;if (DEBUG)

;===============================================================================

;reset: program entry point (reset vector).
;Initializes stack, I/O ports, timer 0, global registers, UART and (520-pixel
;projects) hardware SPI, enables interrupts, loads the splash screen and then
;continues with SD card or flash animation playback. Never returns.
reset:
	;========================================
	;==                                    ==
	;==  initialization                    ==
	;==                                    ==
	;========================================
	
	;set stackpointer
	_ldi_w	Z, RAMEND
	out	SPH, ZH
	out	SPL, ZL
	
	;init ports (TODO: set PD0 to output or enable pullup if RS232 disabled)
	;PORTA and PORTC: LED columns, all outputs, all LEDs off
	ldi	temp, 0xFF
	out	DDRA, temp
	out	DDRC, temp
.if (COLUMNS_INVERTED)
	ldi	temp, 0xFF
.else
	ldi	temp, 0x00
.endif
	out	PORTA, temp
	out	PORTC, temp
.if (ARCADE)
	;PORTB: LED columns (PB0-3) and SD card SPI (PB4-7), MISO (PB6) = input
	ldi	temp, 0xBF
	out	DDRB, temp
	;PORTD: LED columns (PD2-7), TxD (PD1) = output, RxD (PD0) = input
	ldi	temp, 0xFE
	out	DDRD, temp
  .if (COLUMNS_INVERTED)
	ldi	temp, 0x7F ;SPI: all high except SCK
	ldi	temp2, 0xFC
  .else
	ldi	temp, 0x70 ;SPI: all high except SCK
	ldi	temp2, 0x00
  .endif
	out	PORTB, temp
	out	PORTD, temp2
	;PORTE: shift register data/clock (PE1/2) = outputs, card detect switch
	;(PE0) = input with pull-up
	ldi	temp, 0x06
	out	DDRE, temp
	ldi	temp, 0x01
	out	PORTE, temp
.else
	;PORTB: LED rows, all outputs, all rows off
	ldi	temp, 0xFF
	out	DDRB, temp
	ldi	temp, 0x00
	out	PORTB, temp
  .if (MEGA16 && (ARCADE == 0) && JUMPERS && (STREAM_INPUT || STREAM_OUTPUT || DEBUG))
	;Mega16 with baud rate jumpers: the jumpers share the SD card MOSI and
	;CK pins, which stay inputs until the jumpers have been read (see UART
	;initialization below)
	ldi	temp, 0xC3 | 1<<SD_CS ;read jumpers: set MOSI and CK to input
	out	DDRD, temp
    .if (COLUMNS_INVERTED)
	ldi	temp, 0xC0 | 1<<SD_CS | 1<<SD_MISO ;read jumpers: no pullups
    .else
	ldi	temp, 1<<SD_CS | 1<<SD_MISO ;read jumpers: no pullups
    .endif
	out	PORTD, temp
  .else
	;PORTD: LED columns (PD6/7), SD card (software SPI) and UART pins
	ldi	temp, 0xC3 | 1<<SD_CS | 1<<SD_MOSI | 1<<SD_CK
	out	DDRD, temp
    .if (COLUMNS_INVERTED)
	ldi	temp, 0xC0 | 1<<SD_CS | 1<<SD_MOSI | 1<<SD_MISO
    .else
	ldi	temp, 1<<SD_CS | 1<<SD_MOSI | 1<<SD_MISO
    .endif
	out	PORTD, temp
  .endif
  .if (MEGA16 == 0)
	;PORTE: card detect switch (PE0) and baud rate jumpers (PE1/2), all
	;inputs with pull-ups
	ldi	temp, 0x00
	out	DDRE, temp
	ldi	temp, 0x07
	out	PORTE, temp
  .endif
.endif
	
	;init timer 0 (timing and PWM):
	;  144 pixels: 72 PWM "steps" per row, 8 rows of LEDs
	;              14.7456 MHz / 256 / 72 / 8 = 100 Hz
	;  520 pixels: 115 PWM "steps" per row, 20 rows of LEDs
	;              14.7456 MHz / 64 / 115 / 20 = ~100.174 Hz
	;(the first compare value is a dummy; oc0 reloads OCR0 on every interrupt)
	ldi	temp, 1
	out	OCR0, temp
.if (ARCADE)
	ldi	temp, 1<<WGM01 | 0x03 ;CTC mode, Clk/64
.else
	ldi	temp, 1<<WGM01 | 0x04 ;CTC mode, Clk/256
.endif
	out	TCCR0, temp
	
	;enable timer interrupt
	;(takes effect when interrupts are enabled globally with sei below)
	ldi	temp, 1<<OCIE0
	out	TIMSK, temp
	
	;init registers
	;'zero' must stay 0x00 for the whole run time (used as constant operand),
	;'flags' starts with all flags cleared, 'time' = 0 means that the next
	;frame is displayed as soon as it is ready.
	clr	zero
	clr	flags
	_clr_w	time
	_sts_w	RAM_TxPos, time ;clear RAM_TxPos
	;'row' and 'pwm' are preset to the LAST step of their sequences, so that
	;the first oc0 interrupt wraps both around, starts a new PWM sequence and
	;initializes RAM_MuxAddress. 'pwm' stores the PWM value in the HIGH
	;nibble (sequence 3, 6, 1, 4, 7, 2, 5), so the last value is 0x50.
.if (ARCADE)
	ldi	row, 1 ;last row in sequence
	ldi	pwm, 0x50 ;last PWM value in sequence
.else
	ldi	row, 0x00 ;no row
	ldi	pwm, 0x50 ;last PWM value in sequence (was 5 = low nibble: wrong format)
.endif
	
;TODO: delay (?)
	
.if (STREAM_INPUT || STREAM_OUTPUT || DEBUG)
	;init UART
	;receiver and transmitter enabled, no UART interrupts yet (the UDR empty
	;interrupt is enabled by oc0 when a frame is to be transmitted).
	;The baud rate divider (UBRRL value) is collected in 'temp' below.
	ldi	temp, 1<<RXEN | 1<<TXEN
	out	UCSRB, temp
	ldi	temp, 0
	out	UBRRH, temp
  .if ((ARCADE == 0) && JUMPERS)
    .if (MEGA16)
	;read baud rate jumpers on PIND (shared pins with SD card), using legacy
	;BlinkenLEDs Plus Rev.1 Baud Rates (>200k Baud impossible with MAX232!)
	;(mask 0x28 = SD card MOSI [PD3] and CK [PD5] pins)
	in	temp2, PIND
	andi	temp2, 0x28
	ldi	temp, 15 ;57600 Baud @ 14.7456 MHz
	cpi	temp2, 0x20
	brne	PC+2
	ldi	temp, 7 ;115200 Baud @ 14.7456 MHz
	cpi	temp2, 0x08
	brne	PC+2
	ldi	temp, 3 ;230400 Baud @ 14.7456 MHz
	cpi	temp2, 0x28
	brne	PC+2
	ldi	temp, 1 ;460800 Baud @ 14.7456 MHz
	;jumpers have been read: switch the shared pins to their SD card
	;function (MOSI and CK become outputs)
      .if (COLUMNS_INVERTED)
	ldi	temp2, 0xC0 | 1<<SD_CS | 1<<SD_MOSI | 1<<SD_MISO
      .else
	ldi	temp2, 1<<SD_CS | 1<<SD_MOSI | 1<<SD_MISO
      .endif
	out	PORTD, temp2
	ldi	temp2, 0xC3 | 1<<SD_CS | 1<<SD_MOSI | 1<<SD_CK
	out	DDRD, temp2
    .else
	;read baud rate jumpers on PINE
	;(mask 0x06 = PE1/PE2, pulled up internally, jumpers connect to ground)
	in	temp2, PINE
	andi	temp2, 0x06
	ldi	temp, 7 ;115200 Baud @ 14.7456 MHz
	cpi	temp2, 0x04
	brne	PC+2
	ldi	temp, 15 ;57600 Baud @ 14.7456 MHz
	cpi	temp2, 0x02
	brne	PC+2
	ldi	temp, 23 ;38400 Baud @ 14.7456 MHz
	cpi	temp2, 0x00
	brne	PC+2
	ldi	temp, 47 ;19200 Baud @ 14.7456 MHz
    .endif
  .else
	;no jumpers: fixed baud rate
    .if (MEGA16)
	ldi	temp, 15 ;57600 Baud @ 14.7456 MHz (legacy setting)
    .else
	ldi	temp, 7 ;115200 Baud @ 14.7456 MHz
    .endif
  .endif
	out	UBRRL, temp
  .if (DEBUG >= 1)
	_putc 0x0a
	_putstr_lf "RESET"
    .if ((ARCADE == 0) && JUMPERS)
	;report the selected baud rate (decoded from the divider in UBRRL)
	_putstr "Jumpers set to "
	in	temp, UBRRL
	cpi	temp, 1
	brne	debug_baudrate_460800end
	_putstr_lf "460800 Baud"
	rjmp	debug_baudrate_end
debug_baudrate_460800end:
	cpi	temp, 3
	brne	debug_baudrate_230400end
	_putstr_lf "230400 Baud"
	rjmp	debug_baudrate_end
debug_baudrate_230400end:
	cpi	temp, 7
	brne	debug_baudrate_115200end
	_putstr_lf "115200 Baud"
	rjmp	debug_baudrate_end
debug_baudrate_115200end:
	cpi	temp, 15
	brne	debug_baudrate_57600end
	_putstr_lf "57600 Baud"
	rjmp	debug_baudrate_end
debug_baudrate_57600end:
	cpi	temp, 23
	brne	debug_baudrate_38400end
	_putstr_lf "38400 Baud"
	rjmp	debug_baudrate_end
debug_baudrate_38400end:
	cpi	temp, 47
	brne	debug_baudrate_19200end
	_putstr_lf "19200 Baud"
	rjmp	debug_baudrate_end
debug_baudrate_19200end:
	_putstr_lf "(unknown)"
debug_baudrate_end:
    .endif
  .endif
.endif ;if (STREAM_INPUT || STREAM_OUTPUT || DEBUG)

.if (ARCADE)
	;init hardware SPI (Master mode, data mode 0, clk/2 [double speed mode])
	ldi	temp, 1<<SPE | 1<<MSTR | 0x00
	out	SPCR, temp
	ldi	temp, 1<<SPI2X
	out	SPSR, temp
.endif
	
	;enable interrupts
	;(starts the display multiplexing in oc0)
	sei
	
	;display splash screen
	;(frame_load only fills the inactive frame buffer and sets fNewFrame;
	;the frame is made visible by oc0)
	_ldi_w	Z, initframe*2
	rcall	frame_load
	
;-------------------------------------------------------------------------------

;if SD card support is enabled, check if an SD card is present (Mega16: try to
;initialize SD card, assume none present if init fails), otherwise fall through
;to flash animation
.if (USE_SDCARD)
  .if (MEGA16 == 0)
	;card detect switch: pin low = card inserted (the sbis skips the jump
	;while the pin is high)
	sbis	PINE, SD_DETECT
  .endif
	rjmp	sdcard
.endif

;===============================================================================

;flash: play the animation stored in flash memory (endless loop).
;Entry points:
;  flash           start with the first frame of the animation
;  flash_continue  continue with the frame that Z points to
;  flash_wait      like flash_continue, but without setting fFlashAnimation
;The loop waits until the previously loaded frame has been taken over by the
;display interrupt (fNewFrame cleared by oc0) and then loads the next frame.
;While waiting it leaves to 'sdcard' when a card is inserted (controllers
;with card detect switch only) or to 'stream' when a byte was received.
;Z holds the flash (byte) address of the next frame.
flash:
	;========================================
	;==                                    ==
	;==  play animations from flash memory ==
	;==                                    ==
	;========================================
	
	;display animation from flash
.if (USE_SDCARD)
	sbi	SD_PORT, SD_CS ;deselect SD card
.endif
	_ldi_w	Z, frames*2
	_ldi_w	time, 0 ;show the first frame immediately
.if (DEBUG >= 1)
	_putc 0x0a
	_putstr_lf "Playing animations from flash memory"
	ori	flags, 1<<fFlashAnimation
	rjmp	flash_wait ;skip the "Continue ..." message below
.endif

flash_continue:
	ori	flags, 1<<fFlashAnimation
.if (DEBUG >= 1)
	_putc 0x0a
	_putstr_lf "Continue playing animations from flash memory"
.endif

flash_wait:
	;wait until current frame time has elapsed
.if (DEBUG >= 3)
	_putstr " [waiting "
	_puthex16 time
	_putc ']'
.endif
flash_wait_loop:
.if (USE_SDCARD && (MEGA16 == 0))
	;card detect switch: pin low = card inserted
	sbic	PINE, SD_DETECT
	rjmp	flash_no_sdcard
	;card inserted: try to use it unless it has been rejected before
	sbrs	flags, fCardRejected
	rjmp	sdcard
	rjmp	flash_sdcard_end
flash_no_sdcard:
	;card removed: allow a new attempt for the next card
	andi	flags, LOW(~(1<<fCardRejected))
flash_sdcard_end:
.endif
.if (STREAM_INPUT)
	sbic	UCSRA, RXC ;jump to stream mode if UART received something
	rjmp	stream
.endif
	;previous frame still waiting to be displayed: keep waiting
	sbrc	flags, fNewFrame
	rjmp	flash_wait_loop
.if (DEBUG >= 3)
	_putstr_lf " OK"
.endif
	
	rcall	frame_load
	rjmp	flash_wait
	
;--------------------

frame_loadfirst:
	;load first frame from flash
	_ldi_w	Z, frames*2
frame_load:
	;load and display next frame from flash (Z)
	;  IN:  Z = flash byte address of a frame record: 16-bit duration (low
	;           byte first) followed by PIXELS/2 bytes of pixel data
	;  OUT: Z = address of the following record, RAM_Duration = duration,
	;       fNewFrame set
	;  modifies: X, Y, temp (plus whatever the macros use)
	;  A duration of 0x0000 marks the end of the list and restarts at 'frames'.
	lpm	YL, Z+
	lpm	YH, Z+
	_tst_w	Y ;0x0000 = end marker
	breq	frame_loadfirst ;restart animation with first frame
	_sts_w	RAM_Duration, Y
.if (DEBUG >= 3)
	_putstr "    New Frame, duration: "
	_puthex16 Y
.endif
	;'inactiveframe' is a macro defined elsewhere; presumably it points Y at
	;the frame buffer that is not currently being displayed
	inactiveframe Y
	_ldi_w	X, PIXELS/2 ;byte counter (PIXELS/2 bytes per frame)
frame_load_loop:
	lpm	temp, Z+
	st	Y+, temp
	sbiw	XH:XL, 1
	brne	frame_load_loop
	ori	flags, 1<<fNewFrame
	ret
	
;===============================================================================

.if (STREAM_INPUT)

stream:
	;========================================
	;==                                    ==
	;==  display RS232 MCUF stream         ==
	;==                                    ==
	;========================================
	
	;Entered by 'rjmp' from the flash wait loop when the UART has received a
	;byte. The current flash position (Z) is kept on the stack for the whole
	;time spent in stream mode and is restored at stream_exit.
	
	;RS232 Stream
	_push_w	Z ;backup flash position
	ldi	temp, 0
	sts	RAM_Timeout_Stream, temp
stream_framestart:
	;receive and check header
	ldi	temp, 100 ;timeout 1 s
	sts	RAM_Timeout_RS232, temp
	ldi	count, 11 ;the first 11 header bytes must match mcuf_header exactly
	_ldi_w	Z, mcuf_header*2
stream_header:
	rcall	uart_getc ;doesn't return here on timeout (see uart_getc_timeout)
	lpm	temp2, Z+
	cp	temp, temp2
	brne	stream_error
	dec	count
	brne	stream_header
	;maxval LSB: must be a power of two minus one (range 1 to 255)
	rcall	uart_getc
	cpi	temp, 0
	breq	stream_error
	mov	temp2, temp
	ldi	count, -4 ;count determines shift for data bytes: neg=L, pos=R
stream_maxval_check:
	;count the trailing one bits: count = (number of one bits) - 3, e.g.
	;maxval 7 => 0 (no shift), 255 => +5 (shift right), 1 => -2 (shift left)
	inc	count
	lsr	temp2
	brcs	stream_maxval_check
	cpi	temp2, 0 ;any bit left above the first zero bit => not 2^n - 1
	brne	stream_error
	rjmp	stream_data
	
stream_timeout:
	;timeout waiting for next frame: return to flash
	;Reached from uart_getc_timeout when RAM_Timeout_Stream has just been
	;counted down to zero.
.if (DEBUG >= 1)
	_putstr_lf "Stream Timeout"
.endif
	;This jump must be assembled for every DEBUG level. Inside the '.if' it
	;was missing from non-debug builds, where execution then fell through
	;into the error path below and (fFlashAnimation being clear) went back to
	;stream_framestart, so stream mode was never left again.
	rjmp	stream_exit
	
stream_rs232_timeout:
	;timeout waiting for data: return to flash if no previous frame received
.if (DEBUG >= 3)
	_putstr_lf "RS232 Timeout"
	rjmp	stream_error_end ;skip the "RS232 Error" message
.endif
stream_error:
	;received invalid data
.if (DEBUG >= 1)
	_putstr_lf "RS232 Error"
stream_error_end:
.endif
	;a valid frame was received before: wait for next frame
	;(fFlashAnimation is cleared by stream_data after the first valid frame)
	sbrs	flags, fFlashAnimation
	rjmp	stream_framestart
	;no valid frame received yet: return to flash animation mode
stream_exit:
	_pop_w	Z ;restore last flash position
	rjmp	flash_continue
	
;--------------------

uart_getc:
	;wait for one byte from the UART
	;  OUT: temp = received byte; RAM_Timeout_RS232 reloaded with 10
	;  Does NOT return to the caller in two cases (the return address is
	;  removed from the stack and execution continues elsewhere):
	;    - SD card inserted (and not rejected): jump to 'sdcard'
	;    - RAM_Timeout_RS232 reached zero: see uart_getc_timeout
	;  RAM_Timeout_RS232 is only written here; it is presumably counted down
	;  by a timer interrupt outside this section - TODO confirm
.if (DEBUG >= 4)
	_putc	'?'
.endif
uart_getc_wait:
.if (USE_SDCARD && (MEGA16 == 0))
	;poll "card detect" switch
	sbic	PINE, SD_DETECT
	rjmp	uart_getc_no_sdcard
	sbrc	flags, fCardRejected
	rjmp	uart_getc_sdcard_end
	;SD-card inserted
	;NOTE(review): the flash position pushed at 'stream' is still on the
	;stack when jumping to 'sdcard' from here
	pop	temp ;remove return address from stack
	pop	temp
	rjmp	sdcard
uart_getc_no_sdcard:
	andi	flags, LOW(~(1<<fCardRejected))
uart_getc_sdcard_end:
.endif
	;check for timeout
	lds	temp, RAM_Timeout_RS232
	tst	temp
	breq	uart_getc_timeout
	;check for reception
	sbis	UCSRA, RXC
	rjmp	uart_getc_wait
	ldi	temp, 10 ;timeout 100 ms
	sts	RAM_Timeout_RS232, temp
	in	temp, UDR
	ret
	
uart_getc_timeout:
	;timeout while waiting for RS232 reception
	;  RAM_Timeout_Stream == 0:   no stream timeout running => RS232 timeout
	;  otherwise decrement it:    reached zero => stream timeout (back to
	;                             flash), else  => RS232 timeout
	pop	temp ;remove return address from stack
	pop	temp
	lds	temp, RAM_Timeout_Stream
	tst	temp
	breq	stream_rs232_timeout
	dec	temp
	sts	RAM_Timeout_Stream, temp ;'sts' leaves the Z flag of 'dec' intact
.if (DEBUG >= 3)
	mov	temp2, temp
	_putc '['
	mov	temp, temp2
	rcall	puthex
	_putstr "] "
	tst	temp2 ;restore the zero flag for the branch below
.endif
	_breq	stream_timeout
	rjmp	stream_rs232_timeout
	
	
;--------------------

stream_data:
	;receive data
	;One received byte per pixel, two pixels are packed into one RAM byte:
	;the first pixel goes to the low nibble, the second to the high nibble.
	;The frame is collected in RAM_RxBuffer first and only copied to the
	;frame buffer when all PIXELS/2 bytes have arrived.
	_ldi_w	Y, RAM_RxBuffer
	_ldi_w	X, PIXELS/2
stream_receive:
	rcall	uart_getc
	rcall	stream_shift
	mov	ZL, temp ;ZL is used as scratch here (flash position is on the stack)
	rcall	uart_getc
	rcall	stream_shift
	swap	temp
	or	temp, ZL
	st	Y+, temp
	sbiw	XH:XL, 1
	brne	stream_receive
	;frame complete
.if (DEBUG >= 1)
	sbrs	flags, fFlashAnimation
	rjmp	stream_debug_end
	_putc 0x0a
	_putstr_lf "MCUF Stream detected, stopping playback from flash memory"
stream_debug_end:
.endif
	andi	flags, ~(1<<fFlashAnimation) ;disable flash animation
.if (DEBUG >= 3)
	_putstr_lf "  New MCUF frame received"
.endif
	andi	flags, ~(1<<fNewFrame)
	_ldi_w	time, 0
	_sts_w	RAM_Duration, time ;duration 0
	;copy new frame to framebuffer
	_ldi_w	X, PIXELS/2
	_ldi_w	Y, RAM_RxBuffer
	inactiveframe Z
stream_copy:
	ld	temp, Y+
	st	Z+, temp
	sbiw	XH:XL, 1
	brne	stream_copy
	ori	flags, 1<<fNewFrame
	;done, wait for next frame
	ldi	temp, STREAM_TIMEOUT_VAL ;(re)start the stream timeout
	sts	RAM_Timeout_Stream, temp
	rjmp	stream_framestart
	
;--------------------

stream_shift:
	;scale the received byte in 'temp' to 3 bits per pixel
	;  IN:  temp  = received pixel value
	;       count = shift amount: 0 = none, >0 = shift right 'count' times,
	;               <0 = shift left '-count' times
	;  OUT: temp  = shifted value, masked to bits 2..0
	;  modifies: R0 (loop counter, only when count is not zero)
	tst	count
	breq	stream_shift_end ;nothing to shift, just mask
	mov	R0, count ;'mov' keeps the flags of 'tst' for the branch below
	brpl	stream_shift_right
stream_shift_left: ;negative count: shift left, count up to zero
	lsl	temp
	inc	R0
	brne	stream_shift_left
	rjmp	stream_shift_end
stream_shift_right: ;positive count: shift right, count down to zero
	lsr	temp
	dec	R0
	brne	stream_shift_right
stream_shift_end:
	andi	temp, 0x07
	ret
	
.endif ;if (STREAM_INPUT)
	
;===============================================================================

.if (USE_SDCARD)
	;========================================
	;==                                    ==
	;==  play animations from SD card      ==
	;==                                    ==
	;========================================

message:
	;display scrolling text message
	;  PARAMS: 0x00-terminated string (chars 0x20 to 0x7E only)
	;  modifies: R1:R0, temp, temp2, count, X, Y
	;The string is stored in flash directly behind the 'rcall message'
	;instruction: the return address popped from the stack is used as string
	;pointer, and a corrected return address (first word after the string)
	;is pushed back before 'ret'. Z is preserved via R1:R0.
	movw	R1:R0, ZH:ZL
	_pop_w	Z
	_lsl_w	Z ;word address => byte address for 'lpm'
	
	;clear frame
	andi	flags, ~(1<<fNewFrame)
	_clr_w	time
	activeframe X
	ldi	count, PIXELS/4 ;two bytes per iteration => PIXELS/2 bytes
message_clear:
	st	X+, zero
	st	X+, zero
	dec	count
	brne	message_clear
	
message_loop:
	;get next char from flash
	lpm	temp, Z+
	cpi	temp, 0
	breq	message_end
	subi	temp, 0x20 ;charset index (first char in table is 0x20)
	clr	temp2 ;multiply char # by 8 (8 bytes/char)
	lsl	temp
	rol	temp2
	lsl	temp
	rol	temp2
	lsl	temp
	rol	temp2
	_push_w	Z ;save string pointer while Z addresses the charset
	_ldi_w	Z, charset*2
	add	ZL, temp
	adc	ZH, temp2
	ldi	count, 8 ;8 columns/char
message_char_loop:
	lpm	temp, Z+ ;one column of the character
	rcall	message_scroll
message_delay:
	;wait until the scrolled frame has been taken over
	sbrc	flags, fNewFrame
	rjmp	message_delay
	dec	count
	brne	message_char_loop
	_pop_w	Z
	rjmp	message_loop
message_end:
	;continue scrolling until message has disappeared entirely (WIDTH times)
	ldi	count, WIDTH
message_end_wait:
	ldi	temp, 0 ;empty column
	rcall	message_scroll
message_end_delay:
	sbrc	flags, fNewFrame
	rjmp	message_end_delay
	dec	count
	brne	message_end_wait
	;return
	;Z points behind the terminating 0x00 (byte address): round up to the
	;next word boundary and convert back to a word address
	adiw	ZH:ZL, 1
	_lsr_w	Z
	_push_w	Z
	movw	ZH:ZL, R1:R0
	ret
	
message_scroll:
	;scroll message to the left, add new data from temp to the right
	;  IN:  temp = new column, bit 0 = first line processed, one bit per line
	;  OUT: scrolled copy of the active frame written to the inactive frame,
	;       RAM_Duration set, fNewFrame set
	;  modifies: temp, temp2, X, Y, framelen (Z is saved and restored)
	activeframe X
	inactiveframe Y
	_push_w	Z
	ldi	temp2, HEIGHT
	mov	framelen, temp2 ;line counter
message_scroll_line:
	;scroll RAM contents 1 pixel (1/2 byte) to the left
	;ZL holds the nibble-swapped current byte, ZH the nibble-swapped next
	;byte; the output byte takes its low nibble from ZL and its high nibble
	;from ZH
	ldi	temp2, WIDTH/2 - 1
	ld	ZL, X+
	swap	ZL
message_scroll_pixel:
	ld	ZH, X+
	swap	ZH
	push	ZH ;becomes the "current" byte of the next iteration
	andi	ZL, 0x0F
	andi	ZH, 0xF0
	or	ZL, ZH
	st	Y+, ZL
	pop	ZL
	dec	temp2
	brne	message_scroll_pixel
	;last byte of the line: new pixel goes to the high nibble
	andi	ZL, 0x0F
	sbrc	temp, 0
	ori	ZL, 0x70 ;high nibble: max. brightness (LED on)
	st	Y+, ZL
	lsr	temp ;next line uses the next bit of the column data
	dec	framelen
	brne	message_scroll_line
.if (MESSAGE_DISABLE)
	_ldi_w	Z, 0 ;no delay
.else
  .if (ARCADE)
	_ldi_w	Z, 2 ;20 ms
  .else
	_ldi_w	Z, 3 ;30 ms
  .endif
.endif
	_sts_w	RAM_Duration, Z
	ori	flags, 1<<fNewFrame
	_pop_w	Z
	ret
	
;--------------------

sdcard:
	;SD card entry point: initialize the card, read the MBR and look for the
	;first FAT16/FAT32 partition. Every failure path ends in 'rjmp flash';
	;the paths that display an error message also set fCardRejected.
	call	sd_init
	cpi	temp, 0 ;sd_init result: 0 is treated as success
	breq	sdcard_mbr
	;SD card init failed, play animation from flash
	rjmp	flash
	
sdcard_mbr:
	;read first sector
	_ldi_d	Z, 0
	rcall	sd_start_block
	;dump the first 446 bytes
	ldi	count, 446/2 ;two bytes per iteration
sdcard_mbr_dump:
	rcall	sd_clock
	rcall	sd_clock
	dec	count
	brne	sdcard_mbr_dump
	;read partition table and signature, find FAT16 or FAT32 partition
	_ldi_w	Y, RAM_Line
	ldi	count, 66 ;4 entries of 16 bytes + 2 signature bytes
	rcall	sd_read_buffer
	_clr_w	sector ;512 - (446 dumped + 66 read) = 0 remaining
	rcall	sd_finish_block
	_lds_w	X, RAM_Line+64 ;signature bytes behind the partition table
	cpi	XL, 0x55
	_brne	sd_mbr_error
	cpi	XH, 0xAA
	_brne	sd_mbr_error
	ldi	count, 4 ;4 partition table entries
	_ldi_w	Y, RAM_Line
sd_partition:
	ldd	temp, Y+4 ;read partition type
.if (DEBUG >= 1)
	push	temp
	_putstr "Partition"
	ldi	temp, '5' ;'5' - count = partition number '1'..'4'
	sub	temp, count
	rcall	putc
	_putstr ": Type "
	pop	temp
	push	temp
	rcall	puthex
	_putc 0x0a
	pop	temp
.endif
	cpi	temp, 0x4 ;FAT16 <32MB
	_breq	sd_partition_fat16
	cpi	temp, 0x6 ;FAT16
	_breq	sd_partition_fat16
	cpi	temp, 0xB ;FAT32
	_breq	sd_partition_fat32
	cpi	temp, 0xC ;FAT32 LBA
	_breq	sd_partition_fat32
	adiw	YH:YL, 16 ;next partition table entry
	dec	count
	brne	sd_partition
	
	;no FAT16/32 partition found
.if (DEBUG >= 1)
	_putstr_lf "No FAT16/32 Partition found."
.endif
	;the text for 'message' follows the call inline
	rcall	message
		.db "No FAT16/32 Partition found.",0
	ori	flags, 1<<fCardRejected
	rjmp	flash
	
sd_mbr_error:
	;faulty MBR
.if (DEBUG >= 1)
	_putstr_lf "Invalid MBR signature (not 0x55 0xAA)"
.endif
	rcall	message
		.db "Invalid MBR!",0
	ori	flags, 1<<fCardRejected
	rjmp	flash
	
	
sd_partition_fat32:
	;FAT32 partition found
	;  IN: Y = address of the matching partition table entry in RAM_Line
.if (DEBUG >= 1)
	_putstr_lf "Found FAT32 partition."
.endif
	ori	flags, 1<<fFAT32
	rjmp	sd_partition_read

sd_partition_fat16:
	;FAT16 partition found
	;  IN: Y = address of the matching partition table entry in RAM_Line
.if (DEBUG >= 1)
	_putstr_lf "Found FAT16 partition."
.endif
	andi	flags, ~(1<<fFAT32)

sd_partition_read:
	;read partition data from MBR partition table
	;  OUT: Z4:Z3:Z2:Z1 = card address of the partition's first sector
	;       (byte address if fSDHC is clear, sector address if it is set)
	sbrc	flags, fSDHC
	rjmp	sd_partition_sdhc
	;SD: convert sector address to byte address
	;(address = LBA * 512: the lowest three LBA bytes are loaded one byte
	;higher (*256) and shifted left once (*2), the lowest byte is zero)
	clr	Z1
	ldd	Z2, Y+8 ;LBA of first sector, byte 1
	ldd	Z3, Y+9 ;LBA of first sector, byte 2
	ldd	Z4, Y+10 ;LBA of first sector, byte 3
	lsl	Z2
	rol	Z3
	rol	Z4
	rjmp	sd_partition_end
sd_partition_sdhc:
	;SDHC
	_ldd_d	Z, Y+8 ;LBA of first sector
sd_partition_end:
.if (DEBUG >= 1)
	_putstr "Partition starts at "
	_puthex32 Z
	_putc 0x0a
.endif

	;read FAT boot sector
	;  IN:  Z = card address of the partition's first sector
	;  OUT: RAM_Line = first 48 bytes of the boot sector,
	;       RAM_FAT_Clustersize = sectors per cluster,
	;       RAM_FAT_Start = Z = card address of the first FAT
	rcall	sd_start_block ;also advances Z by one sector
	_ldi_w	Y, RAM_Line
	ldi	count, 48 ;read the first 48 bytes to RAM_Line
	rcall	sd_read_buffer
	rcall	sd_finish_block ;dump the rest
	lds	temp, RAM_Line+0xD ;sectors per cluster
	sts	RAM_FAT_Clustersize, temp
.if (DEBUG >= 1)
	rcall	puthex
	_putstr_lf " sectors/cluster"
.endif
	
	_lds_w	X, RAM_Line+0xE ;reserved sector count => start of first FAT
	sbiw	XH:XL, 1 ;first sector already read
	sbrc	flags, fSDHC
	rjmp	sd_fatstart_sdhc
	;SD: convert sector address to byte address
	;(X * 2 added one byte higher = X * 512; 'count' takes the carry of the
	;shift)
	clr	count
	_lsl_w	X
	rol	count
	add	Z2, XL
	adc	Z3, XH
	adc	Z4, count
	rjmp	sd_fatstart_end
sd_fatstart_sdhc:
	;SDHC
	add	Z1, XL
	adc	Z2, XH
	adc	Z3, zero
	adc	Z4, zero
sd_fatstart_end:
	_sts_d	RAM_FAT_Start, Z
.if (DEBUG >= 1)
	_putstr "First FAT at "
	_puthex32 Z
	_putc 0x0a
.endif
	
	;calculate address of first sector after FAT(s)
	;  IN:  RAM_FAT_Start, RAM_Line (boot sector), fFAT32, fSDHC
	;  OUT: size4:size3:size2:size1 = FAT start + (sectors per FAT * number
	;       of FATs), as byte address (SD) or sector address (SDHC)
	;  modifies: Z, temp, R1:R0
	_lds_d	size, RAM_FAT_Start ;start address of first FAT
	lds	temp, RAM_Line+0x10 ;number of FATs
	sbrc	flags, fFAT32
	rjmp	sd_sectors_per_fat_fat32
	clr	Z3
	clr	Z4
	_lds_w	Z, RAM_Line+0x16 ;sectors per FAT (FAT16)
	rjmp	sd_sectors_per_fat_end
sd_sectors_per_fat_fat32:
	_lds_d	Z, RAM_Line+0x24 ;sectors per FAT (FAT32)
sd_sectors_per_fat_end:
	sbrc	flags, fSDHC
	rjmp	sd_firstcluster_sdhc
	;SD: multiply sectors/FAT * FATs, convert result to byte offset
	;(Z is doubled, the products are added one byte higher, starting at
	;size2 => offset = sectors * FATs * 512; bits beyond size4 are dropped)
	_lsl_d	Z
	mul	Z1, temp
	add	size2, R0
	adc	size3, R1
	adc	size4, zero
	mul	Z2, temp
	add	size3, R0
	adc	size4, R1
	mul	temp, Z3
	add	size4, R0
	rjmp	sd_firstcluster_end
sd_firstcluster_sdhc:
	;SDHC: multiply sectors/FAT * FATs (result is sector offset)
	;(32 bit * 8 bit multiplication, one partial product per byte of Z)
	mul	Z1, temp
	add	size1, R0
	adc	size2, R1
	adc	size3, zero
	adc	size4, zero
	mul	Z2, temp
	add	size2, R0
	adc	size3, R1
	adc	size4, zero
	mul	Z3, temp
	add	size3, R0
	adc	size4, R1
	mul	Z4, temp
	add	size4, R0
sd_firstcluster_end:
	
	;get root directory address (and length for FAT16)
	;  IN:  size = address of the first sector after the FAT(s)
	;  FAT32: W = root directory cluster number, 'size' unchanged
	;  FAT16: Z = root directory address, size += root directory length
	;  In both cases 'size' is stored to RAM_FAT_Base and printed as
	;  "Cluster 2 at" in debug builds.
	sbrs	flags, fFAT32
	rjmp	sd_rootdir_fat16
	;FAT32: load root directory cluster to W
	_lds_d	W, RAM_Line+0x2C ;cluster number of root directory start
.if (DEBUG >= 1)
	_putstr "Cluster 2 at "
	_puthex32 size
	_putc 0x0a
	_putstr "Root directory at cluster "
	_puthex_cluster W
	_putc 0x0a
.endif
	rjmp	sd_rootdir_end
sd_rootdir_fat16:
	;FAT16: move root directory address to Z, add root dir length to 'size'
	_mov_d	Z, size
	_lds_w	Y, RAM_Line+0x11 ;number of root directory entries
	sbrc	flags, fSDHC
	rjmp	sd_rootdir_fat16_sdhc
	;SD
	;(byte address: size += entries * 32)
	ldi	temp, 32 ;32 bytes per directory entry
	mul	YL, temp
	add	size1, R0
	adc	size2, R1
	adc	size3, zero
	adc	size4, zero
	mul	YH, temp
	add	size2, R0
	adc	size3, R1
	adc	size4, zero
	rjmp	sd_rootdir_fat16_end
sd_rootdir_fat16_sdhc:
	;SDHC: root directory length must be a multiple of 512 bytes
	mov	temp, YL
	andi	temp, 0x0F ;16 entries/sector => lower 4 bits must be 0
	_brne	sd_rootdir_error
	andi	YL, 0xF0 ;Y >>= 4 (# of entries / 16 = root directory sectors)
	swap	YL
	mov	temp, YH
	andi	temp, 0x0F
	swap	temp
	or	YL, temp
	andi	YH, 0xF0
	swap	YH
	add	size1, YL
	adc	size2, YH
	adc	size3, zero
	adc	size4, zero
sd_rootdir_fat16_end:
.if (DEBUG >= 1)
	_putstr "Cluster 2 at "
	_puthex32 size
	_putc 0x0a
	_putstr "Root directory at "
	_puthex32 Z
	_putc 0x0a
.endif
sd_rootdir_end:
	_sts_d	RAM_FAT_Base, size
	
	;scan root directory
	andi	flags, ~(1<<fBS2BIN) ;no BS2.BIN file found yet
	sbrs	flags, fFAT32
	rjmp	sd_root16
	rjmp	sd_root32
	
sd_rootdir_error:
	;FAT16 root directory length on SDHC card is not a multiple of 512 bytes
.if (DEBUG >= 1)
	_putstr_lf "ERROR: Root directory length isn't a multiple of 512 bytes!"
.endif
	rcall	message
		.db "SDHC root dir error.",0
	ori	flags, 1<<fCardRejected
	rjmp	flash
	
;--------------------
	
sd_root16:
	;FAT16 root directory scan: find animation directory or file BS2.BIN
	;  IN:  Z    = card address of the next root directory sector
	;       size = card address of the first sector behind the root directory
	;  Leaves via sd_blplus (animation directory found, Y = RAM_Line holds
	;  its directory entry) or via sd_root_end (end of directory). A usable
	;  BS2.BIN file is remembered in fBS2BIN, RAM_FAT_Filesize and
	;  RAM_FAT_Filestart while the scan continues.
	rcall	sd_start_block
sd_root16_entry:
	;read one 32-byte directory entry to RAM_Line
	_ldi_w	Y, RAM_Line
	ldi	count, 32
	rcall	sd_read_buffer
	_ldi_w	Y, RAM_Line
	ldd	temp, Y+0xB ;file attributes
	cpi	temp, 0x0F ;ignore LFN entries
	_breq	sd_root16_next
	ld	temp, Y
	cpi	temp, 0x00 ;no more entries
	_breq	sd_root_end
	cpi	temp, 0xE5 ;deleted file
	breq	sd_root16_next
	
	;check for directory
	;(the string compared by str_compare follows the call inline)
	movw	XH:XL, YH:YL
	rcall	str_compare
		_DIRNAME
	brne	sd_root16_blplus_end
	ldd	temp, Y+0xB ;file attributes
	sbrs	temp, 4 ;must be a directory
	rjmp	sd_root16_blplus_end
	andi	temp, ~(0x37) ;allowed attributes: readonly, hidden, system, directory, archive
	brne	sd_root16_blplus_end
	rjmp	sd_blplus
sd_root16_blplus_end:
	
.if (BS2BIN)
	;check for "BS2.BIN" file
	movw	XH:XL, YH:YL
	rcall	str_compare
		.db "BS2     BIN",0
	brne	sd_root16_bs2bin_end
	ldd	temp, Y+0xB ;file attributes
	andi	temp, ~(0x27) ;allowed attributes: readonly, hidden, system, archive
	;Entry has other attributes set (e.g. directory or volume label): skip
	;it. This must branch past the BS2.BIN check; branching back to
	;sd_root16_blplus_end repeats the very same check on the same entry and
	;never terminates.
	brne	sd_root16_bs2bin_end
	_ldd_d	W, Y+0x1C ;read file size
	_tst_d	W
	breq	sd_root16_bs2bin_end ;ignore empty file
	ori	flags, 1<<fBS2BIN
	_sts_d	RAM_FAT_Filesize, W
	_ldd_w	W, Y+0x1a ;first cluster (16 bit for FAT16)
	clr	W3
	clr	W4
	_sts_d	RAM_FAT_Filestart, W ;copy first cluster address to RAM_FAT_Filestart
sd_root16_bs2bin_end:
.endif

sd_root16_next:
	;next entry
	_tst_w	sector ;bytes left in the current sector?
	_brne	sd_root16_entry
	rcall	sd_finish_block ;TODO: this doesn't seem to be necessary, WHY?
	cp	Z1, size1 ;'size' contains root dir start address + root dir length
	cpc	Z2, size2
	cpc	Z3, size3
	cpc	Z4, size4
	brsh	sd_root_end ;Z >= size: whole root directory scanned
	rjmp	sd_root16

sd_root_end:
	;end of directory: animation directory not found,
	;use BS2.BIN file if present or display error message
	;(shared by the FAT16 and FAT32 root directory scans)
	rcall	sd_finish_block
.if (BS2BIN)
	sbrc	flags, fBS2BIN
	rjmp	sd_bs2bin
.endif
	;SD card contains no usable data
.if (DEBUG >= 1)
	_putstr_lf "No usable files found."
.endif
	rcall	message
		.db "No usable files found.",0
	ori	flags, 1<<fCardRejected ;don't try this card again until removed
	rjmp	flash
	
;--------------------

sd_root32:
	;FAT32 root directory scan: find animation directory or file BS2.BIN
	;  IN: W = first cluster number of the root directory
	;  Leaves via sd_blplus (animation directory found, Y = RAM_Line holds
	;  its directory entry) or via sd_root_end (end of directory or read
	;  error). A usable BS2.BIN file is remembered in fBS2BIN,
	;  RAM_FAT_Filesize and RAM_FAT_Filestart while the scan continues.
	_sts_d	RAM_FAT_Cluster, W ;W contains first root dir cluster number
	rcall	sd_read_first_sector
	
sd_root32_entry:
	;read one 32-byte directory entry to RAM_Line
	_ldi_w	Y, RAM_Line
	ldi	count, 32
	rcall	sd_read_buffer
	_ldi_w	Y, RAM_Line
	ld	temp, Y
	cpi	temp, 0x00 ;end of directory
	_breq	sd_root_end
	cpi	temp, 0xE5 ;deleted file
	breq	sd_root32_next
	
	;check for animation directory
	;(the string compared by str_compare follows the call inline)
	movw	XH:XL, YH:YL
	rcall	str_compare
		_DIRNAME
	brne	sd_root32_blplus_end
	ldd	temp, Y+0xB ;file attributes
	sbrs	temp, 4 ;must be a directory
	rjmp	sd_root32_blplus_end
	andi	temp, ~(0x37) ;allowed attributes: readonly, hidden, system, directory, archive
	brne	sd_root32_blplus_end
	rjmp	sd_blplus
sd_root32_blplus_end:
	
.if (BS2BIN)
	;check for "BS2.BIN" file
	movw	XH:XL, YH:YL
	rcall	str_compare
		.db "BS2     BIN",0
	brne	sd_root32_bs2bin_end
	ldd	temp, Y+0xB ;file attributes
	andi	temp, ~(0x27) ;allowed attributes: readonly, hidden, system, archive
	brne	sd_root32_bs2bin_end
	_ldd_d	W, Y+0x1C ;read file size
	_tst_d	W
	breq	sd_root32_bs2bin_end ;ignore empty file
	ori	flags, 1<<fBS2BIN
	_sts_d	RAM_FAT_Filesize, W
	;first cluster: low word at offset 0x1A, high word at offset 0x14
	ldd	W1, Y+0x1a
	ldd	W2, Y+0x1b
	ldd	W3, Y+0x14
	ldd	W4, Y+0x15
	_sts_d	RAM_FAT_Filestart, W ;copy first cluster address to RAM_FAT_Filestart
sd_root32_bs2bin_end:
.endif

sd_root32_next:
	;next entry
	_tst_w	sector ;bytes left in the current sector?
	breq	sd_root32_sector
	rjmp	sd_root32_entry
sd_root32_sector:
	rcall	sd_read_sector
	sbrs	flags, fReadError
	rjmp	sd_root32_entry
	;TODO: display error message? (what will happen with dir end on sector or cluster boundary)
	rjmp	sd_root_end
	
;--------------------

.if (BS2BIN)
sd_bs2bin:
	;play the file /BS2.BIN in an endless loop
	;  IN: RAM_FAT_Filestart / RAM_FAT_Filesize as stored by the root
	;      directory scan
	;  The loop is only left (to 'flash') when the card detect pin reads
	;  high; on Mega16 builds that check is not assembled.
  .if (DEBUG >= 1)
	_putstr_lf "Animation directory not found, but BS2.BIN present."
  .endif
	rcall	message
		.db "BS2.BIN",0
	_ldi_w	time, 0
sd_bs2bin_loop:
  .if (DEBUG >= 1)
	_putstr_lf "Playing /BS2.BIN"
  .endif
	;(re)start at the first cluster with the full file size
	_lds_d	W, RAM_FAT_Filestart
	_sts_d	RAM_FAT_Cluster, W
	_lds_d	size, RAM_FAT_Filesize
  .if (DEBUG >= 2)
	_putstr "  File size: "
	_puthex32 size
	_putc 0x0a
  .endif
	andi	flags, ~(1<<fReadError)
	rcall	sd_bin
	rcall	sd_finish_block
  .if (MEGA16 == 0)
	sbic	PINE, SD_DETECT
	rjmp	flash
  .endif
	rjmp	sd_bs2bin_loop
.endif
	
;--------------------

sd_blplus:
	;play files in animation directory
	;  IN: Y = address of the animation directory's entry (RAM_Line)
	;  Stores the directory's first cluster in RAM_FAT_Filestart and falls
	;  through into sd_blplus_loop.
	_ldi_w	time, 0
	;close root directory
	rcall	sd_finish_block
	;open animation directory
	_ldd_w	W, Y+0x1a ;first cluster of directory (lower 2 bytes for FAT32)
	clr	W3
	clr	W4
	sbrs	flags, fFAT32
	rjmp	PC+3 ;FAT16: skip the two 'ldd' below, upper bytes stay zero
	ldd	W3, Y+0x14 ;first cluster of directory (FAT32 byte 3)
	ldd	W4, Y+0x15 ;first cluster of directory (FAT32 byte 4)
	_sts_d	RAM_FAT_Filestart, W ;copy first directory cluster address to RAM_FAT_Filestart
.if (DEBUG >= 1)
	_putstr_lf "Playing animation directory:"
.endif
.if (BS2BIN)
	rcall	message
		.db "DIR",0
.endif

sd_blplus_loop:
	;(re)start reading the animation directory at its first cluster and
	;play every file with extension BIN, BLM or BML; at the end of the
	;directory start over. The loop is only left (to 'flash') when the card
	;detect pin reads high; on Mega16 builds that check is not assembled.
	_lds_d	W, RAM_FAT_Filestart
	_sts_d	RAM_FAT_Cluster, W
	andi	flags, ~(1<<fReadError)
	rcall	sd_read_first_sector
sd_blplus_dir:
	;read one 32-byte directory entry to RAM_Line
	_ldi_w	Y, RAM_Line
	ldi	count, 32
	rcall	sd_read_buffer
	_ldi_w	Y, RAM_Line
	ld	temp, Y
	cpi	temp, 0xE5 ;deleted file
	breq	sd_blplus_next
	cpi	temp, 0x00 ;end of directory
	breq	sd_blplus_eod
	ldd	temp, Y+0xB ;file attributes
	andi	temp, ~(0x27) ;allowed attributes: readonly, hidden, system, archive
	brne	sd_blplus_next
	
.if (DEBUG >= 1)
	;send filename
	;(11 chars, with an additional space after the 8th one)
	ldi	temp2, 11
sd_blplus_name:
	ld	temp, Y+
	rcall	putc
	cpi	temp2, 4
	brne	PC+3 ;skip 'ldi' and 'rcall' below
	ldi	temp, ' '
	rcall	putc
	dec	temp2
	brne	sd_blplus_name
	sbiw	YH:YL, 11 ;Y back to the start of the entry
.endif
	
	;check file type
	;(the extension is stored at offset 8 of the directory entry)
	;NOTE(review): 'size' is stored before the file is opened, so this isn't
	;the size of the current entry - confirm what RAM_FAT_Filesize is used
	;for in directory mode
	_sts_d	RAM_FAT_Filesize, size
	_ldi_w	X, RAM_Line+8
	rcall	str_compare
		.db "BIN",0 ;BS2.BIN-format (Blinkstroem) file
	breq	sd_blplus_bin
	_ldi_w	X, RAM_Line+8
	rcall	str_compare
		.db "BLM",0 ;BLM (BlinkenLights Movie) file
	breq	sd_blplus_blm
	_ldi_w	X, RAM_Line+8
	rcall	str_compare
		.db "BML",0 ;BML (Blinkenlights Markup Language) file
	_breq	sd_blplus_bml
.if (DEBUG >= 1)
	_putc 0x0a
.endif

sd_blplus_next:
	;continue with the next directory entry (also the common return point
	;of the file players)
.if (MEGA16 == 0)
	sbic	PINE, SD_DETECT
	rjmp	flash
.endif
	_tst_w	sector ;bytes left in the current sector?
	brne	sd_blplus_dir
	rcall	sd_read_sector
	sbrs	flags, fReadError
	rjmp	sd_blplus_dir
	;read error: treated like the end of the directory
	
sd_blplus_eod:
	rcall	sd_finish_block
.if (DEBUG >= 1)
	_putstr_lf "End of directory. Rewind:"
.endif
	rjmp	sd_blplus_loop
	
;--------------------

sd_blplus_bin:
	;play binary (BIN) file
	;  IN: Y = address of the file's directory entry (used by sd_file_open)
	;  sd_file_open saves the directory read position on the stack and
	;  sd_file_close restores it, so the two calls must stay paired.
.if (DEBUG >= 1)
	_putstr_lf " => Binary (Blinkstroem)"
.endif
	rcall	sd_file_open
	rcall	sd_bin
	rcall	sd_file_close
	rjmp	sd_blplus_next

;--------------------
	
sd_blplus_blm:
	;play BlinkenLights Movie (BLM) file
	;  IN: Y = address of the file's directory entry (used by sd_file_open)
	;  The file is read line by line:
	;    '@...'        duration line, starts a new frame of HEIGHT rows
	;    '0' / '1'...  one row of pixel data
	;    anything else is ignored
	;  'framelen' counts the rows still missing in the current frame; rows
	;  are ignored while it is zero.
.if (DEBUG >= 1)
	_putstr_lf " => BlinkenLights Movie"
.endif
	rcall	sd_file_open
	rcall	sd_read_first_sector
	clr	framelen ;no frame started yet
	
sd_blm_read:
	;read one line from file
	rcall	sd_read_line
	lds	temp, RAM_Line
	cpi	temp, 0 ;empty string => EOF
	_breq	sd_blm_end
	cpi	temp, '@' ;duration
	breq	sd_blm_duration
	cpi	temp, '0' ;0 or 1: one row of pixel data
	breq	sd_blm_row
	cpi	temp, '1'
	breq	sd_blm_row
	rjmp	sd_blm_read
	
sd_blm_row:
	;one line of pixel data
	tst	framelen
	breq	sd_blm_read ;ignore row if frame is already complete
	inactiveframe Y
	ldi	temp, WIDTH/2 ;RAM offset: (current row) * WIDTH/2
	ldi	temp2, HEIGHT
	sub	temp2, framelen ;current row = HEIGHT - (rows remaining)
	mul	temp, temp2
	add	YL, R0
	adc	YH, R1
	_ldi_w	X, RAM_Line
	ldi	temp, WIDTH
	mov	rowlen, temp ;pixels still missing in this row
sd_blm_row_loop:
	ld	temp, X+
	cpi	temp, 0
	breq	sd_blm_row_end ;end of string
	ldi	temp2, 0 ;'0': LED off
	cpi	temp, '0'
	breq	sd_blm_row_valid
	ldi	temp2, 7 ;'1': max. brightness
	cpi	temp, '1'
	brne	sd_blm_row_loop ;ignore chars not '0' or '1'
sd_blm_row_valid:
	;valid pixel data
	;rowlen even: first pixel of a byte, stored to the low nibble (high
	;nibble zero); rowlen odd: second pixel, merged into the high nibble
	sbrs	rowlen, 0
	rjmp	sd_blm_row_store
	swap	temp2 ;even pixel: swap value and 'OR' to previous RAM byte
	ld	temp, -Y
	or	temp2, temp
sd_blm_row_store:
	st	Y+, temp2
	dec	rowlen
	brne	sd_blm_row_loop
	
sd_blm_row_end:
	;end of row
	;  IN: rowlen = number of pixels still missing in this row,
	;      Y = address behind the last byte written for this row
	tst	rowlen
	breq	sd_blm_row_complete
	;not enough data: pad with zero (LEDs off)
	lsr	rowlen ;1 byte = 2 pixels
	;Only one pixel missing: it belongs to the high nibble of the byte that
	;has already been stored (with a zero high nibble), so no whole byte is
	;left to pad. Without this check the loop below would start with a
	;count of 0 and wrap around, writing 256 bytes.
	breq	sd_blm_row_complete
sd_blm_row_pad:
	st	Y+, zero
	dec	rowlen
	brne	sd_blm_row_pad
sd_blm_row_complete:
	dec	framelen
	breq	sd_blm_frame ;frame complete
	rjmp	sd_blm_read

sd_blm_duration:
	;duration of frame
	;(line starts with '@'; the number behind it is converted by str2num,
	;which is defined elsewhere: the result is taken from X, and temp2 = 0xFF
	;selects the default duration)
	_ldi_w	Y, RAM_Line+1
	rcall	str2num
	cpi	temp2, 0xFF
	brne	sd_blm_duration_end
	_ldi_w	X, 1 ;default duration: 10 ms
sd_blm_duration_end:
	_sts_w	RAM_Duration, X
.if (DEBUG >= 3)
	_putstr "    New Frame, duration: "
	_puthex16 X
.endif
	ldi	temp, HEIGHT ;start a new frame: HEIGHT rows expected
	mov	framelen, temp
	rjmp	sd_blm_read
	
sd_blm_frame:
	;one frame complete
.if (DEBUG >= 3)
	_putstr " [waiting "
	_puthex16 time
	_putc ']'
.endif
	ori	flags, 1<<fNewFrame
sd_blm_wait:
	;wait until the frame has been taken over (fNewFrame cleared elsewhere);
	;abort the file if the card detect pin reads high
.if (MEGA16 == 0)
	sbic	PINE, SD_DETECT
	rjmp	sd_blm_end
.endif
	sbrc	flags, fNewFrame
	rjmp	sd_blm_wait
.if (DEBUG >= 3)
	_putstr_lf " OK"
.endif
	rjmp	sd_blm_read
	
sd_blm_end:
	;end of file
	rcall	sd_file_close
	rjmp	sd_blplus_next

;--------------------

sd_blplus_bml:
	;  IN: Y = address of the file's directory entry (used by sd_file_open)
	;  The file is skipped (sd_bml_invalid) unless the "blm" tag's width and
	;  height pass the checks below and bits is in the range 1..8.
	;  str2ram, str_xml_param and str_compare are defined elsewhere; their
	;  string arguments follow the call inline.
.if (DEBUG >= 1)
	_putstr_lf " => Blinkenlights Markup Language"
.endif
	;play Blinkenlights Markup Language (BML) file
	rcall	sd_file_open
	rcall	sd_read_first_sector
sd_bml_start:
	;read until the "blm" tag is found (tag text must start with "blm ")
	rcall	sd_read_xml_tag
	sbrc	flags, fReadError
	rjmp	sd_bml_eof
	_ldi_w	X, RAM_Line
	rcall	str_compare
		.db "blm ",0
	brne	sd_bml_start
	;read "blm" tag parameters
	rcall	str2ram		;width
	.db "width=",0
	rcall	str_xml_param
	;compare the result in temp2 / XL against WIDTH % 10 / WIDTH / 10
	;NOTE(review): exact result format of str_xml_param isn't visible here
	ldi	temp, WIDTH / 10
	cpi	temp2, WIDTH % 10
	cpc	XL, temp
	brne	sd_bml_invalid
	rcall	str2ram		;height
		.db "height=",0
	rcall	str_xml_param
	ldi	temp, HEIGHT / 10
	cpi	temp2, HEIGHT % 10
	cpc	XL, temp
	brne	sd_bml_invalid
	rcall	str2ram		;bits per pixel (valid values are 1..8)
		.db "bits=",0
	rcall	str_xml_param
	cpi	temp2, 0
	breq	sd_bml_invalid
	cpi	temp2, 9
	brsh	sd_bml_invalid
	sts	RAM_BitsPerPixel, temp2
.if (DEBUG >= 3)
	push	temp2
	_putstr "    "
	pop	temp
	call	puthex
	_putstr_lf " bits per pixel."
.endif
	
sd_bml_read:
	;read until <frame> tag found
	rcall	sd_read_xml_tag
	sbrc	flags, fReadError
	rjmp	sd_bml_eof
	_ldi_w	X, RAM_Line
	rcall	str_compare
		.db "frame",0
	breq	sd_bml_frame
	rjmp	sd_bml_read
	
sd_bml_invalid:
	;invalid BML file, skip
.if (DEBUG >= 1)
	_putstr_lf "  invalid file."
.endif
sd_bml_eof:
	;end of file
	rcall	sd_file_close
	rjmp	sd_blplus_next

sd_bml_frame:
	;read <frame> tag parameter (duration) and contents (pixel data)
	rcall	str2ram
		.db "duration=",0
	rcall	str_xml_param
	cpi	temp2, 0xFF ;0xFF selects the default duration
	brne	sd_bml_frame_read
	_ldi_w	X, 1 ;default duration: 10 ms
sd_bml_frame_read:
	_sts_w	RAM_Duration, X
.if (DEBUG >= 3)
	_putstr "    New Frame, duration: "
	_puthex16 X
.endif
	ldi	temp, HEIGHT ;rows still missing in this frame
	mov	framelen, temp
sd_bml_frame_loop:
	;read until <row> tag or closing </frame> tag found
	rcall	sd_read_xml_tag
	sbrc	flags, fReadError
	rjmp	sd_bml_eof
	_ldi_w	X, RAM_Line
	rcall	str_compare
		.db "/frame",0
	_breq	sd_bml_frame_end
	_ldi_w	X, RAM_Line
	rcall	str_compare
		.db "row",0
	breq	sd_bml_frame_data
	rjmp	sd_bml_frame_loop
	
sd_bml_frame_data:
	;read one row from SD card
	;  The row data consists of hex digits up to the next '<'; all other
	;  characters are ignored.
	;    1..4 bits per pixel: one hex digit per pixel
	;    5..8 bits per pixel: two hex digits per pixel, only the first one
	;                         (upper nibble) is evaluated
	;  'rowlen' counts the hex digits still expected for this row.
	tst	framelen
	breq	sd_bml_frame_loop ;ignore row if frame is already complete
	inactiveframe Y
	ldi	temp, WIDTH/2 ;RAM offset: (current row) * WIDTH/2
	ldi	temp2, HEIGHT
	sub	temp2, framelen ;current row = HEIGHT - (rows remaining)
	mul	temp, temp2
	add	YL, R0
	adc	YH, R1
	ldi	temp, WIDTH ;'WIDTH' pixels per row
	mov	rowlen, temp
	lds	temp, RAM_BitsPerPixel
	cpi	temp, 5
	brlo	PC+2
	lsl	rowlen ;5 to 8 bits per pixel: 2 hex digits per pixel
sd_bml_data_loop:
	rcall	sd_read_byte
	sbrc	flags, fReadError
	rjmp	sd_bml_eof
	cpi	temp, '<'
	breq	sd_bml_data_end ;end of row data
	cpi	temp, '0'
	brlo	sd_bml_data_loop
	cpi	temp, '9'+1
	brlo	sd_bml_data_valid
	cpi	temp, 'A'
	brlo	sd_bml_data_loop
	cpi	temp, 'F'+1
	brlo	sd_bml_data_valid
	cpi	temp, 'a'
	brlo	sd_bml_data_loop
	cpi	temp, 'f'+1
	brsh	sd_bml_data_loop
sd_bml_data_valid:
	;valid byte (0..9, A..F or a..f)
	tst	rowlen
	breq	sd_bml_data_loop ;ignore byte if row is already complete
	;convert to 3 bits per pixel
	bst	rowlen, 0 ;save bit 0 to T flag for odd/even pixel detection
	lds	temp2, RAM_BitsPerPixel
	cpi	temp2, 5
	brlo	sd_bml_data_convert
	subi	temp2, 4 ;5..8 bpp: the upper nibble holds 1..4 significant bits
	;The ignored digit still has to be counted. Jumping straight back to
	;sd_bml_data_loop left 'rowlen' odd, so every following digit of the
	;row was ignored as well and only the first pixel was ever stored.
	sbrc	rowlen, 0 ;ignore every second hex digit (lower nibbles)
	rjmp	sd_bml_data_skip
	bst	rowlen, 1 ;save bit 1 to T flag for odd/even pixel detection
sd_bml_data_convert:
	;TODO: use both bytes for 5/6 bpp
	;NOTE: the 'PC+n' branches below skip a fixed number of single-word
	;instructions, don't insert anything between them and their targets
	cpi	temp, 'a'
	brlo	PC+2
	subi	temp, ('a' - 'A') ;convert to uppercase
	subi	temp, '0'
	cpi	temp, 10 ;convert from ASCII to binary
	brlo	PC+2
	subi	temp, 7
	cpi	temp2, 4 ;4 (8) bits per pixel
	brne	PC+2
	lsr	temp ;drop the least significant bit
	cpi	temp2, 2 ;2 (6) bits per pixel
	brne	PC+4
	lsl	temp ;expand to 3 bits, repeat the MSB in bit 0
	sbrc	temp, 2
	ori	temp, 0x01
	cpi	temp2, 1 ;1 (5) bit(s) per pixel
	brne	PC+5
	lsl	temp ;expand to 3 bits: 0 => 0, 1 => 7
	lsl	temp
	sbrc	temp, 2
	ori	temp, 0x03
	;store to RAM
	andi	temp, 0x07
	brtc	sd_bml_data_store
	swap	temp ;even pixel: swap value and 'OR' to previous RAM byte
	ld	temp2, -Y
	or	temp, temp2
sd_bml_data_store:
	st	Y+, temp
sd_bml_data_skip:
	dec	rowlen ;one hex digit processed
	rjmp	sd_bml_data_loop
	
sd_bml_data_end:
	;end of row data
	;  IN: rowlen = number of hex digits still missing in this row,
	;      Y = address behind the last byte written for this row
	dec	framelen
	tst	rowlen
	_breq	sd_bml_frame_loop
	;not enough data: pad with zero (LEDs off)
	lsr	rowlen ;1 byte = 2 pixels
	lds	temp, RAM_BitsPerPixel
	cpi	temp, 5
	brlo	PC+2
	lsr	rowlen ;compensate for 'lsl' above
	;Less than one whole byte missing: the remaining pixel belongs to the
	;high nibble of a byte that has already been stored (with a zero high
	;nibble). Without this check the loop below would start with a count of
	;0 and wrap around, writing 256 bytes.
	tst	rowlen
	_breq	sd_bml_frame_loop
sd_bml_data_pad:
	st	Y+, zero
	dec	rowlen
	;the loop has to end when the counter reaches zero (an unconditional
	;jump here never terminates and keeps overwriting RAM with zero)
	brne	sd_bml_data_pad
	rjmp	sd_bml_frame_loop ;row padded, continue with the next tag

sd_bml_frame_end:
	;frame is complete
	;(closing </frame> tag found: hand the frame over and wait until it has
	;been taken over before reading the next <frame> tag)
.if (DEBUG >= 3)
	_putstr " [waiting "
	_puthex16 time
	_putc ']'
.endif
	ori	flags, 1<<fNewFrame
sd_bml_wait:
	;abort the file if the card detect pin reads high
.if (MEGA16 == 0)
	sbic	PINE, SD_DETECT
	rjmp	sd_bml_eof
.endif
	sbrc	flags, fNewFrame
	rjmp	sd_bml_wait
.if (DEBUG >= 3)
	_putstr_lf " OK"
.endif
	rjmp	sd_bml_read
	
;--------------------

sd_bin:
	;play binary file starting at cluster RAM_FAT_Cluster with size 'size'
	;  IN:  RAM_FAT_Cluster = first cluster of the file,
	;       size = file size in bytes
	;  Frame format as read below: 2 bytes duration (low byte first, stored
	;  doubled in RAM_Duration), followed by PIXELS bytes of pixel data (one
	;  byte per pixel, column by column, lower 3 bits used).
	;  Returns at end of file, on a read error or when the card detect pin
	;  reads high.
	;  modifies: X, Y, R0, temp, temp2, size, sector
	rcall	sd_read_first_sector
sd_bin_frame:
	;read next frame
	_tst_w	sector ;current sector used up?
	brne	sd_bin_duration
	rcall	sd_finish_block
	rcall	sd_read_sector
	sbrc	flags, fReadError
	rjmp	sd_bin_error
sd_bin_duration:
	rcall	sd_read ;duration
	mov	XL, temp
	rcall	sd_read
	mov	XH, temp
	_lsl_w	X ;multiply by 2
	_sts_w	RAM_Duration, X
.if (DEBUG >= 3)
	_putstr "    New Frame, duration: "
	_puthex16 X
.endif
	ldi	temp, 2 ;2 bytes read from the current sector
	sub	sectorL, temp
	sbc	sectorH, zero
	inactiveframe Y
	ldi	XL, WIDTH ;column counter
sd_bin_column:
	;read one column
	ldi	XH, HEIGHT ;row counter (decremented by 2 per iteration)
sd_bin_pixel:
	;read two pixels
	_tst_w	sector ;current sector used up?
	brne	sd_bin_pixel_read
	rcall	sd_finish_block
	rcall	sd_read_sector
	sbrc	flags, fReadError
	rjmp	sd_bin_error
sd_bin_pixel_read:
	;R0 = pixel of the current row, temp = pixel of the row below
	rcall	sd_read
	andi	temp, 0x07
	mov	R0, temp
	rcall	sd_read
	andi	temp, 0x07
	sbrs	XL, 0
	rjmp	sd_bin_pixel_store
	;XL (column counter) is odd: use high nibble of previous column's RAM
	;addresses
	swap	R0
	swap	temp
	ld	temp2, Y
	or	R0, temp2
	ldd	temp2, Y+WIDTH/2
	or	temp, temp2
sd_bin_pixel_store:
	st	Y, R0
	std	Y+WIDTH/2, temp
	adiw	YH:YL, 2*(WIDTH/2) ;2 pixels down
	ldi	temp, 2 ;2 bytes read from the current sector
	sub	sectorL, temp
	sbc	sectorH, zero
	subi	XH, 2
	brne	sd_bin_pixel
	;column finished
	_subi_w	Y, PIXELS/2 ;back to the first row
	sbrc	XL, 0
	adiw	YH:YL, 1 ;increment column address after every two columns
	dec	XL
	brne	sd_bin_column
	;new frame is complete
.if (DEBUG >= 3)
	_putstr " [waiting "
	_puthex16 time
	_putc ']'
.endif
	ori	flags, 1<<fNewFrame
sd_bin_wait:
	;wait until the frame has been taken over (fNewFrame cleared elsewhere)
.if (MEGA16 == 0)
	sbic	PINE, SD_DETECT
	rjmp	sd_bin_eof
.endif
	sbrc	flags, fNewFrame
	rjmp	sd_bin_wait
.if (DEBUG >= 3)
	_putstr_lf " OK"
.endif
	;decrement remaining file size
	;(one frame = PIXELS data bytes + 2 duration bytes)
	_ldi_w	X, PIXELS+2
	sub	size1, XL
	sbc	size2, XH
	sbc	size3, zero
	sbc	size4, zero
	brcs	sd_bin_eof ;less than one frame was left
	breq	sd_bin_eof ;exactly one frame was left
	rjmp	sd_bin_frame

sd_bin_error:
	;file error (e.g. EOF before indicated file size was read)
.if (DEBUG >= 1)
	_putstr_lf "File read error!"
.endif
sd_bin_eof:
.if (DEBUG >= 3)
	_putstr_lf "  End of file."
.endif
	ret

;--------------------
	
sd_file_open:
	;"open" file from directory
	;  IN:  Y = address of the file's 32-byte directory entry
	;  OUT: RAM_FAT_Cluster = first cluster of the file,
	;       size = file size, fReadError cleared
	;  The current directory read position ('sector', Z, RAM_FAT_Cluster,
	;  RAM_FAT_RemainingSectors - 11 bytes in total) is left on the stack
	;  below the return address; sd_file_close removes it again, so every
	;  call must be paired with exactly one call of sd_file_close.
	;  modifies: R1:R0, W, temp
	;remove return address from stack
	pop	R1
	pop	R0
	;backup all pointers of current position in directory
	_push_w	sector
	_push_d	Z
	_lds_d	W, RAM_FAT_Cluster
	_push_d	W
	lds	temp, RAM_FAT_RemainingSectors
	push	temp
.if (DEBUG >= 2)
	;R1:R0 (return address) is saved around the debug output
	push	R0
	push	R1
	_putstr "  Open file (old addr="
	_puthex32 Z
	_putstr ", remain="
	_puthex16 sector
	_putstr_lf ")"
	pop	R1
	pop	R0
.endif
	;finish reading current sector
	rcall	sd_finish_block
	;load values for file into FAT registers
	_ldd_w	W, Y+0x1a ;first cluster of file (lower 2 bytes for FAT32)
	clr	W3
	clr	W4
	sbrs	flags, fFAT32
	rjmp	PC+3 ;FAT16: skip the two 'ldd' below, upper bytes stay zero
	ldd	W3, Y+0x14 ;first cluster of file (FAT32 byte 3)
	ldd	W4, Y+0x15 ;first cluster of file (FAT32 byte 4)
	_sts_d	RAM_FAT_Cluster, W
	_ldd_d	size, Y+0x1c ;file size
	andi	flags, ~(1<<fReadError)
	;put return address back onto stack
	push	R0
	push	R1
	ret

;--------------------

sd_file_close:
	;"close" file (continue reading directory index)
	;  Counterpart of sd_file_open: takes the directory read position saved
	;  by sd_file_open off the stack (in reverse order), starts reading the
	;  directory sector again and skips the bytes that had already been
	;  consumed, so that the next read delivers the following directory
	;  entry. fReadError is cleared.
	;  modifies: R1:R0, W, Y, Z, temp, sector
	;dump any unread data bytes
	sbis	SD_PORT, SD_CS ;TODO: is it possible for SD_CS to be high here?
	rcall	sd_finish_block
	;remove return address from stack
	pop	R1
	pop	R0
	;restore directory pointers
	andi	flags, ~(1<<fReadError)
	pop	temp
	sts	RAM_FAT_RemainingSectors, temp
	_pop_d	W
	_sts_d	RAM_FAT_Cluster, W
	_pop_d	Z
.if (DEBUG >= 2)
	;R1:R0 (return address) is saved around the debug output
	push	R0
	push	R1
	_putstr "  Close file (restore addr="
	_puthex32 Z
	pop	R1
	pop	R0
.endif
	;reload last used sector
	;(the saved Z had already been advanced by sd_start_block, so step back
	;by one sector first)
	sbrc	flags, fSDHC
	rjmp	sd_file_close_sdhc
	;SD: subtract 0x200 from byte address
	subi	Z2, 2
	sbci	Z3, 0
	sbci	Z4, 0
	rjmp	sd_file_close_load
sd_file_close_sdhc:
	;SDHC: subtract 1 from sector address
	_subi_d	Z, 1
sd_file_close_load:
	rcall	sd_start_block
	_pop_w	sector ;saved "remaining bytes in sector" counter
.if (DEBUG >= 2)
	push	R0
	push	R1
	_putstr ", remain="
	_puthex16 sector
	_putstr_lf ")"
	pop	R1
	pop	R0
.endif
	;read bytes until old position reached
	_ldi_w	Y, 512
	_sub_w	Y, sector ;Y = number of bytes that had already been read
	breq	sd_file_close_dump_end
sd_file_close_dump:
	rcall	sd_clock
	sbiw	YH:YL, 1
	brne	sd_file_close_dump
sd_file_close_dump_end:
	;put return address back onto stack
	push	R0
	push	R1
	ret
	
;--------------------

sd_read_byte:
	;read 1 byte from SD card
	;  IN:  size = remaining file size, sector = bytes left in current sector
	;  OUT: temp = byte read; size and sector decremented by 1
	;       At end of file (size = 0) or on a read error: fReadError is set
	;       and temp = 0.
	;  modifies: temp2 (plus whatever the called routines use)
	_tst_d	size
	breq	sd_read_byte_eof
	_tst_w	sector ;current sector used up?
	brne	sd_read_byte_read
	rcall	sd_finish_block
	rcall	sd_read_sector
	sbrc	flags, fReadError
	rjmp	sd_read_byte_eof
sd_read_byte_read:
	rcall	sd_read ;read 1 byte
	ldi	temp2, 1 ;decrement file size
	sub	size1, temp2
	sbc	size2, zero
	sbc	size3, zero
	sbc	size4, zero
	sub	sectorL, temp2 ;decrement sector byte counter
	sbc	sectorH, zero
	ret
	
sd_read_byte_eof:
	ori	flags, 1<<fReadError
	ldi	temp, 0
	ret

;--------------------

sd_read_line:
	;read one line of text from file (terminated by char < 0x20 or EOF)
	;  OUT: RAM_Line = 0x00-terminated line, at most 79 chars; additional
	;       chars up to the end of the line are read and discarded.
	;       Leading chars <= 0x20 (spaces, control chars, empty lines) are
	;       skipped. An empty string is returned at EOF / on a read error.
	;  modifies: X, temp, rowlen (plus whatever sd_read_byte uses)
	_ldi_w	X, RAM_Line
sd_read_line_start:
	;skip everything up to the first char > 0x20
	sbrc	flags, fReadError
	rjmp	sd_read_line_error
	rcall	sd_read_byte
	cpi	temp, 0x20+1
	brlo	sd_read_line_start
	st	X+, temp
	ldi	temp, 78 ;read max. 79 chars (one already read => 78 remaining)
	mov	rowlen, temp
sd_read_line_loop:
	sbrc	flags, fReadError
	rjmp	sd_read_line_error
	rcall	sd_read_byte
	cpi	temp, 0x20
	brlo	sd_read_line_end
	tst	rowlen
	breq	sd_read_line_loop ;skip if buffer is full
	st	X+, temp
	dec	rowlen
	rjmp	sd_read_line_loop
sd_read_line_error:
sd_read_line_end:
	st	X, zero ;terminate line
	ret

;--------------------

sd_read_xml_tag:
	;read next XML tag from SD card
	;  skips all bytes up to and including the next '<', then copies the
	;  text up to (not including) the next '>' to RAM_Line, 0x00-terminated
	;  (max. 79 chars, excess chars are read and dropped)
	;  on EOF/read error (fReadError set) the text read so far is
	;  terminated; if no '<' was found, RAM_Line holds an empty string
	;  modifies: temp, rowlen, X (plus everything sd_read_byte modifies)
	;(doesn't modify Y)
	;X must point to RAM_Line before the first error check, because the
	;EOF exit below writes the terminating 0x00 through X
	_ldi_w	X, RAM_Line
	sbrc	flags, fReadError
	rjmp	sd_read_xml_tag_eof
	rcall	sd_read_byte
	cpi	temp, '<'
	brne	sd_read_xml_tag
	ldi	temp, 79 ;read max. 79 chars
	mov	rowlen, temp
sd_read_xml_tag_loop:
	sbrc	flags, fReadError
	rjmp	sd_read_xml_tag_eof
	rcall	sd_read_byte
	cpi	temp, '>'
	breq	sd_read_xml_tag_end
	tst	rowlen
	breq	sd_read_xml_tag_loop ;skip if buffer is full
	st	X+, temp
	dec	rowlen
	rjmp	sd_read_xml_tag_loop
sd_read_xml_tag_eof:
sd_read_xml_tag_end:
	st	X, zero ;terminate string
	ret

;--------------------

sd_start_block:
	;initialize reading a 512 byte block from SD card at addr Z4:Z3:Z2:Z1,
	;increment SD card address by number of read bytes (SD)/sectors (SDHC)
	;  on return #CS is low, 'sector' = 512 (bytes left in block) and the
	;  0xFE data token has been received, i.e. the next byte read is data
	;  the response to CMD17 itself is not checked
	;  NOTE: there is no timeout while waiting for the data token; the
	;  wait is only left early when the card detect switch reports removal
	;  (not available if MEGA16 is set)
	;  modifies: temp, sector, Z (plus everything sd_command/sd_read modify)
.if (DEBUG >= 4)
	_putc '<'
	_puthex32 Z
.endif
	cbi	SD_PORT, SD_CS
	ldi	temp, 17 ;CMD17: read single block
	rcall	sd_command
	ldi	temp, HIGH(512) ;preload 'remaining bytes in sector' counter
	mov	sectorH, temp
	mov	sectorL, zero ;LOW(512) = 0
	sbrc	flags, fSDHC
	rjmp	sd_start_block_sdhc
	_addi_d	Z, 512 ;SD: add 512 to Z (byte address)
	rjmp	sd_start_block_wait
sd_start_block_sdhc:
	_addi_d	Z, 1 ;SDHC: add 1 to Z (sector address)
sd_start_block_wait:
.if (MEGA16 == 0)
	sbic	PINE, SD_DETECT
	rjmp	sd_read_removed
.endif
	rcall	sd_read
	cpi	temp, 0xFE ;wait until SD card ready
	brne	sd_start_block_wait
.if (DEBUG >= 4)
	_putc '>'
.endif
	ret

sd_read_removed:
	;SD card removed
	;(returns temp = 0 with fReadError set; #CS is not released here)
	ori	flags, 1<<fReadError
	ldi	temp, 0x00
	ret

;--------------------

sd_finish_block:
	;dump any unread data bytes and flush CRC
	;  clocks out the 'sector' bytes still pending in the current block,
	;  then the 2 CRC bytes and one extra dummy byte, and releases #CS
	;  on return 'sector' = 0
	;  modifies: temp, temp2, sector
.if (DEBUG >= 4)
	_putc '{'
.endif
	_tst_w	sector
	breq	sd_finish_block_crc
.if (DEBUG >= 4)
	_puthex16 sector
.endif
	ldi	temp, 1 ;decrement step (sd_clock leaves 'temp' untouched)
sd_finish_block_dump:
	rcall	sd_clock
	sub	sectorL, temp
	sbc	sectorH, zero
	brne	sd_finish_block_dump ;Z flag covers both bytes after sub/sbc
sd_finish_block_crc:
	;end of data block
	rcall	sd_clock ;flush CRC16
	rcall	sd_clock
	rcall	sd_clock ;additional dummy read
	sbi	SD_PORT, SD_CS
.if (DEBUG >= 4)
	_putc '}'
.endif
	ret

;--------------------

sd_write:
	;send one byte to SD card
	;  input: temp = byte to send
	;  ARCADE: uses the hardware SPI (write SPDR, wait for SPIF)
	;  otherwise: bit-banged, MSB first; 'temp' is shifted out (not
	;  preserved) and 'temp2' is used as bit counter
.if (ARCADE)
	out	SPDR, temp
sd_write_wait:
	sbis	SPSR, SPIF
	rjmp	sd_write_wait
.else
	ldi	temp2, 8
sd_write_loop:
	cbi	SD_PORT, SD_MOSI ;assume a 0 bit
	lsl	temp ;next bit (MSB) -> carry
	brcc	PC+2
	sbi	SD_PORT, SD_MOSI ;carry set: output a 1 bit instead
	rjmp	PC+1 ;jump to next instruction (short delay)
	sbi	SD_PORT, SD_CK ;clock pulse
	rjmp	PC+1
	cbi	SD_PORT, SD_CK
	dec	temp2
	brne	sd_write_loop
.endif
	ret

;--------------------

sd_read_buffer:
	;read 'count' bytes from SD card to RAM at YH:YL
	;  the bytes are subtracted from the 'sector' counter up front
	;  on return Y points behind the last byte stored and count = 0
	;  NOTE: count = 0 on entry runs the loop 256 times (dec wraps around)
	;  while 'sector' is left unchanged
	;  modifies: temp, count, sector, Y (plus everything sd_read modifies)
	sub	sectorL, count
	sbc	sectorH, zero
sd_read_buffer_loop:
	rcall	sd_read
	st	Y+, temp
	dec	count
	brne	sd_read_buffer_loop
	ret

;--------------------

sd_read:
	;receive one byte from SD card
	;  returns: temp = received byte
	;  ARCADE: hardware SPI transfer, 0xFF is sent while receiving
	;  otherwise: bit-banged with MOSI held high, MSB first; MISO is
	;  sampled before each clock pulse, 'temp2' is used as bit counter
.if (ARCADE)
	ldi	temp, 0xFF
	out	SPDR, temp
sd_read_wait:
	sbis	SPSR, SPIF
	rjmp	sd_read_wait
	in	temp, SPDR
.else
	sbi	SD_PORT, SD_MOSI
	ldi	temp2, 8
sd_read_loop:
	lsl	temp ;make room for next bit (old contents are shifted out)
	sbic	SD_PIN, SD_MISO
	ori	temp, 0x01
	sbi	SD_PORT, SD_CK ;clock pulse
	rjmp	PC+1 ;jump to next instruction (short delay)
	cbi	SD_PORT, SD_CK
	dec	temp2
	brne	sd_read_loop
.endif
	ret

;--------------------

sd_clock:
	;send 8 clock pulses
	;  the data line to the card is high meanwhile (ARCADE: 0xFF is sent
	;  via hardware SPI, otherwise MOSI is set before pulsing the clock);
	;  anything the card sends is discarded
	;  modifies: temp2 only ('temp' is left untouched)
.if (ARCADE)
	ldi	temp2, 0xFF
	out	SPDR, temp2
sd_clock_wait:
	sbis	SPSR, SPIF
	rjmp	sd_clock_wait
.else
	sbi	SD_PORT, SD_MOSI
	ldi	temp2, 8
sd_clock_loop:
	sbi	SD_PORT, SD_CK
	rjmp	PC+1 ;jump to next instruction (short delay)
	cbi	SD_PORT, SD_CK
	dec	temp2
	brne	sd_clock_loop
.endif
	ret

;--------------------

sd_read_first_sector:
	;read first sector from first cluster of file
	;(get cluster number from RAM_FAT_Cluster)
	;continues at sd_read_sector_addr, which converts the cluster number in
	;W4:W3:W2:W1 to a card address and starts reading the block
	;all four bytes are loaded: RAM_FAT_Cluster is stored as a 32 bit value
	;and the address calculation uses W1..W4 (needed for FAT32 clusters
	;above 0xFFFF)
	_lds_d	W, RAM_FAT_Cluster
.if (DEBUG >= 2)
	_putstr "  First cluster: "
	_puthex_cluster W
.endif
	rjmp	sd_read_sector_addr
	
sd_read_sector:
	;read sector from SD card (find & advance to next cluster if necessary)
	;  while sectors remain in the current cluster, the block at the
	;  address currently held in Z is started (sd_start_block advances Z
	;  after every block); otherwise the next cluster number is fetched
	;  from the FAT and converted to a card address first
	;  an invalid next cluster number (error or end of chain) sets
	;  fReadError and no block is started
	;  modifies: temp, temp2, W, Z, R1:R0, sector, RAM_FAT_Cluster,
	;            RAM_FAT_RemainingSectors
	lds	temp, RAM_FAT_RemainingSectors
	dec	temp
	sts	RAM_FAT_RemainingSectors, temp ;(sts leaves the flags from 'dec')
	_brne	sd_read_sector_read ;same cluster as before

.if (DEBUG >= 2)
  .if (DEBUG >= 3)
	_putc 0x0a
  .endif
	_putstr "  End of cluster, next: "
.endif
	;read next cluster number from FAT
	_lds_d	W, RAM_FAT_Cluster
	sbrs	flags, fFAT32
	rjmp	sd_read_sector_clustershift_end
	_lsl_d	W ;multiply cluster number by 2 for FAT32 (to use FAT16 calc's)
sd_read_sector_clustershift_end:
	;calculate sector address of wanted FAT entry
	;sector = (cluster >> 8) * 2 (FAT32: ((cluster*2) >> 8) * 2)
	_lds_d	Z, RAM_FAT_Start
	sbrc	flags, fSDHC
	rjmp	sd_read_sector_fat_sdhc
	;SD
	;byte address: add W4:W3:W2 at byte 2 of Z (= * 256), twice (= * 512)
	add	Z2, W2
	adc	Z3, W3
	adc	Z4, W4
	add	Z2, W2
	adc	Z3, W3
	adc	Z4, W4
	rjmp	sd_read_sector_fat_end
sd_read_sector_fat_sdhc:
	;SDHC
	;sector address: add W4:W3:W2 once
	add	Z1, W2
	adc	Z2, W3
	adc	Z3, W4
	adc	Z4, zero
sd_read_sector_fat_end:
	;'W1' = FAT16 entry offset within sector (byte offset / 2)
	;FAT16: W1 = (cluster & 0xFF)
	;FAT32: W1 = ((cluster*2) & 0xFF)
	rcall	sd_start_block
	;dump bytes before wanted entry (W1 * 2 bytes)
	tst	W1
	breq	sd_read_sector_dump_end
	sub	sectorL, W1 ;account for the skipped bytes (2 * W1)
	sbc	sectorH, zero
	sub	sectorL, W1
	sbc	sectorH, zero
sd_read_sector_dump:
	rcall	sd_clock
	rcall	sd_clock
	dec	W1
	brne	sd_read_sector_dump
sd_read_sector_dump_end:
	;get next cluster number
	rcall	sd_read
	mov	W1, temp
	rcall	sd_read
	mov	W2, temp
	clr	W3
	clr	W4
	ldi	temp, 2 ;2 bytes read
	sub	sectorL, temp
	sbc	sectorH, zero
	sbrs	flags, fFAT32
	rjmp	sd_read_sector_offset_end
	;FAT32: entry has two more bytes
	rcall	sd_read
	mov	W3, temp
	rcall	sd_read
	andi	temp, 0x0F ;remove upper nibble of MSB (reserved)
	mov	W4, temp
	ldi	temp, 2
	sub	sectorL, temp
	sbc	sectorH, zero
sd_read_sector_offset_end:
	;dump additional bytes in sector
	rcall	sd_finish_block
.if (DEBUG >= 2)
	_puthex_cluster W
.endif
	;check cluster number
	ldi	temp, 2 ;cluster number must be >= 2
	cp	W1, temp
	cpc	W2, zero
	cpc	W3, zero
	cpc	W4, zero
	_brlo	sd_read_sector_error
	ldi	temp, 0xF0
	ldi	temp2, 0xFF
	sbrc	flags, fFAT32
	rjmp	sd_read_sector_cluster_fat32
	cp	W1, temp ;FAT16: cluster number must be < 0xFFF0
	cpc	W2, temp2
	_brsh	sd_read_sector_error
	rjmp	sd_read_sector_cluster_end
sd_read_sector_cluster_fat32:
	ldi	temp2, 0x0F ;FAT32: (cluster number & 0x0FFFFFFF) must be < 0x0FFFFFF0
	and	W4, temp2
	ldi	temp2, 0xFF ;('ldi' doesn't change flags, so the cp/cpc chain stays intact)
	cp	W1, temp
	cpc	W2, temp2
	cpc	W3, temp2
	ldi	temp2, 0x0F
	cpc	W4, temp2
	_brsh	sd_read_sector_error
sd_read_sector_cluster_end:
	_sts_d	RAM_FAT_Cluster, W

sd_read_sector_addr:
	;calculate start of sector address from cluster number
	;  address = RAM_FAT_Base + (cluster - 2) * RAM_FAT_Clustersize
	;  (SD: additionally * 512 to get a byte address)
	;  also reloads RAM_FAT_RemainingSectors with the cluster size
	ldi	temp, 2 ;base address points to cluster 2
	sub	W1, temp
	sbc	W2, zero
	sbc	W3, zero
	sbc	W4, zero
	_lds_d	Z, RAM_FAT_Base
	lds	temp, RAM_FAT_Clustersize
	sts	RAM_FAT_RemainingSectors, temp
	sbrc	flags, fSDHC
	rjmp	sd_read_sector_addr_sdhc
	;SD
	;each partial product is added one byte higher (= * 256) and twice
	mul	W1, temp
	add	Z2, R0 ;add twice (1 sector = 2*256 bytes)
	adc	Z3, R1
	adc	Z4, zero
	add	Z2, R0
	adc	Z3, R1
	adc	Z4, zero
	mul	W2, temp
	add	Z3, R0
	adc	Z4, R1
	add	Z3, R0
	adc	Z4, R1
	mul	W3, temp
	add	Z4, R0
	add	Z4, R0
	rjmp	sd_read_sector_addr_end
sd_read_sector_addr_sdhc:
	;SDHC
	;32 bit sector address: add the four 8x8 bit partial products
	mul	W1, temp
	add	Z1, R0
	adc	Z2, R1
	adc	Z3, zero
	adc	Z4, zero
	mul	W2, temp
	add	Z2, R0
	adc	Z3, R1
	adc	Z4, zero
	mul	W3, temp
	add	Z3, R0
	adc	Z4, R1
	mul	W4, temp
	add	Z4, R0
sd_read_sector_addr_end:
.if (DEBUG >= 2)
	_putstr " at "
	_puthex32 Z
	_putc 0x0a
.endif

sd_read_sector_read:
	;read sector
	rcall	sd_start_block
	ret

sd_read_sector_error:
	;read invalid cluster number => error or EOF
.if (DEBUG >= 2)
	_putstr_lf " (invalid)"
.endif
	ori	flags, 1<<fReadError
	ret

;--------------------

sd_command:
	;send command to SD card, return read value (bit 7 set => error)
	;  input: temp = command number, Z4:Z3:Z2:Z1 = 32 bit argument
	;  sends 6 bytes: command | 0x40, argument (MSB first), fixed CRC 0x95
	;  returns: temp = first byte read that is < 0x80, or the last byte
	;           read if the card didn't answer within 9 reads
	;  #CS is not touched here (caller has to select the card)
	;  modifies: temp, temp2 ('count' is saved and restored)
	ori	temp, 0x40
	rcall	sd_write
	mov	temp, Z4
	rcall	sd_write
	mov	temp, Z3
	rcall	sd_write
	mov	temp, Z2
	rcall	sd_write
	mov	temp, Z1
	rcall	sd_write
	ldi	temp, 0x95 ;real CRC needed for CMD0(0) and CMD8(0x122)
	rcall	sd_write
	push	count
	ldi	count, 9 ;max. 9 reads, then abort if read value is still >= 0x80
sd_command_read:
	rcall	sd_read
	cpi	temp, 0x80
	brlo	sd_command_end
	dec	count
	brne	sd_command_read
sd_command_end:
	pop	count
	ret

;--------------------

sd_init:
	;initialize SD card (SPI mode), return value: error code or 0 (success)
	;  returns: temp = 0xFF on init error or if the card was removed,
	;           otherwise the response to the final CMD16
	;  flags: fSDHC is set for SDHC cards, fCardRejected is set on init
	;         error (both are cleared at the start)
	;  #CS is high on every exit
	;  modifies: temp, temp2, count, time, Z
.if (DEBUG >= 1)
	_putc 0x0a
	_putstr "Initializing SD Card: "
.endif
	;delay (wait until SD card properly inserted)
	;('time' is presumably counted down elsewhere, e.g. by a timer interrupt)
	_ldi_w	time, 10 ;~100 ms
sd_init_delay:
	_tst_w	time
	brne	sd_init_delay
	;card must be deselected during the init pulses (a read aborted by card
	;removal can leave #CS low)
	sbi	SD_PORT, SD_CS
	;send at least 74 clock pulses (10 bytes = 80 pulses)
	ldi	count, 10
sd_init_pulses:
	rcall	sd_clock
	dec	count
	brne	sd_init_pulses

	andi	flags, LOW(~(1<<fCardRejected | 1<<fSDHC))
	_clr_d	Z

	;send CMD0 (software reset)
	cbi	SD_PORT, SD_CS
	ldi	temp, 0
	rcall	sd_command
	rcall	sd_clock ;(sd_clock leaves the response in 'temp' untouched)
	cpi	temp, 0x01
	_brne	sd_init_error
.if (DEBUG >= 1)
	_putstr "CMD0 OK, "
.endif

	;send CMD8
	_ldi_d	Z, 0x122 ;2.7~3.6V, check pattern 0x22 (=> same CRC as for CMD0)
	ldi	temp, 8
	rcall	sd_command
	cpi	temp, 0x05 ;unknown command => MMC (or old SD card?)
	_breq	sd_init_mmc
	cpi	temp, 0x01
	_brne	sd_init_error
	rcall	sd_clock ;ignore byte 1
	rcall	sd_clock ;ignore byte 2
	rcall	sd_read
	cpi	temp, 0x01 ;voltage range OK?
	_brne	sd_init_error
	rcall	sd_read
	cpi	temp, 0x22 ;matches check pattern?
	_brne	sd_init_error
	rcall	sd_clock
.if (DEBUG >= 1)
	_putstr "CMD8 OK, "
.endif

sd_init_41:
	;send ACMD41 until card leaves idle state
	;TODO: timeout
	_ldi_d	Z, 0
	ldi	temp, 55
	rcall	sd_command
	rcall	sd_clock
	_ldi_d	Z, 1<<30 ;bit 30 (HCS) set
	ldi	temp, 41
	rcall	sd_command
	rcall	sd_clock
.if (MEGA16 == 0)
	sbic	PINE, SD_DETECT
	rjmp	sd_init_removed
.endif
	cpi	temp, 0x01
	breq	sd_init_41
	cpi	temp, 0x05 ;unknown command => possibly a MMC (?)
	breq	sd_init_mmc
	cpi	temp, 0x00
	_brne	sd_init_error
.if (DEBUG >= 1)
	_putstr_lf "CMD41 OK"
.endif

	;send CMD58 (read OCR) to determine if card is SDHC
	_ldi_d	Z, 0
	ldi	temp, 58
	rcall	sd_command
	cpi	temp, 0x00
	brne	sd_init_error
	rcall	sd_read ;byte 1 (checked below, sd_clock doesn't change 'temp')
	rcall	sd_clock ;ignore byte 2
	rcall	sd_clock ;ignore byte 3
	rcall	sd_clock ;ignore byte 4
	rcall	sd_clock
	sbrs	temp, 6 ;HCS bit set?
	rjmp	sd_init_done
	ori	flags, 1<<fSDHC
.if (DEBUG >= 1)
	_putstr_lf "SDHC card detected."
.endif

sd_init_done:
	;set block size to 512 bytes
	_ldi_d	Z, 512
	ldi	temp, 16 ;CMD16: set block size
	rcall	sd_command
	rcall	sd_clock

	sbi	SD_PORT, SD_CS
	ret

sd_init_mmc:
.if (DEBUG >= 1)
	_putstr "MMC detected, "
.endif
sd_init_mmc_loop:
	;send CMD1 until card leaves idle state
	;TODO: timeout
	ldi	temp, 1
	rcall	sd_command
	rcall	sd_clock
.if (MEGA16 == 0)
	sbic	PINE, SD_DETECT
	rjmp	sd_init_removed
.endif
	cpi	temp, 0x01
	breq	sd_init_mmc_loop
	cpi	temp, 0x00
	brne	sd_init_error
.if (DEBUG >= 1)
	_putstr_lf "CMD1 OK"
.endif
	rjmp	sd_init_done

sd_init_error:
	;error during SD card init
	sbi	SD_PORT, SD_CS
.if (DEBUG >= 1)
	push	temp
	_putstr "ERROR "
	pop	temp
	call	puthex
	_putc 0x0a
.endif
	call	message
		.db "SD Card error!",0
	ori	flags, 1<<fCardRejected
	ldi	temp, 0xFF
	ret

sd_init_removed:
	;SD card removed during init
	;release #CS like the other exits do, so that it isn't left asserted
	;for the next card
	sbi	SD_PORT, SD_CS
	ldi	temp, 0xFF
	ret

;--------------------

str_xml_param:
	;find XML parameter RAM_String in RAM_Line and return its numeric value
	;  searches RAM_Line for the first occurrence of the 0x00-terminated
	;  string in RAM_String and converts the text following it with str2num
	;  returns: X = value / 10, temp2 = value % 10,
	;           temp2 = 0xFF if the parameter was not found or has no digits
	;modifies: R1:R0, temp, temp2, X, Y
	_ldi_w	Y, RAM_Line
str_xml_param_restart:
	movw	R1:R0, YH:YL ;remember start position of this attempt
	_ldi_w	X, RAM_String
str_xml_param_loop:
	ld	temp, X+
	cpi	temp, 0
	breq	str_xml_param_found ;end of search string => complete match
	ld	temp2, Y+
	cpi	temp2, 0
	breq	str_xml_param_notfound ;end of line reached
	cp	temp, temp2
	breq	str_xml_param_loop
	;mismatch: try again one char after the previous start position
	movw	YH:YL, R1:R0
	adiw	YH:YL, 1
	rjmp	str_xml_param_restart

str_xml_param_notfound:
	;parameter not found, return 0xFF
	ldi	temp2, 0xFF
	ret

str_xml_param_found:
	;parameter found, return numeric value (X = value / 10, temp2 = value % 10)
	;(Y points to the first char behind the matched name)
	rcall	str2num
	ret

;--------------------

str2num:
	;convert string at Y to numeric value (X = value / 10, temp2 = value % 10)
	;return 0xFF in 'temp2' if value is not numeric
	;  conversion stops at the first char < 0x21; all other chars that are
	;  not decimal digits are skipped, i.e. the digits found are concatenated
	;  X is only valid if at least one digit was found
	;  on return Y points behind the terminating char
	;modifies: R1:R0, temp, temp2, X, Y
	;TODO: prevent overflow
	ldi	temp2, 0xFF
str2num_loop:
	ld	temp, Y+
	cpi	temp, 0x21
	brlo	str2num_end ;whitespace, non-printable char or EOL
	cpi	temp, '0'
	brlo	str2num_loop ;ignore everything else except numbers
	cpi	temp, '9'+1
	brsh	str2num_loop
	andi	temp, 0x0F ;ASCII digit -> 0..9
	cpi	temp2, 0xFF ;if value is still unset: set value = 0
	brne	str2num_mul10
	_ldi_w	X, 0
	ldi	temp2, 0
str2num_mul10:
	movw	R1:R0, XH:XL ;multiply value by 10 (X = [X + X * 4] * 2)
	lsl	R0
	rol	R1
	lsl	R0
	rol	R1
	add	XL, R0
	adc	XH, R1
	_lsl_w	X
	add	XL, temp2 ;add previous digit
	adc	XH, zero
	mov	temp2, temp ;new digit becomes the ones digit (value % 10)
	rjmp	str2num_loop
str2num_end:
	ret


;--------------------

str_compare:
	;compare string at RAM[X] with string from flash
	;  PARAMS: 0x00-terminated string
	;  (the string is stored in flash directly behind the call instruction;
	;  the return address is used to read it and is then moved behind it)
	;  only as many chars as the flash string has are compared, so the
	;  result is 'equal' if the RAM string starts with the flash string
	;  returns: Z flag set (temp = 0) if equal, Z flag clear (temp = 1) if not
	;  X is advanced by the length of the flash string in either case
	;  modifies: R1:R0, temp, temp2, X, T flag (Z is saved and restored)
	movw	R1:R0, ZH:ZL
	_pop_w	Z ;return address = word address of the string
	_lsl_w	Z ;word address -> byte address for lpm
	clt
str_compare_loop:
	lpm	temp, Z+
	cpi	temp, 0
	breq	str_compare_end
	ld	temp2, X+
	cp	temp, temp2
	breq	str_compare_loop
	set ;mismatch: remember it, but keep going to reach the end of the string
	rjmp	str_compare_loop
str_compare_end:
	;return
	adiw	ZH:ZL, 1 ;round up to the next word boundary
	_lsr_w	Z
	_push_w	Z
	movw	ZH:ZL, R1:R0
	ldi	temp, 0
	bld	temp, 0 ;move T flag (set if not equal) to temp bit 0
	cpi	temp, 0 ;return value: Z flag
	ret
	
;--------------------

str2ram:
	;copy string to RAM_String
	;  PARAMS: 0x00-terminated string (max. 10 chars including 0x00)
	;  (the string is stored in flash directly behind the call instruction;
	;  the return address is used to read it and is then moved behind it)
	;  modifies: R1:R0, temp, temp2, X
	;  (Z is used as flash pointer, but saved in R1:R0 and restored)
	movw	R1:R0, ZH:ZL
	_pop_w	Z ;return address = word address of the string
	_lsl_w	Z ;word address -> byte address for lpm
	_ldi_w	X, RAM_String
str2ram_loop:
	;flash is read in pairs of bytes, so Z stays word-aligned
	lpm	temp, Z+
	st	X+, temp
	lpm	temp2, Z+
	cpi	temp, 0
	breq	str2ram_end ;first byte was the terminator (2nd byte not stored)
	st	X+, temp2
	cpi	temp2, 0
	brne	str2ram_loop
str2ram_end:
	;return
	_lsr_w	Z ;byte address -> word address
	_push_w	Z
	movw	ZH:ZL, R1:R0
	ret
	
.endif ;if (USE_SDCARD)

;===============================================================================

.if (ARCADE)

;frame data for the ARCADE build (260 pixel data bytes per frame)
initframe:
	;Logo
	.dw 100 ;1s
  .if (ARCADEMICRO)
	.include "images/am-logo.asm" ;ArcadeMicro logo
  .else
	.include "images/bp-logo-520.asm" ;generic BlinkenPlus logo
  .endif
frames:
	;frame format:
	;  * 1 word (2 bytes) duration (*10ms, little endian), must not be zero
	;    (a duration of zero is used as end marker, see below)
	;  * 260 pixel data bytes (left to right, top to bottom,
	;    2 pixels per byte (high nibble = right pixel), values 0 (off) to 7)
	;    NOTE: an even number of bytes per .db line is required!
	.dw 50 ;500 ms
	.include "images/chaosknoten-520.asm"
	;.include "movies520.asm"
	.dw 0 ;end marker

.else

;frame data for all other builds (72 pixel data bytes per frame)
initframe:
	;Logo
	.dw 100 ;1s
  .if (BSAPLUS)
	.include "images/bsaplus-logo.asm" ;BlinkstroemAdvanced Plus logo
  .elseif (BLPLUS0 || BLPLUS1 || BLPLUS3)
	.include "images/blplus-logo.asm" ;BlinkenLEDs Plus logo
  .else
	.include "images/bp-logo-144.asm" ;generic BlinkenPlus logo
  .endif

frames:
	;frame format:
	;  * 1 word (2 bytes) duration (*10ms, little endian), must not be zero
	;    (a duration of zero is used as end marker, see below)
	;  * 72 pixel data bytes (left to right, top to bottom,
	;    2 pixels per byte (high nibble = right pixel), values 0 (off) to 7)
	;    NOTE: an even number of bytes per .db line is required!
	.dw 50 ;500ms
	.include "images/chaosknoten-144.asm"
	;.include "movies144.asm"
	.dw 0 ;end marker

.endif ;if (ARCADE)
